1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 *  ihevce_sao.c
24 *
25 * @brief
26 *  Contains definition for the ctb level sao function
27 *
28 * @author
29 *  Ittiam
30 *
31 * @par List of Functions:
32 *  ihevce_sao_set_avilability()
33 *  ihevce_sao_ctb()
34 *  ihevce_sao_analyse()
35 *
36 * @remarks
37 *  None
38 *
39 *******************************************************************************
40 */
41 
42 /*****************************************************************************/
43 /* File Includes                                                             */
44 /*****************************************************************************/
45 /* System include files */
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <assert.h>
50 #include <stdarg.h>
51 #include <math.h>
52 
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57 
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61 
62 #include "ihevc_defs.h"
63 #include "ihevc_structs.h"
64 #include "ihevc_platform_macros.h"
65 #include "ihevc_deblk.h"
66 #include "ihevc_itrans_recon.h"
67 #include "ihevc_chroma_itrans_recon.h"
68 #include "ihevc_chroma_intra_pred.h"
69 #include "ihevc_intra_pred.h"
70 #include "ihevc_inter_pred.h"
71 #include "ihevc_mem_fns.h"
72 #include "ihevc_padding.h"
73 #include "ihevc_weighted_pred.h"
74 #include "ihevc_sao.h"
75 #include "ihevc_resi_trans.h"
76 #include "ihevc_quant_iquant_ssd.h"
77 #include "ihevc_cabac_tables.h"
78 
79 #include "ihevce_defs.h"
80 #include "ihevce_lap_enc_structs.h"
81 #include "ihevce_multi_thrd_structs.h"
82 #include "ihevce_me_common_defs.h"
83 #include "ihevce_had_satd.h"
84 #include "ihevce_error_codes.h"
85 #include "ihevce_bitstream.h"
86 #include "ihevce_cabac.h"
87 #include "ihevce_rdoq_macros.h"
88 #include "ihevce_function_selector.h"
89 #include "ihevce_enc_structs.h"
90 #include "ihevce_entropy_structs.h"
91 #include "ihevce_cmn_utils_instr_set_router.h"
92 #include "ihevce_enc_loop_structs.h"
93 #include "ihevce_cabac_rdo.h"
94 #include "ihevce_sao.h"
95 
96 /*****************************************************************************/
97 /* Function Definitions                                                      */
98 /*****************************************************************************/
99 
100 /**
101 *******************************************************************************
102 *
103 * @brief
104 *     ihevce_sao_set_avilability
105 *
106 * @par Description:
107 *     Sets the availability flag for SAO.
108 *
109 * @param[in]
110 *   ps_sao_ctxt:   Pointer to SAO context
111 * @returns
112 *
113 * @remarks
114 *  None
115 *
116 *******************************************************************************
117 */
ihevce_sao_set_avilability(UWORD8 * pu1_avail,sao_ctxt_t * ps_sao_ctxt,ihevce_tile_params_t * ps_tile_params)118 void ihevce_sao_set_avilability(
119     UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
120 {
121     WORD32 i;
122 
123     WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x;
124     WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y;
125 
126     for(i = 0; i < 8; i++)
127     {
128         pu1_avail[i] = 255;
129     }
130 
131     /* SAO_note_01: If the CTB lies on a tile or a slice boundary and
132     in-loop filtering is enabled at tile and slice boundary, then SAO must
133     be performed at tile/slice boundaries also.
134     Hence the boundary checks should be based on frame position of CTB
135     rather than s_ctb_nbr_avail_flags.u1_left_avail flags.
136     Search for <SAO_note_01> in workspace to know more */
137     /* Availaibility flags for first col*/
138     if(ctb_x_pos == ps_tile_params->i4_first_ctb_x)
139     {
140         pu1_avail[0] = 0;
141         pu1_avail[4] = 0;
142         pu1_avail[6] = 0;
143     }
144 
145     /* Availaibility flags for last col*/
146     if((ctb_x_pos + 1) ==
147        (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit))
148     {
149         pu1_avail[1] = 0;
150         pu1_avail[5] = 0;
151         pu1_avail[7] = 0;
152     }
153 
154     /* Availaibility flags for first row*/
155     if(ctb_y_pos == ps_tile_params->i4_first_ctb_y)
156     {
157         pu1_avail[2] = 0;
158         pu1_avail[4] = 0;
159         pu1_avail[5] = 0;
160     }
161 
162     /* Availaibility flags for last row*/
163     if((ctb_y_pos + 1) ==
164        (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit))
165     {
166         pu1_avail[3] = 0;
167         pu1_avail[6] = 0;
168         pu1_avail[7] = 0;
169     }
170 }
171 
172 /**
173 *******************************************************************************
174 *
175 * @brief
176 *   Sao CTB level function.
177 *
178 * @par Description:
179 *   For a given CTB, sao is done. Both the luma and chroma
180 *   blocks are processed
181 *
182 * @param[in]
183 *   ps_sao_ctxt:   Pointer to SAO context
184 *
185 * @returns
186 *
187 * @remarks
188 *  None
189 *
190 *******************************************************************************
191 */
ihevce_sao_ctb(sao_ctxt_t * ps_sao_ctxt,ihevce_tile_params_t * ps_tile_params)192 void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
193 {
194     sao_enc_t *ps_sao;
195     UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2];
196     UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf;
197     UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf;
198     UWORD8 *pu1_src_luma, *pu1_src_chroma;
199     WORD32 luma_src_stride, ctb_size;
200     WORD32 chroma_src_stride;
201     UWORD8 au1_avail_luma[8], au1_avail_chroma[8];
202     WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma;
203     UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma;
204     UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma;
205     UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma;
206     UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
207 
208     ps_sao = ps_sao_ctxt->ps_sao;
209 
210     ASSERT(
211         (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) &&
212         (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7));
213     ASSERT(
214         (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) &&
215         (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7));
216     ASSERT(
217         (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) &&
218         (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7));
219     ASSERT(
220         (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) &&
221         (ps_sao->b5_cr_band_pos <= 28));
222 
223     if(ps_sao_ctxt->i1_slice_sao_luma_flag)
224     {
225         /*initialize the src pointer to current row*/
226         luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
227 
228         ctb_size = ps_sao_ctxt->i4_ctb_size;
229 
230         /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
231         ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used  */
232         sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd;
233         sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht;
234 
235         pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
236         /* Pointer to the top luma buffer corresponding to the current ctb row*/
237         pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma;
238 
239         /* Pointer to left luma buffer corresponding to the current ctb row*/
240         pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch;
241 
242         /* Pointer to the top right luma buffer corresponding to the current ctb row*/
243         pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd;
244 
245         /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
246         pu1_src_bot_left_luma =
247             ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride -
248             1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
249             (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
250 
251         /* Back up the top left pixel for (x+1, y+1)th ctb*/
252         u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1);
253         pu1_top_left_luma = pu1_src_top_luma_buf - 1;
254 
255         if(SAO_BAND == ps_sao->b3_y_type_idx)
256         {
257             ihevc_sao_band_offset_luma(
258                 pu1_src_luma,
259                 luma_src_stride,
260                 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
261                 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
262                 pu1_src_top_luma_buf - 1, /* Top left*/
263                 ps_sao->b5_y_band_pos,
264                 ps_sao->u1_y_offset,
265                 sao_blk_wd,
266                 sao_blk_ht);
267 
268             if((ps_sao_ctxt->i4_ctb_y > 0))
269             {
270                 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
271             }
272         }
273         else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG)
274         {
275             /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
276             * corresponding to EO category 1 and 2 which should be always positive
277             * And 3rd and 4th offsets are always inferred as offsets corresponding to
278             * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
279             */
280             // clang-format off
281             ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0));
282             ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0));
283             // clang-format on
284 
285             ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params);
286 
287             ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2](
288                 pu1_src_luma,
289                 luma_src_stride,
290                 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
291                 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
292                 pu1_top_left_luma, /* Top left*/
293                 pu1_src_top_right_luma, /* Top right*/
294                 pu1_src_bot_left_luma, /* Bottom left*/
295                 au1_avail_luma,
296                 ps_sao->u1_y_offset,
297                 sao_blk_wd,
298                 sao_blk_ht);
299 
300             if((ps_sao_ctxt->i4_ctb_y > 0))
301             {
302                 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
303             }
304         }
305     }
306 
307     if(ps_sao_ctxt->i1_slice_sao_chroma_flag)
308     {
309         /*initialize the src pointer to current row*/
310         chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
311         ctb_size = ps_sao_ctxt->i4_ctb_size;
312 
313         /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
314         //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2;
315         ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used  */
316         ps_sao->u1_cr_offset[0] = 0;
317         sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd;
318         sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1);
319 
320         pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
321         /* Pointer to the top luma buffer corresponding to the current ctb row*/
322         pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma;
323         // clang-format off
324         /* Pointer to left luma buffer corresponding to the current ctb row*/
325         pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch;  //ps_sao_ctxt->au1_sao_src_left_chroma;
326         // clang-format on
327         /* Pointer to the top right chroma buffer corresponding to the current ctb row*/
328         pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma;
329 
330         /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
331         pu1_src_bot_left_chroma =
332             ps_sao_ctxt->pu1_frm_chroma_recon_buf +
333             (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 +
334             (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
335              (ctb_size >> !u1_is_422)) +
336             (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
337 
338         /* Back up the top left pixel for (x+1, y+1)th ctb*/
339         u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2);
340         u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1);
341         pu1_top_left_chroma = pu1_src_top_chroma_buf - 2;
342 
343         if(SAO_BAND == ps_sao->b3_cb_type_idx)
344         {
345             ihevc_sao_band_offset_chroma(
346                 pu1_src_chroma,
347                 chroma_src_stride,
348                 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
349                 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
350                 pu1_top_left_chroma, /* Top left*/
351                 ps_sao->b5_cb_band_pos,
352                 ps_sao->b5_cr_band_pos,
353                 ps_sao->u1_cb_offset,
354                 ps_sao->u1_cr_offset,
355                 sao_wd_chroma,
356                 sao_ht_chroma);
357 
358             if((ps_sao_ctxt->i4_ctb_y > 0))
359             {
360                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
361                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
362             }
363         }
364         else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG)
365         {
366             /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
367             * corresponding to EO category 1 and 2 which should be always positive
368             * And 3rd and 4th offsets are always inferred as offsets corresponding to
369             * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
370             */
371             ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0));
372             ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0));
373 
374             ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0));
375             ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0));
376 
377             ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params);
378 
379             ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](
380                 pu1_src_chroma,
381                 chroma_src_stride,
382                 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
383                 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
384                 pu1_top_left_chroma, /* Top left*/
385                 pu1_src_top_right_chroma, /* Top right*/
386                 pu1_src_bot_left_chroma, /* Bottom left*/
387                 au1_avail_chroma,
388                 ps_sao->u1_cb_offset,
389                 ps_sao->u1_cr_offset,
390                 sao_wd_chroma,
391                 sao_ht_chroma);
392 
393             if((ps_sao_ctxt->i4_ctb_y > 0))
394             {
395                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
396                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
397             }
398         }
399     }
400 }
401 
402 /**
403 *******************************************************************************
404 *
405 * @brief
406 *   CTB level function to do SAO analysis.
407 *
408 * @par Description:
409 *   For a given CTB, sao analysis is done for both luma and chroma.
410 *
411 *
412 * @param[in]
413 *   ps_sao_ctxt:   Pointer to SAO context
414 *   ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop
415 *
416 * @returns
417 *
418 * @remarks
419 *  None
420 *
421 * @Assumptions:
422 *   1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be
423 *      almost same as cabac state of (x,y)th ctb.
424 *   2) Distortion is calculated in spatial domain but lamda used to calculate the cost is
425 *      in freq domain.
426 *******************************************************************************
427 */
ihevce_sao_analyse(sao_ctxt_t * ps_sao_ctxt,ctb_enc_loop_out_t * ps_ctb_enc_loop_out,UWORD32 * pu4_frame_rdopt_header_bits,ihevce_tile_params_t * ps_tile_params)428 void ihevce_sao_analyse(
429     sao_ctxt_t *ps_sao_ctxt,
430     ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
431     UWORD32 *pu4_frame_rdopt_header_bits,
432     ihevce_tile_params_t *ps_tile_params)
433 {
434     UWORD8 *pu1_luma_scratch_buf;
435     UWORD8 *pu1_chroma_scratch_buf;
436     UWORD8 *pu1_src_luma, *pu1_recon_luma;
437     UWORD8 *pu1_src_chroma, *pu1_recon_chroma;
438     WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht;
439     WORD32 chroma_src_stride, chroma_recon_stride;
440     WORD32 i4_luma_scratch_buf_stride;
441     WORD32 i4_chroma_scratch_buf_stride;
442     sao_ctxt_t s_sao_ctxt;
443     UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0;
444     LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf;
445     WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0;
446     WORD32 curr_buf_idx, best_buf_idx, best_cand_idx;
447     WORD32 row;
448     WORD32 edgeidx;
449     WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 };
450     sao_enc_t s_best_luma_chroma_cand;
451     WORD32 best_ctb_sao_bits = 0;
452 #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2)
453     UWORD8 u1_force_no_offset =
454         ps_sao_ctxt
455             ->ps_ctb_data
456                 [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y]
457             .s_ctb_noise_params.i4_noise_present;
458 #endif
459     UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
460 
461     *pu4_frame_rdopt_header_bits = 0;
462 
463     ctb_size = ps_sao_ctxt->i4_ctb_size;
464     ctb_wd = ps_sao_ctxt->i4_sao_blk_wd;
465     ctb_ht = ps_sao_ctxt->i4_sao_blk_ht;
466 
467     s_sao_ctxt = ps_sao_ctxt[0];
468 
469     /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/
470     memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t));
471 
472     /* Initialize the pointer and strides for luma buffers*/
473     pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
474     luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
475 
476     pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf;
477     luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride;
478     i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
479 
480     /* Initialize the pointer and strides for luma buffers*/
481     pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
482     chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
483 
484     pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf;
485     chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride;
486     i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
487 
488     i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf;
489     i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf;
490 
491     /*****************************************************/
492     /********************RDO FOR LUMA CAND****************/
493     /*****************************************************/
494 
495 #if !DISABLE_SAO_WHEN_NOISY
496     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
497 #else
498     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset)
499 #endif
500     {
501         /* Candidate for Edge offset SAO*/
502         /* Following is the convention for curr pixel and
503         * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */
504         /*
505         * 0 deg :  a c b     90 deg:  a       135 deg: a          45 deg:     a
506         *                             c                  c                  c
507         *                             b                    b              b
508         */
509 
510         /* 0 deg SAO CAND*/
511         /* Reset the error and edge count*/
512         for(edgeidx = 0; edgeidx < 5; edgeidx++)
513         {
514             acc_error_category[edgeidx] = 0;
515             category_count[edgeidx] = 0;
516         }
517 
518         /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/
519         // clang-format off
520         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG,
521                 acc_error_category, category_count);
522         // clang-format on
523         // clang-format off
524         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG;
525         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
526                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
527                 : 0;
528         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
529                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
530                 : 0;
531         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
532                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
533                 : 0;
534         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4]
535                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
536                 : 0;
537         // clang-format on
538         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
539         // clang-format off
540         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
541         // clang-format on
542         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
543         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
544         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
545         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
546         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
547 
548         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
549         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
550         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
551         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
552         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
553         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
554         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
555         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
556 
557         num_luma_rdo_cand++;
558 
559         /* 90 degree SAO CAND*/
560         for(edgeidx = 0; edgeidx < 5; edgeidx++)
561         {
562             acc_error_category[edgeidx] = 0;
563             category_count[edgeidx] = 0;
564         }
565 
566         /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/
567         // clang-format off
568         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG,
569                 acc_error_category, category_count);
570 
571         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG;
572         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
573                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
574                 : 0;
575         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
576                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
577                 : 0;
578         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
579                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
580                 : 0;
581         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
582                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
583                 : 0;
584         // clang-format on
585         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
586 
587         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
588         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
589         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
590         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
591         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
592         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
593 
594         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
595         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
596         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
597         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
598         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
599         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
600         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
601         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
602 
603         num_luma_rdo_cand++;
604 
605         /* 135 degree SAO CAND*/
606         for(edgeidx = 0; edgeidx < 5; edgeidx++)
607         {
608             acc_error_category[edgeidx] = 0;
609             category_count[edgeidx] = 0;
610         }
611 
612         /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/
613         // clang-format off
614         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG,
615                 acc_error_category, category_count);
616 
617         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG;
618         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
619                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
620                 : 0;
621         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
622                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
623                 : 0;
624         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
625                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
626                 : 0;
627         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
628                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
629                 : 0;
630         // clang-format on
631         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
632 
633         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
634         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
635         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
636         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
637         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
638         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
639 
640         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
641         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
642         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
643         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
644         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
645         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
646         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
647         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
648 
649         num_luma_rdo_cand++;
650 
651         /* 45 degree SAO CAND*/
652         for(edgeidx = 0; edgeidx < 5; edgeidx++)
653         {
654             acc_error_category[edgeidx] = 0;
655             category_count[edgeidx] = 0;
656         }
657 
658         /* Call the funciton to populate the EO parameter for this ctb for 45 deg EO class*/
659         // clang-format off
660         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG,
661                 acc_error_category, category_count);
662 
663         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG;
664         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
665                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
666                 : 0;
667         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
668                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
669                 : 0;
670         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
671                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
672                 : 0;
673         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
674                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
675                 : 0;
676         // clang-format on
677         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
678 
679         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
680         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
681         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
682         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
683         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
684         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
685 
686         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
687         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
688         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
689         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
690         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
691         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
692         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
693         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
694 
695         num_luma_rdo_cand++;
696 
697         /* First cand will be best cand after 1st iteration*/
698         curr_buf_idx = 0;
699         best_buf_idx = 1;
700         best_cost = 0xFFFFFFFF;
701         best_cand_idx = 0;
702 
703         /*Back up the top pixels for (x,y+1)th ctb*/
704         if(!ps_sao_ctxt->i4_is_last_ctb_row)
705         {
706             memcpy(
707                 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
708                 pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
709                 ps_sao_ctxt->i4_sao_blk_wd);
710         }
711 
712         for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++)
713         {
714             s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
715 
716             /* This memcpy is required because cabac uses parameters from this structure
717             * to evaluate bits and this structure ptr is sent to cabac through
718             * "ihevce_cabac_rdo_encode_sao" function
719             */
720             memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
721 
722             /* Copy the left pixels to the scratch buffer for evry rdo cand because its
723             overwritten by the sao leaf level function for next ctb*/
724             memcpy(
725                 s_sao_ctxt.au1_left_luma_scratch,
726                 ps_sao_ctxt->au1_sao_src_left_luma,
727                 ps_sao_ctxt->i4_sao_blk_ht);
728 
729             /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
730             overwritten by the sao leaf level function for next ctb*/
731             memcpy(
732                 s_sao_ctxt.au1_top_luma_scratch,
733                 ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
734                 ps_sao_ctxt->i4_sao_blk_wd + 2);
735             s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
736 
737             pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
738 
739             ASSERT(
740                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
741                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
742                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
743                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
744             ASSERT(
745                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
746                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
747                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
748                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
749             ASSERT(
750                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
751                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
752                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
753                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
754             ASSERT(
755                 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
756                 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
757                 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
758 
759             /* Copy the deblocked recon data to scratch buffer to do sao*/
760 
761             ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
762                 pu1_luma_scratch_buf,
763                 i4_luma_scratch_buf_stride,
764                 pu1_recon_luma,
765                 luma_recon_stride,
766                 SCRATCH_BUF_STRIDE,
767                 ctb_ht + 1);
768 
769             s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
770             s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
771 
772             s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
773             s_sao_ctxt.i1_slice_sao_chroma_flag = 0;
774 
775             ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
776 
777             /* Calculate the distortion between sao'ed ctb and original src ctb*/
778             // clang-format off
779             distortion =
780                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
781                         s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
782                         s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE);
783             // clang-format on
784 
785             ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
786             ctb_bits = ihevce_cabac_rdo_encode_sao(
787                 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
788 
789             /* Calculate the cost as D+(lamda)*R   */
790             curr_cost = distortion +
791                         COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
792 
793             if(curr_cost < best_cost)
794             {
795                 best_cost = curr_cost;
796                 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
797                 best_cand_idx = rdo_cand;
798                 curr_buf_idx = !curr_buf_idx;
799             }
800         }
801 
802         /* Copy the sao parameters of the best luma cand into the luma_chroma cnad structure for next stage of RDO
803         * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
804         */
805         s_best_luma_chroma_cand.b3_y_type_idx =
806             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx;
807         s_best_luma_chroma_cand.u1_y_offset[1] =
808             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1];
809         s_best_luma_chroma_cand.u1_y_offset[2] =
810             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2];
811         s_best_luma_chroma_cand.u1_y_offset[3] =
812             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3];
813         s_best_luma_chroma_cand.u1_y_offset[4] =
814             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4];
815         s_best_luma_chroma_cand.b5_y_band_pos =
816             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos;
817     }
818     else
819     {
820         /*Back up the top pixels for (x,y+1)th ctb*/
821         if(!ps_sao_ctxt->i4_is_last_ctb_row)
822         {
823             memcpy(
824                 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
825                 pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
826                 ps_sao_ctxt->i4_sao_blk_wd);
827         }
828 
829         s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE;
830         s_best_luma_chroma_cand.u1_y_offset[1] = 0;
831         s_best_luma_chroma_cand.u1_y_offset[2] = 0;
832         s_best_luma_chroma_cand.u1_y_offset[3] = 0;
833         s_best_luma_chroma_cand.u1_y_offset[4] = 0;
834         s_best_luma_chroma_cand.b5_y_band_pos = 0;
835         s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
836         s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
837 
838         s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
839         s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
840         s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
841         s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
842         s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
843         s_best_luma_chroma_cand.b5_cb_band_pos = 0;
844 
845         s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
846         s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
847         s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
848         s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
849         s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
850         s_best_luma_chroma_cand.b5_cr_band_pos = 0;
851     }
852     /*****************************************************/
853     /********************RDO FOR CHROMA CAND**************/
854     /*****************************************************/
855 #if !DISABLE_SAO_WHEN_NOISY
856     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
857 #else
858     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset)
859 #endif
860     {
861         /*Back up the top pixels for (x,y+1)th ctb*/
862         if(!ps_sao_ctxt->i4_is_last_ctb_row)
863         {
864             memcpy(
865                 ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
866                     ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
867                 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
868                 ps_sao_ctxt->i4_sao_blk_wd);
869         }
870 
871         /* Reset the error and edge count*/
872         for(edgeidx = 0; edgeidx < 5; edgeidx++)
873         {
874             acc_error_category[edgeidx] = 0;
875             category_count[edgeidx] = 0;
876         }
877         // clang-format off
878         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt,
879                 s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category,
880                 category_count);
881         // clang-format on
882 
883         /* Copy the sao parameters of the best luma cand into the luma_chroma cnad structure for next stage of RDO
884         * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
885         */
886         // clang-format off
887         s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
888         s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0]
889                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
890                 : 0;
891         s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1]
892                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
893                 : 0;
894         s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3]
895                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
896                 : 0;
897         s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4]
898                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
899                 : 0;
900         s_best_luma_chroma_cand.b5_cb_band_pos = 0;
901 
902         s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
903         s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0]
904                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
905                 : 0;
906         s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1]
907                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
908                 : 0;
909         s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3]
910                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
911                 : 0;
912         s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4]
913                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
914                 : 0;
915         // clang-format on
916         s_best_luma_chroma_cand.b5_cr_band_pos = 0;
917     }
918     else
919     {
920         /*Back up the top pixels for (x,y+1)th ctb*/
921         if(!ps_sao_ctxt->i4_is_last_ctb_row)
922         {
923             memcpy(
924                 ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
925                     ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
926                 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
927                 ps_sao_ctxt->i4_sao_blk_wd);
928         }
929 
930         s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
931         s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
932         s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
933         s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
934         s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
935         s_best_luma_chroma_cand.b5_cb_band_pos = 0;
936 
937         s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
938         s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
939         s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
940         s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
941         s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
942         s_best_luma_chroma_cand.b5_cr_band_pos = 0;
943 
944         s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
945         s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
946     }
947 
948     s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
949     s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
950 
951     /*****************************************************/
952     /**RDO for Best Luma - Chroma combined, No SAO,*******/
953     /*************Left merge and Top merge****************/
954     /*****************************************************/
955 
956     /* No SAO cand*/
957     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
958     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
959 
960     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE;
961     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0;
962     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0;
963     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0;
964     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0;
965     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0;
966 
967     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE;
968     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0;
969     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0;
970     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0;
971     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0;
972     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0;
973 
974     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE;
975     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0;
976     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0;
977     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0;
978     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0;
979     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0;
980     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
981     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
982 
983     num_rdo_cand++;
984 
985     /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then
986     the standard mandates that the merge candidates must be set to unavailable.
987     Hence, check for tile boundary condition by reading
988     s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB.
989     A special case: Merge-candidates should be available at dependent-slices boundaries.
990     Search for <SAO_note_01> in workspace to know more */
991 
992 #if !DISABLE_SAO_WHEN_NOISY
993     if(1)
994 #else
995     if(!u1_force_no_offset)
996 #endif
997     {
998         /* Merge left cand*/
999         if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail)
1000         {
1001             memcpy(
1002                 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1003                 &ps_sao_ctxt->s_left_ctb_sao,
1004                 sizeof(sao_enc_t));
1005             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1;
1006             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
1007             num_rdo_cand++;
1008         }
1009 
1010         /* Merge top cand*/
1011         if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail)
1012         {
1013             memcpy(
1014                 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1015                 (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz),
1016                 sizeof(sao_enc_t));
1017             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
1018             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1;
1019             num_rdo_cand++;
1020         }
1021 
1022         /* Best luma-chroma candidate*/
1023         memcpy(
1024             &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1025             &s_best_luma_chroma_cand,
1026             sizeof(sao_enc_t));
1027         num_rdo_cand++;
1028     }
1029 
1030     {
1031         UWORD32 luma_distortion = 0, chroma_distortion = 0;
1032         /* First cand will be best cand after 1st iteration*/
1033         curr_buf_idx = 0;
1034         best_buf_idx = 1;
1035         best_cost = 0xFFFFFFFF;
1036         best_cand_idx = 0;
1037 
1038         for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++)
1039         {
1040             s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
1041 
1042             distortion = 0;
1043 
1044             /* This memcpy is required because cabac uses parameters from this structure
1045             * to evaluate bits and this structure ptr is sent to cabac through
1046             * "ihevce_cabac_rdo_encode_sao" function
1047             */
1048             memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
1049 
1050             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1051             {
1052                 /* Copy the left pixels to the scratch buffer for evry rdo cand because its
1053                 overwritten by the sao leaf level function for next ctb*/
1054                 memcpy(
1055                     s_sao_ctxt.au1_left_luma_scratch,
1056                     ps_sao_ctxt->au1_sao_src_left_luma,
1057                     ps_sao_ctxt->i4_sao_blk_ht);
1058 
1059                 /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
1060                 overwritten by the sao leaf level function for next ctb*/
1061                 memcpy(
1062                     s_sao_ctxt.au1_top_luma_scratch,
1063                     ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
1064                     ps_sao_ctxt->i4_sao_blk_wd + 2);
1065                 s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
1066 
1067                 pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
1068 
1069                 /* Copy the deblocked recon data to scratch buffer to do sao*/
1070 
1071                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1072                     pu1_luma_scratch_buf,
1073                     i4_luma_scratch_buf_stride,
1074                     pu1_recon_luma,
1075                     luma_recon_stride,
1076                     SCRATCH_BUF_STRIDE,
1077                     ctb_ht + 1);
1078                 s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
1079                 s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
1080 
1081                 ASSERT(
1082                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
1083                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
1084                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
1085                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
1086             }
1087             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1088             {
1089                 /* Copy the left pixels to the scratch buffer for evry rdo cand because its
1090                 overwritten by the sao leaf level function for next ctb*/
1091                 memcpy(
1092                     s_sao_ctxt.au1_left_chroma_scratch,
1093                     ps_sao_ctxt->au1_sao_src_left_chroma,
1094                     (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2);
1095 
1096                 /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
1097                 overwritten by the sao leaf level function for next ctb*/
1098                 memcpy(
1099                     s_sao_ctxt.au1_top_chroma_scratch,
1100                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2,
1101                     ps_sao_ctxt->i4_sao_blk_wd + 4);
1102 
1103                 s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2;
1104 
1105                 pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx];
1106 
1107                 /* Copy the deblocked recon data to scratch buffer to do sao*/
1108 
1109                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1110                     pu1_chroma_scratch_buf,
1111                     i4_chroma_scratch_buf_stride,
1112                     pu1_recon_chroma,
1113                     chroma_recon_stride,
1114                     SCRATCH_BUF_STRIDE,
1115                     (ctb_ht >> !u1_is_422) + 1);
1116 
1117                 s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf;
1118                 s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride;
1119 
1120                 ASSERT(
1121                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
1122                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
1123                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
1124                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
1125                 ASSERT(
1126                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
1127                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
1128                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
1129                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
1130             }
1131 
1132             ASSERT(
1133                 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
1134                 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
1135                 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
1136 
1137             s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
1138             s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag;
1139 
1140             ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
1141 
1142             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1143             {  // clang-format off
1144                 luma_distortion =
1145                     ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
1146                             s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
1147                             s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
1148                             ctb_ht,
1149                             NULL_PLANE);
1150             }  // clang-format on
1151 
1152             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1153             {  // clang-format off
1154                 chroma_distortion =
1155                     ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma,
1156                             s_sao_ctxt.pu1_cur_chroma_recon_buf,
1157                             chroma_src_stride,
1158                             s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
1159                             (ctb_ht >> !u1_is_422),
1160                             NULL_PLANE);
1161             }  // clang-format on
1162 
1163             /*chroma distortion is added after correction because of lambda difference*/
1164             distortion =
1165                 luma_distortion +
1166                 (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf));
1167 
1168             ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
1169             ctb_bits = ihevce_cabac_rdo_encode_sao(
1170                 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
1171 
1172             /* Calculate the cost as D+(lamda)*R   */
1173             curr_cost = distortion +
1174                         COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
1175 
1176             if(curr_cost < best_cost)
1177             {
1178                 best_ctb_sao_bits = ctb_bits;
1179                 best_cost = curr_cost;
1180                 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1181                 best_cand_idx = rdo_cand;
1182                 curr_buf_idx = !curr_buf_idx;
1183             }
1184         }
1185         /*Adding sao bits to header bits*/
1186         *pu4_frame_rdopt_header_bits = best_ctb_sao_bits;
1187 
1188         ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx);
1189 
1190         /* store the sao parameters of curr ctb for top merge and left merge*/
1191         memcpy(
1192             ps_sao_ctxt->ps_top_ctb_sao,
1193             &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1194             sizeof(sao_enc_t));
1195         memcpy(
1196             &ps_sao_ctxt->s_left_ctb_sao,
1197             &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1198             sizeof(sao_enc_t));
1199 
1200         /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/
1201         memcpy(
1202             &ps_ctb_enc_loop_out->s_sao,
1203             &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1204             sizeof(sao_enc_t));
1205 
1206         if(!ps_sao_ctxt->i4_is_last_ctb_col)
1207         {
1208             /* Update left luma buffer for next ctb */
1209             for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++)
1210             {
1211                 ps_sao_ctxt->au1_sao_src_left_luma[row] =
1212                     ps_sao_ctxt->pu1_cur_luma_recon_buf
1213                         [row * ps_sao_ctxt->i4_cur_luma_recon_stride +
1214                          (ps_sao_ctxt->i4_sao_blk_wd - 1)];
1215             }
1216         }
1217 
1218         if(!ps_sao_ctxt->i4_is_last_ctb_col)
1219         {
1220             /* Update left chroma buffer for next ctb */
1221             for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++)
1222             {
1223                 *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) =
1224                     *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf +
1225                                  row * ps_sao_ctxt->i4_cur_chroma_recon_stride +
1226                                  (ps_sao_ctxt->i4_sao_blk_wd - 2));
1227             }
1228         }
1229 
1230         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1231         {
1232             /* Copy the sao'ed output of the best candidate to the recon buffer*/
1233 
1234             ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1235                 ps_sao_ctxt->pu1_cur_luma_recon_buf,
1236                 ps_sao_ctxt->i4_cur_luma_recon_stride,
1237                 ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx],
1238                 i4_luma_scratch_buf_stride,
1239                 ctb_wd,
1240                 ctb_ht);
1241         }
1242         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1243         {
1244             /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/
1245 
1246             ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1247                 ps_sao_ctxt->pu1_cur_chroma_recon_buf,
1248                 ps_sao_ctxt->i4_cur_chroma_recon_stride,
1249                 ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx],
1250                 i4_chroma_scratch_buf_stride,
1251                 ctb_wd,
1252                 ctb_ht >> !u1_is_422);
1253         }
1254     }
1255 }
1256