1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 *  ihevc_sao.c
22 *
23 * @brief
24 *  Contains leaf level function definitions for sample adaptive offset process
25 *
26 * @author
27 *  Srinivas T
28 *
29 * @par List of Functions:
30 *   - ihevc_sao_band_offset_luma()
31 *   - ihevc_sao_band_offset_chroma()
32 *   - ihevc_sao_edge_offset_class0()
33 *   - ihevc_sao_edge_offset_class0_chroma()
34 *   - ihevc_sao_edge_offset_class1()
35 *   - ihevc_sao_edge_offset_class1_chroma()
36 *   - ihevc_sao_edge_offset_class2()
37 *   - ihevc_sao_edge_offset_class2_chroma()
38 *   - ihevc_sao_edge_offset_class3()
39 *   - ihevc_sao_edge_offset_class3_chroma()
40 * @remarks
41 *  None
42 *
43 *******************************************************************************
44 */
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <string.h>
48 #include "ihevc_typedefs.h"
49 #include "ihevc_macros.h"
50 #include "ihevc_platform_macros.h"
51 #include "ihevc_func_selector.h"
52 #include "ihevc_defs.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_sao.h"
55 
56 #define NUM_BAND_TABLE  32
57 
58 const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
/**
 * pu1_avail (the availability argument of the edge offset functions below) is
 * an array of flags - one for each neighboring block - specifying whether
 * that block is available
 * pu1_avail[0] - left
 * pu1_avail[1] - right
 * pu1_avail[2] - top
 * pu1_avail[3] - bottom
 * pu1_avail[4] - top-left
 * pu1_avail[5] - top-right
 * pu1_avail[6] - bottom-left
 * pu1_avail[7] - bottom-right
 */
70 
71 
/**
*******************************************************************************
*
* @brief
*  Applies SAO band offset to a luma block, in place
*
* @par Description:
*  Before any sample is modified, the right-most column of the block is
*  copied to pu1_src_left, the last entry of the old top row is copied to
*  pu1_src_top_left and the bottom row of the block is copied to pu1_src_top.
*  Each sample is then mapped to one of NUM_BAND_TABLE bands by dropping its
*  (BIT_DEPTH_LUMA - 5) least significant bits. The four bands starting at
*  sao_band_pos (wrapping with "& 31") select pi1_sao_offset[1..4]; every
*  other band selects pi1_sao_offset[0]. The selected offset is added and the
*  result is clipped to [0, (1 << BIT_DEPTH_LUMA) - 1].
*
* @param[in,out] pu1_src           Block to be filtered
* @param[in]     src_strd          Stride of pu1_src in samples
* @param[out]    pu1_src_left      Receives ht samples of the right column
* @param[in,out] pu1_src_top       Receives wd samples of the bottom row
* @param[out]    pu1_src_top_left  Receives the old pu1_src_top[wd - 1]
* @param[in]     sao_band_pos      Index of the first of the four offset bands
* @param[in]     pi1_sao_offset    Offsets, indexed 0..4
* @param[in]     wd                Block width
* @param[in]     ht                Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
                                WORD32 src_strd,
                                UWORD8 *pu1_src_left,
                                UWORD8 *pu1_src_top,
                                UWORD8 *pu1_src_top_left,
                                WORD32 sao_band_pos,
                                WORD8 *pi1_sao_offset,
                                WORD32 wd,
                                WORD32 ht)
{
    WORD32 ai4_band_to_offset_idx[NUM_BAND_TABLE];
    WORD32 shift, pel_max;
    WORD32 x, y, k;

    /* Save the unfiltered right column */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + (wd - 1)];
    }

    /* Top-left has to be taken from the old top row before it is replaced */
    pu1_src_top_left[0] = pu1_src_top[wd - 1];

    /* Save the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    shift = BIT_DEPTH_LUMA - 5;
    pel_max = (1 << (shift + 5)) - 1;

    /* All bands use offset index 0, except the four bands from sao_band_pos */
    memset(ai4_band_to_offset_idx, 0, sizeof(ai4_band_to_offset_idx));
    for(k = 1; k <= 4; k++)
    {
        ai4_band_to_offset_idx[(k - 1 + sao_band_pos) & 31] = k;
    }

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;

        for(x = 0; x < wd; x++)
        {
            WORD32 pel = pu1_row[x];
            WORD32 offset_idx = ai4_band_to_offset_idx[pel >> shift];

            pu1_row[x] = CLIP3(pel + pi1_sao_offset[offset_idx], 0, pel_max);
        }
    }
}
120 
121 
122 
123 /* input 'wd' has to be for the interleaved block and not for each color component */
/**
*******************************************************************************
*
* @brief
*  Applies SAO band offset to an interleaved chroma block, in place
*
* @par Description:
*  Even columns are filtered with sao_band_pos_u / pi1_sao_offset_u and odd
*  columns with sao_band_pos_v / pi1_sao_offset_v. Before any sample is
*  modified, the last two columns of every row are copied to pu1_src_left
*  (two entries per row), the last two entries of the old top row are copied
*  to pu1_src_top_left and the bottom row is copied to pu1_src_top.
*  Each sample is mapped to a band by dropping its (BIT_DEPTH_CHROMA - 5)
*  least significant bits; the four bands starting at the plane's band
*  position (wrapping with "& 31") select offsets 1..4 and all other bands
*  select offset 0. The result is clipped to [0, (1 << BIT_DEPTH_CHROMA) - 1].
*
* @param[in,out] pu1_src           Interleaved block to be filtered
* @param[in]     src_strd          Stride of pu1_src in samples
* @param[out]    pu1_src_left      Receives 2 * ht samples of the right columns
* @param[in,out] pu1_src_top       Receives wd samples of the bottom row
* @param[out]    pu1_src_top_left  Receives the old pu1_src_top[wd - 2], [wd - 1]
* @param[in]     sao_band_pos_u    First offset band for even columns
* @param[in]     sao_band_pos_v    First offset band for odd columns
* @param[in]     pi1_sao_offset_u  Offsets for even columns, indexed 0..4
* @param[in]     pi1_sao_offset_v  Offsets for odd columns, indexed 0..4
* @param[in]     wd                Width of the interleaved block
* @param[in]     ht                Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  WORD32 sao_band_pos_u,
                                  WORD32 sao_band_pos_v,
                                  WORD8 *pi1_sao_offset_u,
                                  WORD8 *pi1_sao_offset_v,
                                  WORD32 wd,
                                  WORD32 ht)
{
    /* Index 0 holds the even-column (U) data, index 1 the odd-column (V) data */
    WORD32 ai4_band_to_offset_idx[2][NUM_BAND_TABLE];
    WORD8 *api1_offset[2];
    WORD32 shift, pel_max;
    WORD32 x, y, k;

    /* Save the unfiltered last two columns of every row */
    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row_end = pu1_src + y * src_strd + wd;

        pu1_src_left[2 * y] = pu1_row_end[-2];
        pu1_src_left[2 * y + 1] = pu1_row_end[-1];
    }

    /* Top-left has to be taken from the old top row before it is replaced */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];

    /* Save the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    shift = BIT_DEPTH_CHROMA - 5;
    pel_max = (1 << (shift + 5)) - 1;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* All bands use offset index 0, except the four bands from each band position */
    memset(ai4_band_to_offset_idx, 0, sizeof(ai4_band_to_offset_idx));
    for(k = 1; k <= 4; k++)
    {
        ai4_band_to_offset_idx[0][(k - 1 + sao_band_pos_u) & 31] = k;
        ai4_band_to_offset_idx[1][(k - 1 + sao_band_pos_v) & 31] = k;
    }

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;

        for(x = 0; x < wd; x++)
        {
            WORD32 plane = x & 1;
            WORD32 pel = pu1_row[x];
            WORD32 offset_idx = ai4_band_to_offset_idx[plane][pel >> shift];

            pu1_row[x] = CLIP3(pel + api1_offset[plane][offset_idx], 0, pel_max);
        }
    }
}
182 
183 
184 
185 /* Horizontal filtering */
/**
*******************************************************************************
*
* @brief
*  Applies SAO edge offset class 0 (horizontal) to a luma block, in place
*
* @par Description:
*  Each sample is compared with its left and right neighbours. The two signs
*  select an entry of gi4_ihevc_table_edge_idx, which in turn selects the
*  offset from pi1_sao_offset; an entry of 0 leaves the sample untouched.
*  Column 0 is skipped when pu1_avail[0] is 0 and column wd - 1 is skipped
*  when pu1_avail[1] is 0. The left neighbours of column 0 are read from
*  pu1_src_left; the right neighbour of column wd - 1 is read from
*  pu1_src[wd], i.e. one sample beyond the block.
*  The unfiltered right column, bottom row and old pu1_src_top[wd - 1] are
*  stored to pu1_src_left, pu1_src_top and pu1_src_top_left respectively.
*
* @param[in,out] pu1_src            Block to be filtered
* @param[in]     src_strd           Stride of pu1_src in samples
* @param[in,out] pu1_src_left       Left neighbours in, unfiltered right column out
* @param[in,out] pu1_src_top        Receives wd samples of the unfiltered bottom row
* @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 1]
* @param[in]     pu1_src_top_right  Unused
* @param[in]     pu1_src_bot_left   Unused
* @param[in]     pu1_avail          Neighbour availability flags ([0] left, [1] right)
* @param[in]     pi1_sao_offset     Offsets, indexed by edge category
* @param[in]     wd                 Block width
* @param[in]     ht                 Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_col[MAX_CTB_SIZE];
    WORD32 bit_depth, pel_max;
    WORD32 x, y;
    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    bit_depth = BIT_DEPTH_LUMA;
    pel_max = (1 << bit_depth) - 1;

    /* 0xFF keeps the edge category, 0 forces it to "no offset" */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);
    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    /* Top-left has to be taken from the old top row before it is replaced */
    *pu1_src_top_left = pu1_src_top[wd - 1];

    /* The right column is parked in a local array, as pu1_src_left is still
     * needed as the left neighbour column while filtering */
    for(y = 0; y < ht; y++)
    {
        au1_right_col[y] = pu1_src[y * src_strd + wd - 1];
    }

    /* Save the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;
        WORD8 i1_sign_lft = SIGN(pu1_row[0] - pu1_src_left[y]);

        for(x = 0; x < wd; x++)
        {
            /* The right neighbour is still unfiltered, so its sign can be
             * negated and reused as the left sign of the next column */
            WORD8 i1_sign_rgt = SIGN(pu1_row[x] - pu1_row[x + 1]);
            WORD32 category = gi4_ihevc_table_edge_idx[2 + i1_sign_lft + i1_sign_rgt] & au1_col_mask[x];

            i1_sign_lft = -i1_sign_rgt;

            if(0 != category)
            {
                pu1_row[x] = CLIP3(pu1_row[x] + pi1_sao_offset[category], 0, pel_max);
            }
        }
    }

    /* Publish the unfiltered right column */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = au1_right_col[y];
    }
}
263 
264 
265 
266 
267 /* input 'wd' has to be for the interleaved block and not for each color component */
/**
*******************************************************************************
*
* @brief
*  Applies SAO edge offset class 0 (horizontal) to an interleaved chroma
*  block, in place
*
* @par Description:
*  Even columns use pi1_sao_offset_u and odd columns use pi1_sao_offset_v.
*  Each sample is compared with the samples two columns to its left and two
*  columns to its right (its neighbours in the same plane). The two signs
*  select an entry of gi4_ihevc_table_edge_idx, which selects the offset; an
*  entry of 0 leaves the sample untouched. The mask holds one entry per
*  column pair: pair 0 is skipped when pu1_avail[0] is 0 and pair
*  (wd - 1) >> 1 is skipped when pu1_avail[1] is 0.
*  The unfiltered last two columns, bottom row and old last two top entries
*  are stored to pu1_src_left, pu1_src_top and pu1_src_top_left.
*
* @param[in,out] pu1_src            Interleaved block to be filtered
* @param[in]     src_strd           Stride of pu1_src in samples
* @param[in,out] pu1_src_left       Left neighbours in (2 per row), unfiltered
*                                   right columns out
* @param[in,out] pu1_src_top        Receives wd samples of the unfiltered bottom row
* @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 2], [wd - 1]
* @param[in]     pu1_src_top_right  Unused
* @param[in]     pu1_src_bot_left   Unused
* @param[in]     pu1_avail          Neighbour availability flags ([0] left, [1] right)
* @param[in]     pi1_sao_offset_u   Offsets for even columns
* @param[in]     pi1_sao_offset_v   Offsets for odd columns
* @param[in]     wd                 Width of the interleaved block
* @param[in]     ht                 Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_cols[2 * MAX_CTB_SIZE];
    /* Index 0 holds the even-column (U) state, index 1 the odd-column (V) state */
    WORD8 ai1_sign_lft[2];
    WORD8 *api1_offset[2];
    WORD32 bit_depth, pel_max;
    WORD32 x, y;
    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    bit_depth = BIT_DEPTH_CHROMA;
    pel_max = (1 << bit_depth) - 1;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* 0xFF keeps the edge category, 0 forces it to "no offset" */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);
    if(0 == pu1_avail[0])
    {
        au1_pair_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_pair_mask[(wd - 1) >> 1] = 0;
    }

    /* Top-left has to be taken from the old top row before it is replaced */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];

    /* The right columns are parked in a local array, as pu1_src_left is
     * still needed as the left neighbour column while filtering */
    for(y = 0; y < ht; y++)
    {
        au1_right_cols[2 * y] = pu1_src[y * src_strd + wd - 2];
        au1_right_cols[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }

    /* Save the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;

        ai1_sign_lft[0] = SIGN(pu1_row[0] - pu1_src_left[2 * y]);
        ai1_sign_lft[1] = SIGN(pu1_row[1] - pu1_src_left[2 * y + 1]);

        for(x = 0; x < wd; x++)
        {
            WORD32 plane = x & 1;
            /* The same-plane right neighbour is still unfiltered, so its
             * sign can be negated and reused two columns later */
            WORD8 i1_sign_rgt = SIGN(pu1_row[x] - pu1_row[x + 2]);
            WORD32 category = 2 + ai1_sign_lft[plane] + i1_sign_rgt;

            ai1_sign_lft[plane] = -i1_sign_rgt;

            category = gi4_ihevc_table_edge_idx[category] & au1_pair_mask[x >> 1];

            if(0 != category)
            {
                pu1_row[x] = CLIP3(pu1_row[x] + api1_offset[plane][category], 0, pel_max);
            }
        }
    }

    /* Publish the unfiltered right columns */
    for(y = 0; y < 2 * ht; y++)
    {
        pu1_src_left[y] = au1_right_cols[y];
    }
}
361 
362 
363 
364 /* Vertical filtering */
/**
*******************************************************************************
*
* @brief
*  Applies SAO edge offset class 1 (vertical) to a luma block, in place
*
* @par Description:
*  Each sample is compared with the samples directly above and below it. The
*  two signs select an entry of gi4_ihevc_table_edge_idx, which selects the
*  offset from pi1_sao_offset; an entry of 0 leaves the sample untouched.
*  The row above row 0 is read from pu1_src_top. When pu1_avail[2] is 0 the
*  first row is not filtered, and when pu1_avail[3] is 0 the last row is not
*  filtered. The row below the last filtered row is read from pu1_src itself.
*  The unfiltered right column, bottom row and old pu1_src_top[wd - 1] are
*  stored to pu1_src_left, pu1_src_top and pu1_src_top_left respectively.
*
* @param[in,out] pu1_src            Block to be filtered
* @param[in]     src_strd           Stride of pu1_src in samples
* @param[out]    pu1_src_left       Receives ht samples of the unfiltered right column
* @param[in,out] pu1_src_top        Row above the block in, unfiltered bottom row out
* @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 1]
* @param[in]     pu1_src_top_right  Unused
* @param[in]     pu1_src_bot_left   Unused
* @param[in]     pu1_avail          Neighbour availability flags ([2] top, [3] bottom)
* @param[in]     pi1_sao_offset     Offsets, indexed by edge category
* @param[in]     wd                 Block width
* @param[in]     ht                 Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_above[MAX_CTB_SIZE];
    UWORD8 *pu1_above;
    WORD32 bit_depth, pel_max;
    WORD32 x, y;
    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    bit_depth = BIT_DEPTH_LUMA;
    pel_max = (1 << bit_depth) - 1;

    /* No entry of the mask is cleared in this function, so it passes every
     * edge category through unchanged */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Top-left has to be taken from the old top row */
    *pu1_src_top_left = pu1_src_top[wd - 1];

    /* Save the unfiltered right column */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + wd - 1];
    }

    /* The bottom row is parked in a local array, as pu1_src_top is still
     * needed as the row above the block */
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Pick the row above the first row that gets filtered */
    if(0 == pu1_avail[2])
    {
        /* Top not available: leave row 0 alone and start from row 1 */
        pu1_above = pu1_src;
        pu1_src += src_strd;
        ht--;
    }
    else
    {
        pu1_above = pu1_src_top;
    }
    for(x = 0; x < wd; x++)
    {
        ai1_sign_above[x] = SIGN(pu1_src[x] - pu1_above[x]);
    }

    /* Bottom not available: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_cur = pu1_src + y * src_strd;
        UWORD8 *pu1_below = pu1_cur + src_strd;

        for(x = 0; x < wd; x++)
        {
            /* The row below is still unfiltered, so this sign can be
             * negated and reused as the "above" sign of the next row */
            WORD8 i1_sign_below = SIGN(pu1_cur[x] - pu1_below[x]);
            WORD32 category = gi4_ihevc_table_edge_idx[2 + ai1_sign_above[x] + i1_sign_below] & au1_col_mask[x];

            ai1_sign_above[x] = -i1_sign_below;

            if(0 != category)
            {
                pu1_cur[x] = CLIP3(pu1_cur[x] + pi1_sao_offset[category], 0, pel_max);
            }
        }
    }

    /* Publish the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
454 
455 
456 
457 /* input 'wd' has to be for the interleaved block and not for each color component */
/**
*******************************************************************************
*
* @brief
*  Applies SAO edge offset class 1 (vertical) to an interleaved chroma block,
*  in place
*
* @par Description:
*  Even columns use pi1_sao_offset_u and odd columns use pi1_sao_offset_v.
*  Each sample is compared with the samples directly above and below it. The
*  two signs select an entry of gi4_ihevc_table_edge_idx, which selects the
*  offset; an entry of 0 leaves the sample untouched.
*  The row above row 0 is read from pu1_src_top. When pu1_avail[2] is 0 the
*  first row is not filtered, and when pu1_avail[3] is 0 the last row is not
*  filtered.
*  The unfiltered last two columns, bottom row and old last two top entries
*  are stored to pu1_src_left, pu1_src_top and pu1_src_top_left.
*
* @param[in,out] pu1_src            Interleaved block to be filtered
* @param[in]     src_strd           Stride of pu1_src in samples
* @param[out]    pu1_src_left       Receives 2 * ht samples of the unfiltered
*                                   right columns
* @param[in,out] pu1_src_top        Row above the block in, unfiltered bottom row out
* @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 2], [wd - 1]
* @param[in]     pu1_src_top_right  Unused
* @param[in]     pu1_src_bot_left   Unused
* @param[in]     pu1_avail          Neighbour availability flags ([2] top, [3] bottom)
* @param[in]     pi1_sao_offset_u   Offsets for even columns
* @param[in]     pi1_sao_offset_v   Offsets for odd columns
* @param[in]     wd                 Width of the interleaved block
* @param[in]     ht                 Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_above[MAX_CTB_SIZE];
    /* Index 0 holds the even-column (U) offsets, index 1 the odd-column (V) offsets */
    WORD8 *api1_offset[2];
    UWORD8 *pu1_above;
    WORD32 bit_depth, pel_max;
    WORD32 x, y;
    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    bit_depth = BIT_DEPTH_CHROMA;
    pel_max = (1 << bit_depth) - 1;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* No entry of the mask is cleared in this function, so it passes every
     * edge category through unchanged */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Top-left has to be taken from the old top row */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];

    /* Save the unfiltered last two columns of every row */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[2 * y] = pu1_src[y * src_strd + wd - 2];
        pu1_src_left[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }

    /* The bottom row is parked in a local array, as pu1_src_top is still
     * needed as the row above the block */
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Pick the row above the first row that gets filtered */
    if(0 == pu1_avail[2])
    {
        /* Top not available: leave row 0 alone and start from row 1 */
        pu1_above = pu1_src;
        pu1_src += src_strd;
        ht--;
    }
    else
    {
        pu1_above = pu1_src_top;
    }
    for(x = 0; x < wd; x++)
    {
        ai1_sign_above[x] = SIGN(pu1_src[x] - pu1_above[x]);
    }

    /* Bottom not available: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_cur = pu1_src + y * src_strd;
        UWORD8 *pu1_below = pu1_cur + src_strd;

        for(x = 0; x < wd; x++)
        {
            /* The row below is still unfiltered, so this sign can be
             * negated and reused as the "above" sign of the next row */
            WORD8 i1_sign_below = SIGN(pu1_cur[x] - pu1_below[x]);
            WORD32 category = gi4_ihevc_table_edge_idx[2 + ai1_sign_above[x] + i1_sign_below] & au1_pair_mask[x >> 1];

            ai1_sign_above[x] = -i1_sign_below;

            if(0 != category)
            {
                pu1_cur[x] = CLIP3(pu1_cur[x] + api1_offset[x & 1][category], 0, pel_max);
            }
        }
    }

    /* Publish the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
553 
554 
555 
556 /* 135 degree filtering */
/**
*******************************************************************************
*
* @brief
*  Applies SAO edge offset class 2 (135 degree) to a luma block, in place
*
* @par Description:
*  Each sample is compared with its up-left and down-right neighbours. The
*  two signs select an entry of gi4_ihevc_table_edge_idx, which selects the
*  offset from pi1_sao_offset; an entry of 0 leaves the sample untouched.
*  Up-left neighbours of row 0 come from pu1_src_top, those of column 0 from
*  pu1_src_left and that of sample (0, 0) from pu1_src_top_left.
*  The two corner samples (0, 0) and (wd - 1, ht - 1) are evaluated up front,
*  controlled by pu1_avail[4] and pu1_avail[7], and written back after the
*  main loop, replacing whatever the main loop produced at those positions.
*  Column 0 is skipped when pu1_avail[0] is 0, column wd - 1 when
*  pu1_avail[1] is 0, row 0 when pu1_avail[2] is 0 and row ht - 1 when
*  pu1_avail[3] is 0.
*  The unfiltered right column, bottom row and old pu1_src_top[wd - 1] are
*  stored to pu1_src_left, pu1_src_top and pu1_src_top_left on exit.
*
* @param[in,out] pu1_src            Block to be filtered
* @param[in]     src_strd           Stride of pu1_src in samples
* @param[in,out] pu1_src_left       Left neighbours in, unfiltered right column out
* @param[in,out] pu1_src_top        Row above the block in, unfiltered bottom row out
* @param[in,out] pu1_src_top_left   Up-left neighbour of (0, 0) in, old
*                                   pu1_src_top[wd - 1] out
* @param[in]     pu1_src_top_right  Unused
* @param[in]     pu1_src_bot_left   Unused
* @param[in]     pu1_avail          Neighbour availability flags ([0] left,
*                                   [1] right, [2] top, [3] bottom,
*                                   [4] top-left, [7] bottom-right)
* @param[in]     pi1_sao_offset     Offsets, indexed by edge category
* @param[in]     wd                 Block width
* @param[in]     ht                 Block height
*
* @returns  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    WORD32 row, col;
    UWORD8 au1_mask[MAX_CTB_SIZE];
    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
    UWORD8 u1_src_top_left_tmp;
    WORD8 au1_sign_up[MAX_CTB_SIZE + 1], au1_sign_up_tmp[MAX_CTB_SIZE + 1];
    WORD8 u1_sign_down;
    WORD8 *pu1_sign_up;
    WORD8 *pu1_sign_up_tmp;
    UWORD8 *pu1_src_left_cpy;
    UWORD8 *pu1_pos_0_0;
    UWORD8 *pu1_pos_wd_ht;

    WORD32 bit_depth;
    UWORD8 u1_pos_0_0_tmp;
    UWORD8 u1_pos_wd_ht_tmp;
    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    bit_depth = BIT_DEPTH_LUMA;
    pu1_sign_up = au1_sign_up;
    pu1_sign_up_tmp = au1_sign_up_tmp;
    pu1_src_left_cpy = pu1_src_left;

    /* Remember where the two corner samples live. pu1_src and ht are
     * adjusted below, so the write-back at the end goes through these
     * pointers instead of re-deriving the positions from the adjusted values */
    pu1_pos_0_0 = pu1_src;
    pu1_pos_wd_ht = pu1_src + (wd - 1) + (ht - 1) * src_strd;

    /* Initialize the mask values: 0xFF keeps the edge category, 0 forces it
     * to "no offset" */
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture the unfiltered right column, bottom row and top-left value.
     * They are published only at the end, as pu1_src_left, pu1_src_top and
     * pu1_src_top_left are still needed as neighbours while filtering */
    u1_src_top_left_tmp = pu1_src_top[wd - 1];
    for(row = 0; row < ht; row++)
    {
        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
    }
    for(col = 0; col < wd; col++)
    {
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    }

    /* Sample (0, 0): filtered here only if top-left is available */
    u1_pos_0_0_tmp = pu1_src[0];
    if(0 != pu1_avail[4])
    {
        WORD32 edge_idx;

        edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
                        SIGN(pu1_src[0] - pu1_src[1 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
        }
    }

    /* Sample (wd - 1, ht - 1): filtered here only if bottom-right is available */
    u1_pos_wd_ht_tmp = *pu1_pos_wd_ht;
    if(0 != pu1_avail[7])
    {
        WORD32 edge_idx;

        edge_idx = 2 + SIGN(pu1_pos_wd_ht[0] - pu1_pos_wd_ht[-1 - src_strd]) +
                        SIGN(pu1_pos_wd_ht[0] - pu1_pos_wd_ht[1 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_wd_ht_tmp = CLIP3(pu1_pos_wd_ht[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
        }
    }

    /* If Left is not available */
    if(0 == pu1_avail[0])
    {
        au1_mask[0] = 0;
    }

    /* If Top is not available, row 0 is skipped and the up-left neighbours
     * of the first filtered row come from row 0 of the block itself */
    if(0 == pu1_avail[2])
    {
        pu1_src += src_strd;
        ht--;
        pu1_src_left_cpy += 1;
        for(col = 1; col < wd; col++)
        {
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 1 - src_strd]);
        }
    }
    else
    {
        for(col = 1; col < wd; col++)
        {
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 1]);
        }
    }

    /* If Right is not available */
    if(0 == pu1_avail[1])
    {
        au1_mask[wd - 1] = 0;
    }

    /* If Bottom is not available */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    /* Filtering is done in place. The down-right neighbour of a sample is
     * still unfiltered when it is read, so each sign is negated and kept as
     * the up-left sign of column + 1 in the next row */
    for(row = 0; row < ht; row++)
    {
        UWORD8 u1_up_left;

        /* Up-left neighbour of column 0. For row 0 of the block (top
         * available) this is the top-left sample; indexing the left array
         * with -1 would read before its first element. The result for
         * sample (0, 0) is replaced by u1_pos_0_0_tmp after the loop */
        if((0 == row) && (0 != pu1_avail[2]))
        {
            u1_up_left = pu1_src_top_left[0];
        }
        else
        {
            u1_up_left = pu1_src_left_cpy[row - 1];
        }
        pu1_sign_up[0] = SIGN(pu1_src[0] - u1_up_left);

        for(col = 0; col < wd; col++)
        {
            WORD32 edge_idx;

            u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 1 + src_strd]);
            edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
            pu1_sign_up_tmp[col + 1] = -u1_sign_down;

            edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];

            if(0 != edge_idx)
            {
                pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
            }
        }

        /* Swapping pu1_sign_up_tmp and pu1_sign_up */
        {
            WORD8 *pu1_swap_tmp = pu1_sign_up;
            pu1_sign_up = pu1_sign_up_tmp;
            pu1_sign_up_tmp = pu1_swap_tmp;
        }

        pu1_src += src_strd;
    }

    /* Corner samples take the values evaluated before the main loop */
    *pu1_pos_0_0 = u1_pos_0_0_tmp;
    *pu1_pos_wd_ht = u1_pos_wd_ht_tmp;

    /* Restore the full block height before publishing the saved column */
    if(0 == pu1_avail[2])
    {
        ht++;
    }
    if(0 == pu1_avail[3])
    {
        ht++;
    }

    *pu1_src_top_left = u1_src_top_left_tmp;
    for(row = 0; row < ht; row++)
    {
        pu1_src_left[row] = au1_src_left_tmp[row];
    }
    for(col = 0; col < wd; col++)
    {
        pu1_src_top[col] = au1_src_top_tmp[col];
    }

}
740 
741 
742 
743 
744 /* 135 degree filtering */
ihevc_sao_edge_offset_class2_chroma(UWORD8 * pu1_src,WORD32 src_strd,UWORD8 * pu1_src_left,UWORD8 * pu1_src_top,UWORD8 * pu1_src_top_left,UWORD8 * pu1_src_top_right,UWORD8 * pu1_src_bot_left,UWORD8 * pu1_avail,WORD8 * pi1_sao_offset_u,WORD8 * pi1_sao_offset_v,WORD32 wd,WORD32 ht)745 void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
746                                          WORD32 src_strd,
747                                          UWORD8 *pu1_src_left,
748                                          UWORD8 *pu1_src_top,
749                                          UWORD8 *pu1_src_top_left,
750                                          UWORD8 *pu1_src_top_right,
751                                          UWORD8 *pu1_src_bot_left,
752                                          UWORD8 *pu1_avail,
753                                          WORD8 *pi1_sao_offset_u,
754                                          WORD8 *pi1_sao_offset_v,
755                                          WORD32 wd,
756                                          WORD32 ht)
757 {
758     WORD32 row, col;
759     UWORD8 au1_mask[MAX_CTB_SIZE];
760     UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
761     UWORD8 au1_src_top_left_tmp[2];
762     WORD8 au1_sign_up[MAX_CTB_SIZE + 2], au1_sign_up_tmp[MAX_CTB_SIZE + 2];
763     WORD8 u1_sign_down;
764     WORD8 *pu1_sign_up;
765     WORD8 *pu1_sign_up_tmp;
766     UWORD8 *pu1_src_left_cpy;
767 
768     WORD32 bit_depth;
769 
770     UWORD8 u1_pos_0_0_tmp_u;
771     UWORD8 u1_pos_0_0_tmp_v;
772     UWORD8 u1_pos_wd_ht_tmp_u;
773     UWORD8 u1_pos_wd_ht_tmp_v;
774     UNUSED(pu1_src_top_right);
775     UNUSED(pu1_src_bot_left);
776 
777 
778     bit_depth = BIT_DEPTH_CHROMA;
779     pu1_sign_up = au1_sign_up;
780     pu1_sign_up_tmp = au1_sign_up_tmp;
781     pu1_src_left_cpy = pu1_src_left;
782 
783     /* Initialize the mask values */
784     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
785 
786     /* Update left, top and top-left arrays */
787     au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
788     au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
789     for(row = 0; row < ht; row++)
790     {
791         au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
792         au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
793     }
794     for(col = 0; col < wd; col++)
795     {
796         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
797     }
798 
799 
800     /* If top-left is available, process separately */
801     if(0 != pu1_avail[4])
802     {
803         WORD32 edge_idx;
804 
805         /* U */
806         edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
807                         SIGN(pu1_src[0] - pu1_src[2 + src_strd]);
808 
809         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
810 
811         if(0 != edge_idx)
812         {
813             u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
814         }
815         else
816         {
817             u1_pos_0_0_tmp_u = pu1_src[0];
818         }
819 
820         /* V */
821         edge_idx = 2 + SIGN(pu1_src[1] - pu1_src_top_left[1]) +
822                         SIGN(pu1_src[1] - pu1_src[1 + 2 + src_strd]);
823 
824         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
825 
826         if(0 != edge_idx)
827         {
828             u1_pos_0_0_tmp_v = CLIP3(pu1_src[1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
829         }
830         else
831         {
832             u1_pos_0_0_tmp_v = pu1_src[1];
833         }
834     }
835     else
836     {
837         u1_pos_0_0_tmp_u = pu1_src[0];
838         u1_pos_0_0_tmp_v = pu1_src[1];
839     }
840 
841     /* If bottom-right is available, process separately */
842     if(0 != pu1_avail[7])
843     {
844         WORD32 edge_idx;
845 
846         /* U */
847         edge_idx = 2 + SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd]) +
848                         SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]);
849 
850         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
851 
852         if(0 != edge_idx)
853         {
854             u1_pos_wd_ht_tmp_u = CLIP3(pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
855         }
856         else
857         {
858             u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
859         }
860 
861         /* V */
862         edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) +
863                         SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd]);
864 
865         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
866 
867         if(0 != edge_idx)
868         {
869             u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
870         }
871         else
872         {
873             u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
874         }
875     }
876     else
877     {
878         u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
879         u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
880     }
881 
882     /* If Left is not available */
883     if(0 == pu1_avail[0])
884     {
885         au1_mask[0] = 0;
886     }
887 
888     /* If Top is not available */
889     if(0 == pu1_avail[2])
890     {
891         pu1_src += src_strd;
892         pu1_src_left_cpy += 2;
893         ht--;
894         for(col = 2; col < wd; col++)
895         {
896             pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 2 - src_strd]);
897         }
898     }
899     else
900     {
901         for(col = 2; col < wd; col++)
902         {
903             pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 2]);
904         }
905     }
906 
907     /* If Right is not available */
908     if(0 == pu1_avail[1])
909     {
910         au1_mask[(wd - 1) >> 1] = 0;
911     }
912 
913     /* If Bottom is not available */
914     if(0 == pu1_avail[3])
915     {
916         ht--;
917     }
918 
919     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
920     {
921         for(row = 0; row < ht; row++)
922         {
923             pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[2 * (row - 1)]);
924             pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_left_cpy[2 * (row - 1) + 1]);
925             for(col = 0; col < wd; col++)
926             {
927                 WORD32 edge_idx;
928                 WORD8 *pi1_sao_offset;
929 
930                 pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
931 
932                 u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 2 + src_strd]);
933                 edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
934                 pu1_sign_up_tmp[col + 2] = -u1_sign_down;
935 
936                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
937 
938                 if(0 != edge_idx)
939                 {
940                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
941                 }
942             }
943 
944             /* Swapping pu1_sign_up_tmp and pu1_sign_up */
945             {
946                 WORD8 *pu1_swap_tmp = pu1_sign_up;
947                 pu1_sign_up = pu1_sign_up_tmp;
948                 pu1_sign_up_tmp = pu1_swap_tmp;
949             }
950 
951             pu1_src += src_strd;
952         }
953 
954         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp_u;
955         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + 1] = u1_pos_0_0_tmp_v;
956         pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = u1_pos_wd_ht_tmp_u;
957         pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp_v;
958     }
959 
960     if(0 == pu1_avail[2])
961         ht++;
962     if(0 == pu1_avail[3])
963         ht++;
964     pu1_src_top_left[0] = au1_src_top_left_tmp[0];
965     pu1_src_top_left[1] = au1_src_top_left_tmp[1];
966     for(row = 0; row < 2 * ht; row++)
967     {
968         pu1_src_left[row] = au1_src_left_tmp[row];
969     }
970     for(col = 0; col < wd; col++)
971     {
972         pu1_src_top[col] = au1_src_top_tmp[col];
973     }
974 
975 }
976 
977 
978 
979 
980 /* 45 degree filtering */
/**
*******************************************************************************
*
* @brief
*  In-place SAO edge offset (class 3) for a luma block. Every sample is
*  compared against its top-right and bottom-left neighbours; the two signs
*  select an entry of gi4_ihevc_table_edge_idx, which in turn selects the
*  offset added to the sample (result clipped to the luma sample range).
*
* @par Description:
*  The block is modified in place, so the sign of "current - top-right" for a
*  row is derived from the negated "current - bottom-left" sign computed while
*  the previous row was still unmodified. The top-right and bottom-left corner
*  samples are evaluated up front from the dedicated neighbour pointers and
*  written back after the main loop. Before returning, the left / top /
*  top-left neighbour buffers are refreshed with the unfiltered last column,
*  last row and pu1_src_top[wd - 1] captured on entry.
*
* @param[in,out] pu1_src            Top-left sample of the block
* @param[in]     src_strd           Row stride of pu1_src
* @param[in,out] pu1_src_left       Left neighbour column (read, then refreshed)
* @param[in,out] pu1_src_top        Top neighbour row (read, then refreshed)
* @param[out]    pu1_src_top_left   Receives the entry value of pu1_src_top[wd - 1]
* @param[in]     pu1_src_top_right  Sample diagonally above the top-right corner
* @param[in]     pu1_src_bot_left   Sample diagonally below the bottom-left corner
* @param[in]     pu1_avail          Availability flags; indices used here:
*                                   0 left, 1 right, 2 top, 3 bottom,
*                                   5 top-right, 6 bottom-left
* @param[in]     pi1_sao_offset     Offset table indexed by the edge index
* @param[in]     wd                 Block width  (assumed <= MAX_CTB_SIZE)
* @param[in]     ht                 Block height (assumed <= MAX_CTB_SIZE)
*
* @returns
*  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    /* Largest representable sample value, used as the clip ceiling */
    const WORD32 max_pel = (1 << BIT_DEPTH_LUMA) - 1;
    /* Height as passed in; 'ht' itself shrinks when top/bottom rows are skipped */
    const WORD32 blk_ht = ht;
    const WORD32 top_avail = (0 != pu1_avail[2]);
    const WORD32 bot_avail = (0 != pu1_avail[3]);

    UWORD8 *pu1_row = pu1_src;                              /* row being filtered   */
    UWORD8 *pu1_last_row = pu1_src + (ht - 1) * src_strd;   /* bottom row of block  */
    UWORD8 *pu1_left = pu1_src_left;                        /* left neighbour column */
    UWORD8 *pu1_up_row;                                     /* row above first filtered row */

    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_new_top[MAX_CTB_SIZE];
    UWORD8 au1_new_left[MAX_CTB_SIZE];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    UWORD8 u1_new_top_left;
    UWORD8 u1_top_right_pel;
    UWORD8 u1_bot_left_pel;
    WORD32 i, j;

    /* All columns enabled by default */
    memset(au1_col_mask, 0xFF, sizeof(au1_col_mask));

    /* Capture the unfiltered samples that become the neighbours' context */
    u1_new_top_left = pu1_src_top[wd - 1];
    for(i = 0; i < ht; i++)
    {
        au1_new_left[i] = pu1_src[i * src_strd + wd - 1];
    }
    for(j = 0; j < wd; j++)
    {
        au1_new_top[j] = pu1_last_row[j];
    }

    /* Top-right corner: filtered only when the top-right neighbour exists */
    u1_top_right_pel = pu1_src[wd - 1];
    if(0 != pu1_avail[5])
    {
        WORD32 cur = pu1_src[wd - 1];
        WORD32 edge_idx = 2 + SIGN(cur - pu1_src_top_right[0]) +
                        SIGN(cur - pu1_src[wd - 2 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
        if(0 != edge_idx)
        {
            u1_top_right_pel = CLIP3(cur + pi1_sao_offset[edge_idx], 0, max_pel);
        }
    }

    /* Bottom-left corner: filtered only when the bottom-left neighbour exists */
    u1_bot_left_pel = pu1_last_row[0];
    if(0 != pu1_avail[6])
    {
        WORD32 cur = pu1_last_row[0];
        WORD32 edge_idx = 2 + SIGN(cur - pu1_last_row[1 - src_strd]) +
                        SIGN(cur - pu1_src_bot_left[0]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
        if(0 != edge_idx)
        {
            u1_bot_left_pel = CLIP3(cur + pi1_sao_offset[edge_idx], 0, max_pel);
        }
    }

    /* Left unavailable: first column is left untouched */
    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }

    /* Pick the row that sits above the first row to be filtered. Without a
     * top neighbour the first block row is skipped and serves as reference. */
    if(top_avail)
    {
        pu1_up_row = pu1_src_top;
    }
    else
    {
        pu1_up_row = pu1_row;
        pu1_row += src_strd;
        pu1_left += 1;
        ht--;
    }
    for(j = 0; j < wd - 1; j++)
    {
        ai1_sign_up[j] = SIGN(pu1_row[j] - pu1_up_row[j + 1]);
    }

    /* Right unavailable: last column is left untouched */
    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    /* Bottom unavailable: last row is left untouched */
    if(!bot_avail)
    {
        ht--;
    }

    /* Filter in place, row by row */
    for(i = 0; i < ht; i++)
    {
        /* The last column's top-right neighbour lies outside the block and is
         * read straight from the picture buffer */
        ai1_sign_up[wd - 1] = SIGN(pu1_row[wd - 1] - pu1_row[wd - src_strd]);

        for(j = 0; j < wd; j++)
        {
            WORD32 cur = pu1_row[j];
            WORD32 below_left;
            WORD32 sign_down;
            WORD32 edge_idx;

            /* Column 0 takes its bottom-left neighbour from the left column */
            if(0 == j)
            {
                below_left = pu1_left[i + 1];
            }
            else
            {
                below_left = pu1_row[j - 1 + src_strd];
            }

            sign_down = SIGN(cur - below_left);
            edge_idx = 2 + ai1_sign_up[j] + sign_down;

            /* Seen from the next row, this comparison is its "up" sign for
             * column j - 1, with the opposite polarity */
            if(j > 0)
            {
                ai1_sign_up[j - 1] = (WORD8)(-sign_down);
            }

            edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_col_mask[j];
            if(0 != edge_idx)
            {
                pu1_row[j] = CLIP3(cur + pi1_sao_offset[edge_idx], 0, max_pel);
            }
        }

        pu1_row += src_strd;
    }

    /* Corner samples were evaluated separately; store them now. pu1_row points
     * just past the last filtered row, hence the negative offsets. */
    pu1_row[-(top_avail ? ht : ht + 1) * src_strd + wd - 1] = u1_top_right_pel;
    pu1_row[bot_avail ? (-src_strd) : 0] = u1_bot_left_pel;

    /* Publish the captured unfiltered samples to the neighbour buffers */
    *pu1_src_top_left = u1_new_top_left;
    for(i = 0; i < blk_ht; i++)
    {
        pu1_src_left[i] = au1_new_left[i];
    }
    for(j = 0; j < wd; j++)
    {
        pu1_src_top[j] = au1_new_top[j];
    }
}
1153 
1154 
1155 
1156 
/**
*******************************************************************************
*
* @brief
*  In-place SAO edge offset (class 3) for an interleaved chroma block. Even
*  byte columns use pi1_sao_offset_u and odd byte columns use
*  pi1_sao_offset_v; neighbours of the same component are therefore two bytes
*  apart horizontally.
*
* @par Description:
*  Each sample is compared against its top-right and bottom-left neighbour of
*  the same component. The two signs select an entry of
*  gi4_ihevc_table_edge_idx, which selects the offset added to the sample
*  (result clipped to the chroma sample range). Since the block is modified in
*  place, the "up" signs for a row are the negated "down" signs recorded while
*  filtering the previous row. The top-right and bottom-left corner pairs are
*  evaluated up front from the dedicated neighbour pointers and written back
*  after the main loop. Before returning, the left / top / top-left neighbour
*  buffers are refreshed with the unfiltered last U/V column pair, last row
*  and pu1_src_top[wd - 2 .. wd - 1] captured on entry.
*
* @param[in,out] pu1_src            Top-left byte of the interleaved block
* @param[in]     src_strd           Row stride of pu1_src in bytes
* @param[in,out] pu1_src_left       Left neighbour column, 2 bytes per row
* @param[in,out] pu1_src_top        Top neighbour row (read, then refreshed)
* @param[out]    pu1_src_top_left   Receives two bytes: entry values of
*                                   pu1_src_top[wd - 2] and pu1_src_top[wd - 1]
* @param[in]     pu1_src_top_right  Two bytes diagonally above the top-right corner
* @param[in]     pu1_src_bot_left   Two bytes diagonally below the bottom-left corner
* @param[in]     pu1_avail          Availability flags; indices used here:
*                                   0 left, 1 right, 2 top, 3 bottom,
*                                   5 top-right, 6 bottom-left
* @param[in]     pi1_sao_offset_u   Offset table for even byte columns
* @param[in]     pi1_sao_offset_v   Offset table for odd byte columns
* @param[in]     wd                 Block width in bytes (assumed <= MAX_CTB_SIZE)
* @param[in]     ht                 Block height in rows (assumed <= MAX_CTB_SIZE)
*
* @returns
*  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    /* Largest representable sample value, used as the clip ceiling */
    const WORD32 max_pel = (1 << BIT_DEPTH_CHROMA) - 1;
    /* Height as passed in; 'ht' itself shrinks when top/bottom rows are skipped */
    const WORD32 blk_ht = ht;
    const WORD32 top_avail = (0 != pu1_avail[2]);
    const WORD32 bot_avail = (0 != pu1_avail[3]);

    UWORD8 *pu1_row = pu1_src;                              /* row being filtered   */
    UWORD8 *pu1_last_row = pu1_src + (ht - 1) * src_strd;   /* bottom row of block  */
    UWORD8 *pu1_left = pu1_src_left;                        /* left neighbour column */
    UWORD8 *pu1_up_row;                                     /* row above first filtered row */
    WORD8 *api1_offset[2];                                  /* [0] = U table, [1] = V table */

    UWORD8 au1_pair_mask[MAX_CTB_SIZE];                     /* one entry per U/V pair */
    UWORD8 au1_new_left[2 * MAX_CTB_SIZE];
    UWORD8 au1_new_top[MAX_CTB_SIZE];
    UWORD8 au1_new_top_left[2];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    UWORD8 au1_top_right_pel[2];
    UWORD8 au1_bot_left_pel[2];
    WORD32 top_right_off;
    WORD32 bot_left_off;
    WORD32 i, j, c;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* All U/V pairs enabled by default */
    memset(au1_pair_mask, 0xFF, sizeof(au1_pair_mask));

    /* Capture the unfiltered samples that become the neighbours' context */
    au1_new_top_left[0] = pu1_src_top[wd - 2];
    au1_new_top_left[1] = pu1_src_top[wd - 1];
    for(i = 0; i < ht; i++)
    {
        UWORD8 *pu1_right_pair = pu1_src + i * src_strd + wd - 2;

        au1_new_left[2 * i] = pu1_right_pair[0];
        au1_new_left[2 * i + 1] = pu1_right_pair[1];
    }
    for(j = 0; j < wd; j++)
    {
        au1_new_top[j] = pu1_last_row[j];
    }

    /* Top-right corner pair (c = 0 -> U, c = 1 -> V): filtered only when the
     * top-right neighbour exists, otherwise kept as is */
    for(c = 0; c < 2; c++)
    {
        WORD32 cur = pu1_src[wd - 2 + c];

        au1_top_right_pel[c] = pu1_src[wd - 2 + c];
        if(0 != pu1_avail[5])
        {
            WORD32 edge_idx = 2 + SIGN(cur - pu1_src_top_right[c]) +
                            SIGN(cur - pu1_src[wd - 4 + c + src_strd]);

            edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
            if(0 != edge_idx)
            {
                au1_top_right_pel[c] = CLIP3(cur + api1_offset[c][edge_idx], 0, max_pel);
            }
        }
    }

    /* Bottom-left corner pair: filtered only when the bottom-left neighbour
     * exists, otherwise kept as is */
    for(c = 0; c < 2; c++)
    {
        WORD32 cur = pu1_last_row[c];

        au1_bot_left_pel[c] = pu1_last_row[c];
        if(0 != pu1_avail[6])
        {
            WORD32 edge_idx = 2 + SIGN(cur - pu1_last_row[c + 2 - src_strd]) +
                            SIGN(cur - pu1_src_bot_left[c]);

            edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
            if(0 != edge_idx)
            {
                au1_bot_left_pel[c] = CLIP3(cur + api1_offset[c][edge_idx], 0, max_pel);
            }
        }
    }

    /* Left unavailable: first U/V pair of every row is left untouched */
    if(0 == pu1_avail[0])
    {
        au1_pair_mask[0] = 0;
    }

    /* Pick the row that sits above the first row to be filtered. Without a
     * top neighbour the first block row is skipped and serves as reference. */
    if(top_avail)
    {
        pu1_up_row = pu1_src_top;
    }
    else
    {
        pu1_up_row = pu1_row;
        pu1_row += src_strd;
        pu1_left += 2;
        ht--;
    }
    for(j = 0; j < wd - 2; j++)
    {
        ai1_sign_up[j] = SIGN(pu1_row[j] - pu1_up_row[j + 2]);
    }

    /* Right unavailable: last U/V pair of every row is left untouched */
    if(0 == pu1_avail[1])
    {
        au1_pair_mask[(wd - 1) >> 1] = 0;
    }

    /* Bottom unavailable: last row is left untouched */
    if(!bot_avail)
    {
        ht--;
    }

    /* Filter in place, row by row */
    for(i = 0; i < ht; i++)
    {
        /* The last pair's top-right neighbours lie outside the block and are
         * read straight from the picture buffer */
        ai1_sign_up[wd - 2] = SIGN(pu1_row[wd - 2] - pu1_row[wd - src_strd]);
        ai1_sign_up[wd - 1] = SIGN(pu1_row[wd - 1] - pu1_row[wd + 1 - src_strd]);

        for(j = 0; j < wd; j++)
        {
            WORD32 cur = pu1_row[j];
            WORD32 below_left;
            WORD32 sign_down;
            WORD32 edge_idx;

            /* The first pair takes its bottom-left neighbour from the left column */
            if(j < 2)
            {
                below_left = pu1_left[2 * (i + 1) + j];
            }
            else
            {
                below_left = pu1_row[j - 2 + src_strd];
            }

            sign_down = SIGN(cur - below_left);
            edge_idx = 2 + ai1_sign_up[j] + sign_down;

            /* Seen from the next row, this comparison is its "up" sign for
             * column j - 2, with the opposite polarity */
            if(j > 1)
            {
                ai1_sign_up[j - 2] = (WORD8)(-sign_down);
            }

            edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_pair_mask[j >> 1];
            if(0 != edge_idx)
            {
                /* Even byte columns use the U table, odd ones the V table */
                pu1_row[j] = CLIP3(cur + api1_offset[j & 1][edge_idx], 0, max_pel);
            }
        }

        pu1_row += src_strd;
    }

    /* Corner pairs were evaluated separately; store them now. pu1_row points
     * just past the last filtered row, hence the negative offsets. */
    top_right_off = -(top_avail ? ht : ht + 1) * src_strd + wd - 2;
    bot_left_off = bot_avail ? (-src_strd) : 0;
    pu1_row[top_right_off] = au1_top_right_pel[0];
    pu1_row[top_right_off + 1] = au1_top_right_pel[1];
    pu1_row[bot_left_off] = au1_bot_left_pel[0];
    pu1_row[bot_left_off + 1] = au1_bot_left_pel[1];

    /* Publish the captured unfiltered samples to the neighbour buffers */
    pu1_src_top_left[0] = au1_new_top_left[0];
    pu1_src_top_left[1] = au1_new_top_left[1];
    for(i = 0; i < 2 * blk_ht; i++)
    {
        pu1_src_left[i] = au1_new_left[i];
    }
    for(j = 0; j < wd; j++)
    {
        pu1_src_top[j] = au1_new_top[j];
    }
}
1375