1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_sao.c
22 *
23 * @brief
24 * Contains leaf level function definitions for sample adaptive offset process
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 * - ihevc_sao_band_offset_luma()
31 * - ihevc_sao_band_offset_chroma()
32 * - ihevc_sao_edge_offset_class0()
33 * - ihevc_sao_edge_offset_class0_chroma()
34 * - ihevc_sao_edge_offset_class1()
35 * - ihevc_sao_edge_offset_class1_chroma()
36 * - ihevc_sao_edge_offset_class2()
37 * - ihevc_sao_edge_offset_class2_chroma()
38 * - ihevc_sao_edge_offset_class3()
39 * - ihevc_sao_edge_offset_class3_chroma()
40 * @remarks
41 * None
42 *
43 *******************************************************************************
44 */
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <string.h>
48 #include "ihevc_typedefs.h"
49 #include "ihevc_macros.h"
50 #include "ihevc_platform_macros.h"
51 #include "ihevc_func_selector.h"
52 #include "ihevc_defs.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_sao.h"
55
/* Number of entries in the band lookup tables built by the band offset */
/* functions below; those tables are indexed by (sample >> (bit_depth - 5)). */
56 #define NUM_BAND_TABLE 32
57
/* Edge classification table used by the edge offset functions below. */
/* It is indexed by 2 + sign(cur - neighbour_a) + sign(cur - neighbour_b), */
/* i.e. 0..4, and yields the index into the SAO offset array. A result of 0 */
/* means the sample is left unmodified. */
58 const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
59 /**
60 * pu1_avail is an array of flags, one per neighboring block, specifying whether that block is available
61 * pu1_avail[0] - left
62 * pu1_avail[1] - right
63 * pu1_avail[2] - top
64 * pu1_avail[3] - bottom
65 * pu1_avail[4] - top-left
66 * pu1_avail[5] - top-right
67 * pu1_avail[6] - bottom-left
68 * pu1_avail[7] - bottom-right
69 */
70
71
/**
 * @brief
 *  Applies the SAO band offset to a luma block, in place.
 *
 * @par Description:
 *  Before any sample is modified, the unfiltered rightmost column, the last
 *  entry of the incoming top row and the unfiltered bottom row of the block
 *  are stored to pu1_src_left, pu1_src_top_left and pu1_src_top respectively.
 *  Each sample is then classified by (sample >> (BIT_DEPTH_LUMA - 5)). The
 *  four consecutive bands starting at sao_band_pos (wrapping at 32) select
 *  offset entries 1..4; every other band selects entry 0.
 *
 * @param[in,out] pu1_src           First sample of the block
 * @param[in]     src_strd          Distance between vertically adjacent samples
 * @param[out]    pu1_src_left      Receives ht samples (rightmost column)
 * @param[in,out] pu1_src_top       wd samples; [wd - 1] is read first, then
 *                                  all wd entries receive the bottom row
 * @param[out]    pu1_src_top_left  Receives the old pu1_src_top[wd - 1]
 * @param[in]     sao_band_pos      First of the four offset bands
 * @param[in]     pi1_sao_offset    Offsets, indexed 0..4 (entry 0 is applied
 *                                  to samples outside the four bands -
 *                                  presumably zero, confirm with the caller)
 * @param[in]     wd                Block width in samples
 * @param[in]     ht                Block height in rows
 */
void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
                                WORD32 src_strd,
                                UWORD8 *pu1_src_left,
                                UWORD8 *pu1_src_top,
                                UWORD8 *pu1_src_top_left,
                                WORD32 sao_band_pos,
                                WORD8 *pi1_sao_offset,
                                WORD32 wd,
                                WORD32 ht)
{
    const WORD32 shift = BIT_DEPTH_LUMA - 5;
    const WORD32 max_pel = (1 << (shift + 5)) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    WORD32 ai4_band_lut[NUM_BAND_TABLE];
    WORD32 x, y, k;

    /* Save the unfiltered neighbour data needed by blocks processed later. */
    /* The old top[wd - 1] must be captured before the top row is replaced. */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + (wd - 1)];
    }
    pu1_src_top_left[0] = pu1_src_top[wd - 1];
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[last_row_off + x];
    }

    /* Band -> offset index: 0 everywhere except the four selected bands */
    memset(ai4_band_lut, 0, sizeof(ai4_band_lut));
    for(k = 0; k < 4; k++)
    {
        ai4_band_lut[(k + sao_band_pos) & 31] = k + 1;
    }

    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            const WORD32 pel = pu1_src[x];
            const WORD32 offset = pi1_sao_offset[ai4_band_lut[pel >> shift]];

            pu1_src[x] = CLIP3(pel + offset, 0, max_pel);
        }
        pu1_src += src_strd;
    }
}
120
121
122
123 /* input 'wd' has to be for the interleaved block and not for each color component */
/**
 * @brief
 *  Applies the SAO band offset to an interleaved chroma block, in place.
 *
 * @par Description:
 *  Even columns are treated as U and odd columns as V, each with its own band
 *  position and offset array. Before filtering, the unfiltered last U/V pair
 *  of every row, the last pair of the incoming top row and the unfiltered
 *  bottom row are stored to pu1_src_left, pu1_src_top_left and pu1_src_top.
 *  Samples are classified by (sample >> (BIT_DEPTH_CHROMA - 5)); the four
 *  bands starting at the plane's band position (wrapping at 32) select offset
 *  entries 1..4, all other bands select entry 0.
 *
 * @param[in,out] pu1_src           First sample of the interleaved block
 * @param[in]     src_strd          Distance between vertically adjacent samples
 * @param[out]    pu1_src_left      Receives 2 * ht samples (last U/V pair per row)
 * @param[in,out] pu1_src_top       wd samples; the last pair is read first,
 *                                  then all wd entries receive the bottom row
 * @param[out]    pu1_src_top_left  Receives the old last pair of pu1_src_top
 * @param[in]     sao_band_pos_u    First offset band for U
 * @param[in]     sao_band_pos_v    First offset band for V
 * @param[in]     pi1_sao_offset_u  U offsets, indexed 0..4
 * @param[in]     pi1_sao_offset_v  V offsets, indexed 0..4
 * @param[in]     wd                Width of the interleaved block (U and V
 *                                  samples together), not of one component
 * @param[in]     ht                Block height in rows
 */
void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  WORD32 sao_band_pos_u,
                                  WORD32 sao_band_pos_v,
                                  WORD8 *pi1_sao_offset_u,
                                  WORD8 *pi1_sao_offset_v,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 shift = BIT_DEPTH_CHROMA - 5;
    const WORD32 max_pel = (1 << (shift + 5)) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    /* Index 0 is the U plane (even columns), index 1 the V plane (odd) */
    WORD32 aai4_band_lut[2][NUM_BAND_TABLE];
    WORD8 *api1_offset[2];
    WORD32 x, y, k;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* Save the unfiltered neighbour data needed by blocks processed later. */
    /* The old top pair must be captured before the top row is replaced. */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[2 * y] = pu1_src[y * src_strd + (wd - 2)];
        pu1_src_left[2 * y + 1] = pu1_src[y * src_strd + (wd - 1)];
    }
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[last_row_off + x];
    }

    /* Band -> offset index: 0 everywhere except the four selected bands */
    memset(aai4_band_lut, 0, sizeof(aai4_band_lut));
    for(k = 0; k < 4; k++)
    {
        aai4_band_lut[0][(k + sao_band_pos_u) & 31] = k + 1;
        aai4_band_lut[1][(k + sao_band_pos_v) & 31] = k + 1;
    }

    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            const WORD32 plane = x & 1;
            const WORD32 pel = pu1_src[x];
            const WORD32 band_idx = aai4_band_lut[plane][pel >> shift];

            pu1_src[x] = CLIP3(pel + api1_offset[plane][band_idx], 0, max_pel);
        }
        pu1_src += src_strd;
    }
}
182
183
184
185 /* Horizontal filtering */
/**
 * @brief
 *  Applies the horizontal (class 0) SAO edge offset to a luma block, in place.
 *
 * @par Description:
 *  Every sample is compared with its left and right neighbours. The two signs
 *  select an entry of gi4_ihevc_table_edge_idx, which in turn selects the
 *  offset to add; a table result of 0 leaves the sample untouched. The left
 *  neighbour of column 0 comes from pu1_src_left; the right neighbour of the
 *  last column is read from pu1_src[wd]. Column 0 is skipped when
 *  pu1_avail[0] is 0 and column wd - 1 when pu1_avail[1] is 0.
 *
 *  The unfiltered bottom row and the old pu1_src_top[wd - 1] are stored to
 *  pu1_src_top and pu1_src_top_left up front; the unfiltered rightmost column
 *  is stored to pu1_src_left only after filtering, because pu1_src_left is
 *  still read while filtering.
 *
 * @param[in,out] pu1_src            First sample of the block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[in,out] pu1_src_left       ht samples: left neighbours on entry,
 *                                   unfiltered rightmost column on exit
 * @param[in,out] pu1_src_top        wd samples, replaced by the bottom row
 * @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 1]
 * @param[in]     pu1_src_top_right  Unused
 * @param[in]     pu1_src_bot_left   Unused
 * @param[in]     pu1_avail          Availability flags; [0] and [1] are read
 * @param[in]     pi1_sao_offset     Offsets, indexed 1..4
 * @param[in]     wd                 Block width (local scratch arrays hold
 *                                   MAX_CTB_SIZE entries)
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_LUMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_col[MAX_CTB_SIZE];
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    /* All columns enabled until the availability flags say otherwise */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    *pu1_src_top_left = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_col[y] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[last_row_off + x];
    }

    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    for(y = 0; y < ht; y++)
    {
        WORD32 sign_left = SIGN(pu1_src[0] - pu1_src_left[y]);

        for(x = 0; x < wd; x++)
        {
            const WORD32 sign_right = SIGN(pu1_src[x] - pu1_src[x + 1]);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + sign_left + sign_right] & au1_col_mask[x];

            /* Seen from the next column, this comparison has the opposite */
            /* sign; carrying it over avoids re-reading the modified sample */
            sign_left = -sign_right;

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + pi1_sao_offset[edge_idx], 0, max_pel);
            }
        }

        pu1_src += src_strd;
    }

    /* Publish the saved rightmost column now that the old one is not needed */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = au1_right_col[y];
    }
}
263
264
265
266
267 /* input 'wd' has to be for the interleaved block and not for each color component */
/**
 * @brief
 *  Applies the horizontal (class 0) SAO edge offset to an interleaved chroma
 *  block, in place.
 *
 * @par Description:
 *  Even columns are U and odd columns are V. Each sample is compared with the
 *  samples two columns to its left and right (same plane). The left
 *  neighbours of the first U/V pair come from pu1_src_left; the right
 *  neighbours of the last pair are read from pu1_src[wd] and pu1_src[wd + 1].
 *  The mask is kept per U/V pair: pair 0 is skipped when pu1_avail[0] is 0 and
 *  pair (wd - 1) >> 1 when pu1_avail[1] is 0.
 *
 *  The unfiltered bottom row and the old last pair of pu1_src_top are stored
 *  up front; the unfiltered last pair of every row is stored to pu1_src_left
 *  only after filtering, because pu1_src_left is still read while filtering.
 *
 * @param[in,out] pu1_src            First sample of the interleaved block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[in,out] pu1_src_left       2 * ht samples: left neighbour pairs on
 *                                   entry, unfiltered last pairs on exit
 * @param[in,out] pu1_src_top        wd samples, replaced by the bottom row
 * @param[out]    pu1_src_top_left   Receives the old last pair of pu1_src_top
 * @param[in]     pu1_src_top_right  Unused
 * @param[in]     pu1_src_bot_left   Unused
 * @param[in]     pu1_avail          Availability flags; [0] and [1] are read
 * @param[in]     pi1_sao_offset_u   U offsets, indexed 1..4
 * @param[in]     pi1_sao_offset_v   V offsets, indexed 1..4
 * @param[in]     wd                 Width of the interleaved block (U and V
 *                                   samples together), not of one component
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_CHROMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_cols[2 * MAX_CTB_SIZE];
    /* Index 0 is the U plane (even columns), index 1 the V plane (odd) */
    WORD8 *api1_offset[2];
    WORD32 ai4_sign_left[2];
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* All pairs enabled until the availability flags say otherwise */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_cols[2 * y] = pu1_src[y * src_strd + wd - 2];
        au1_right_cols[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[last_row_off + x];
    }

    if(0 == pu1_avail[0])
    {
        au1_pair_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_pair_mask[(wd - 1) >> 1] = 0;
    }

    for(y = 0; y < ht; y++)
    {
        ai4_sign_left[0] = SIGN(pu1_src[0] - pu1_src_left[2 * y]);
        ai4_sign_left[1] = SIGN(pu1_src[1] - pu1_src_left[2 * y + 1]);

        for(x = 0; x < wd; x++)
        {
            const WORD32 plane = x & 1;
            const WORD32 sign_right = SIGN(pu1_src[x] - pu1_src[x + 2]);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + ai4_sign_left[plane] + sign_right] &
                au1_pair_mask[x >> 1];

            /* Becomes the left sign of the next sample of the same plane */
            ai4_sign_left[plane] = -sign_right;

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + api1_offset[plane][edge_idx], 0, max_pel);
            }
        }

        pu1_src += src_strd;
    }

    /* Publish the saved last pairs now that the old ones are not needed */
    for(y = 0; y < 2 * ht; y++)
    {
        pu1_src_left[y] = au1_right_cols[y];
    }
}
361
362
363
364 /* Vertical filtering */
/**
 * @brief
 *  Applies the vertical (class 1) SAO edge offset to a luma block, in place.
 *
 * @par Description:
 *  Every sample is compared with the samples directly above and below it. The
 *  two signs select an entry of gi4_ihevc_table_edge_idx, which selects the
 *  offset to add; a table result of 0 leaves the sample untouched. The upper
 *  neighbours of the first row come from pu1_src_top; the lower neighbours of
 *  the last row are read from the row following the block in pu1_src. The
 *  first row is skipped when pu1_avail[2] is 0 and the last row when
 *  pu1_avail[3] is 0.
 *
 *  The unfiltered rightmost column and the old pu1_src_top[wd - 1] are stored
 *  up front; the unfiltered bottom row is stored to pu1_src_top only after
 *  filtering, because pu1_src_top is still read while setting up.
 *
 * @param[in,out] pu1_src            First sample of the block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[out]    pu1_src_left       Receives ht samples (rightmost column)
 * @param[in,out] pu1_src_top        wd samples: upper neighbours on entry,
 *                                   unfiltered bottom row on exit
 * @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 1]
 * @param[in]     pu1_src_top_right  Unused
 * @param[in]     pu1_src_bot_left   Unused
 * @param[in]     pu1_avail          Availability flags; [2] and [3] are read
 * @param[in]     pi1_sao_offset     Offsets, indexed 1..4
 * @param[in]     wd                 Block width (local scratch arrays hold
 *                                   MAX_CTB_SIZE entries)
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_LUMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    WORD32 rows = ht;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    /* No column is ever disabled in this class; the mask stays all ones */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    *pu1_src_top_left = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[last_row_off + x];
    }

    /* Seed the "up" signs for the first row that will be filtered */
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: start at the second row, whose upper */
        /* neighbour is the block's own first row                 */
        pu1_src += src_strd;
        rows--;
        for(x = 0; x < wd; x++)
        {
            ai1_sign_up[x] = SIGN(pu1_src[x] - pu1_src[x - src_strd]);
        }
    }
    else
    {
        for(x = 0; x < wd; x++)
        {
            ai1_sign_up[x] = SIGN(pu1_src[x] - pu1_src_top[x]);
        }
    }

    /* Bottom unavailable: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        rows--;
    }

    for(y = 0; y < rows; y++)
    {
        for(x = 0; x < wd; x++)
        {
            const WORD32 sign_down = SIGN(pu1_src[x] - pu1_src[x + src_strd]);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + ai1_sign_up[x] + sign_down] & au1_col_mask[x];

            /* Becomes the "up" sign of the sample below */
            ai1_sign_up[x] = -sign_down;

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + pi1_sao_offset[edge_idx], 0, max_pel);
            }
        }

        pu1_src += src_strd;
    }

    /* Publish the saved bottom row now that the old top row is not needed */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
454
455
456
457 /* input 'wd' has to be for the interleaved block and not for each color component */
/**
 * @brief
 *  Applies the vertical (class 1) SAO edge offset to an interleaved chroma
 *  block, in place.
 *
 * @par Description:
 *  Even columns are U and odd columns are V; the plane only selects which
 *  offset array is used. Every sample is compared with the samples directly
 *  above and below it. The upper neighbours of the first row come from
 *  pu1_src_top; the lower neighbours of the last row are read from the row
 *  following the block in pu1_src. The first row is skipped when pu1_avail[2]
 *  is 0 and the last row when pu1_avail[3] is 0.
 *
 *  The unfiltered last U/V pair of every row and the old last pair of
 *  pu1_src_top are stored up front; the unfiltered bottom row is stored to
 *  pu1_src_top only after filtering.
 *
 * @param[in,out] pu1_src            First sample of the interleaved block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[out]    pu1_src_left       Receives 2 * ht samples (last pair per row)
 * @param[in,out] pu1_src_top        wd samples: upper neighbours on entry,
 *                                   unfiltered bottom row on exit
 * @param[out]    pu1_src_top_left   Receives the old last pair of pu1_src_top
 * @param[in]     pu1_src_top_right  Unused
 * @param[in]     pu1_src_bot_left   Unused
 * @param[in]     pu1_avail          Availability flags; [2] and [3] are read
 * @param[in]     pi1_sao_offset_u   U offsets, indexed 1..4
 * @param[in]     pi1_sao_offset_v   V offsets, indexed 1..4
 * @param[in]     wd                 Width of the interleaved block (U and V
 *                                   samples together), not of one component
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_CHROMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    /* Index 0 is the U plane (even columns), index 1 the V plane (odd) */
    WORD8 *api1_offset[2];
    WORD32 rows = ht;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* No pair is ever disabled in this class; the mask stays all ones */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[2 * y] = pu1_src[y * src_strd + wd - 2];
        pu1_src_left[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[last_row_off + x];
    }

    /* Seed the "up" signs for the first row that will be filtered */
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: start at the second row, whose upper */
        /* neighbour is the block's own first row                 */
        pu1_src += src_strd;
        rows--;
        for(x = 0; x < wd; x++)
        {
            ai1_sign_up[x] = SIGN(pu1_src[x] - pu1_src[x - src_strd]);
        }
    }
    else
    {
        for(x = 0; x < wd; x++)
        {
            ai1_sign_up[x] = SIGN(pu1_src[x] - pu1_src_top[x]);
        }
    }

    /* Bottom unavailable: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        rows--;
    }

    for(y = 0; y < rows; y++)
    {
        for(x = 0; x < wd; x++)
        {
            const WORD32 sign_down = SIGN(pu1_src[x] - pu1_src[x + src_strd]);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + ai1_sign_up[x] + sign_down] &
                au1_pair_mask[x >> 1];

            /* Becomes the "up" sign of the sample below */
            ai1_sign_up[x] = -sign_down;

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + api1_offset[x & 1][edge_idx], 0, max_pel);
            }
        }

        pu1_src += src_strd;
    }

    /* Publish the saved bottom row now that the old top row is not needed */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
553
554
555
556 /* 135 degree filtering */
/**
 * @brief
 *  Applies the 135 degree (class 2) SAO edge offset to a luma block, in place.
 *
 * @par Description:
 *  Every sample is compared with its upper-left and lower-right neighbours.
 *  The two signs select an entry of gi4_ihevc_table_edge_idx, which selects
 *  the offset to add; a table result of 0 leaves the sample untouched.
 *
 *  The top-left and bottom-right corner samples are evaluated separately
 *  before the main pass (filtered only when pu1_avail[4] / pu1_avail[7] are
 *  non-zero) and written back after it, overriding whatever the main pass
 *  produced at those two positions. Column 0 is skipped when pu1_avail[0] is
 *  0, column wd - 1 when pu1_avail[1] is 0, the first row when pu1_avail[2]
 *  is 0 and the last row when pu1_avail[3] is 0.
 *
 *  The unfiltered rightmost column, bottom row and the old
 *  pu1_src_top[wd - 1] are captured first and written to pu1_src_left,
 *  pu1_src_top and pu1_src_top_left only at the end, since the incoming
 *  contents of those buffers are read while filtering.
 *
 * @param[in,out] pu1_src            First sample of the block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[in,out] pu1_src_left       ht samples: left neighbours on entry,
 *                                   unfiltered rightmost column on exit
 * @param[in,out] pu1_src_top        wd samples: upper neighbours on entry,
 *                                   unfiltered bottom row on exit
 * @param[in,out] pu1_src_top_left   Upper-left neighbour of the block on
 *                                   entry, old pu1_src_top[wd - 1] on exit
 * @param[in]     pu1_src_top_right  Unused
 * @param[in]     pu1_src_bot_left   Unused
 * @param[in]     pu1_avail          Availability flags; [0], [1], [2], [3],
 *                                   [4] and [7] are read
 * @param[in]     pi1_sao_offset     Offsets, indexed 1..4
 * @param[in]     wd                 Block width (local scratch arrays hold
 *                                   MAX_CTB_SIZE entries)
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_LUMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    const WORD32 br_off = wd - 1 + last_row_off;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_col[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    /* Two sign rows used in ping-pong fashion: the signs produced while */
    /* filtering one row are consumed, shifted by one column, by the next */
    WORD8 ai1_sign_a[MAX_CTB_SIZE + 1];
    WORD8 ai1_sign_b[MAX_CTB_SIZE + 1];
    WORD8 *pi1_sign_cur = ai1_sign_a;
    WORD8 *pi1_sign_next = ai1_sign_b;
    UWORD8 *pu1_left = pu1_src_left;
    UWORD8 u1_old_top_last;
    UWORD8 u1_corner_tl;
    UWORD8 u1_corner_br;
    WORD32 rows = ht;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    /* All columns enabled until the availability flags say otherwise */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    u1_old_top_last = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_col[y] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[last_row_off + x];
    }

    /* Top-left corner: filtered only when the top-left block is available */
    u1_corner_tl = pu1_src[0];
    if(0 != pu1_avail[4])
    {
        const WORD32 edge_idx =
            gi4_ihevc_table_edge_idx[2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
                                     SIGN(pu1_src[0] - pu1_src[1 + src_strd])];

        if(0 != edge_idx)
        {
            u1_corner_tl = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, max_pel);
        }
    }

    /* Bottom-right corner: filtered only when that block is available */
    u1_corner_br = pu1_src[br_off];
    if(0 != pu1_avail[7])
    {
        const WORD32 edge_idx =
            gi4_ihevc_table_edge_idx[2 + SIGN(pu1_src[br_off] - pu1_src[br_off - 1 - src_strd]) +
                                     SIGN(pu1_src[br_off] - pu1_src[br_off + 1 + src_strd])];

        if(0 != edge_idx)
        {
            u1_corner_br = CLIP3(pu1_src[br_off] + pi1_sao_offset[edge_idx], 0, max_pel);
        }
    }

    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    /* Seed the "up" signs of columns 1.. for the first filtered row */
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: start at the second row; its upper-left */
        /* neighbours are in the block's own first row               */
        pu1_src += src_strd;
        rows--;
        pu1_left += 1;
        for(x = 1; x < wd; x++)
        {
            pi1_sign_cur[x] = SIGN(pu1_src[x] - pu1_src[x - 1 - src_strd]);
        }
    }
    else
    {
        for(x = 1; x < wd; x++)
        {
            pi1_sign_cur[x] = SIGN(pu1_src[x] - pu1_src_top[x - 1]);
        }
    }

    /* Bottom unavailable: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        rows--;
    }

    for(y = 0; y < rows; y++)
    {
        WORD8 *pi1_swap;

        /* The upper-left neighbour of column 0 is the left column entry of */
        /* the previous row. With top available and y == 0 this reads the   */
        /* entry just before pu1_src_left; the sample computed from it is   */
        /* the top-left corner, which is overwritten after the loop.        */
        pi1_sign_cur[0] = SIGN(pu1_src[0] - pu1_left[y - 1]);

        for(x = 0; x < wd; x++)
        {
            const WORD32 sign_down = SIGN(pu1_src[x] - pu1_src[x + 1 + src_strd]);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + pi1_sign_cur[x] + sign_down] & au1_col_mask[x];

            /* "Up" sign for the lower-right sample in the next row */
            pi1_sign_next[x + 1] = -sign_down;

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + pi1_sao_offset[edge_idx], 0, max_pel);
            }
        }

        pi1_swap = pi1_sign_cur;
        pi1_sign_cur = pi1_sign_next;
        pi1_sign_next = pi1_swap;

        pu1_src += src_strd;
    }

    /* pu1_src now points 'rows' rows below the first filtered row; step */
    /* back to the block's first and last rows to install the corners    */
    pu1_src[-(pu1_avail[2] ? rows : rows + 1) * src_strd] = u1_corner_tl;
    pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_corner_br;

    /* Publish the saved unfiltered neighbour data */
    *pu1_src_top_left = u1_old_top_last;
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = au1_right_col[y];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
740
741
742
743
744 /* 135 degree filtering */
/**
 * @brief
 *  Applies the 135 degree (class 2) SAO edge offset to an interleaved chroma
 *  block, in place.
 *
 * @par Description:
 *  Even columns are U and odd columns are V. Every sample is compared with
 *  its upper-left and lower-right neighbours of the same plane (two columns
 *  away, one row away). The two signs select an entry of
 *  gi4_ihevc_table_edge_idx, which selects the offset to add; a table result
 *  of 0 leaves the sample untouched.
 *
 *  The top-left and bottom-right U/V pairs are evaluated separately before
 *  the main pass (filtered only when pu1_avail[4] / pu1_avail[7] are
 *  non-zero) and written back after it, overriding whatever the main pass
 *  produced there. The mask is kept per U/V pair: pair 0 is skipped when
 *  pu1_avail[0] is 0 and pair (wd - 1) >> 1 when pu1_avail[1] is 0. The first
 *  row is skipped when pu1_avail[2] is 0 and the last row when pu1_avail[3]
 *  is 0.
 *
 *  The unfiltered last pair of every row, the bottom row and the old last
 *  pair of pu1_src_top are captured first and written to pu1_src_left,
 *  pu1_src_top and pu1_src_top_left only at the end.
 *
 * @param[in,out] pu1_src            First sample of the interleaved block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[in,out] pu1_src_left       2 * ht samples: left neighbour pairs on
 *                                   entry, unfiltered last pairs on exit
 * @param[in,out] pu1_src_top        wd samples: upper neighbours on entry,
 *                                   unfiltered bottom row on exit
 * @param[in,out] pu1_src_top_left   Upper-left neighbour pair on entry, old
 *                                   last pair of pu1_src_top on exit
 * @param[in]     pu1_src_top_right  Unused
 * @param[in]     pu1_src_bot_left   Unused
 * @param[in]     pu1_avail          Availability flags; [0], [1], [2], [3],
 *                                   [4] and [7] are read
 * @param[in]     pi1_sao_offset_u   U offsets, indexed 1..4
 * @param[in]     pi1_sao_offset_v   V offsets, indexed 1..4
 * @param[in]     wd                 Width of the interleaved block (U and V
 *                                   samples together), not of one component
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_CHROMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_cols[2 * MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    UWORD8 au1_old_top_last[2];
    /* Corner results; index 0 is U, index 1 is V */
    UWORD8 au1_corner_tl[2];
    UWORD8 au1_corner_br[2];
    /* Two sign rows used in ping-pong fashion: the signs produced while */
    /* filtering one row are consumed, shifted by one pair, by the next  */
    WORD8 ai1_sign_a[MAX_CTB_SIZE + 2];
    WORD8 ai1_sign_b[MAX_CTB_SIZE + 2];
    WORD8 *pi1_sign_cur = ai1_sign_a;
    WORD8 *pi1_sign_next = ai1_sign_b;
    WORD8 *api1_offset[2];
    UWORD8 *pu1_left = pu1_src_left;
    WORD32 rows = ht;
    WORD32 x, y, plane;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* All pairs enabled until the availability flags say otherwise */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    au1_old_top_last[0] = pu1_src_top[wd - 2];
    au1_old_top_last[1] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_cols[2 * y] = pu1_src[y * src_strd + wd - 2];
        au1_right_cols[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[last_row_off + x];
    }

    /* Corner pairs, one plane at a time */
    for(plane = 0; plane < 2; plane++)
    {
        const WORD32 br_off = wd - 2 + plane + last_row_off;

        /* Top-left: filtered only when the top-left block is available */
        au1_corner_tl[plane] = pu1_src[plane];
        if(0 != pu1_avail[4])
        {
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + SIGN(pu1_src[plane] - pu1_src_top_left[plane]) +
                                         SIGN(pu1_src[plane] - pu1_src[plane + 2 + src_strd])];

            if(0 != edge_idx)
            {
                au1_corner_tl[plane] =
                    CLIP3(pu1_src[plane] + api1_offset[plane][edge_idx], 0, max_pel);
            }
        }

        /* Bottom-right: filtered only when that block is available */
        au1_corner_br[plane] = pu1_src[br_off];
        if(0 != pu1_avail[7])
        {
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + SIGN(pu1_src[br_off] - pu1_src[br_off - 2 - src_strd]) +
                                         SIGN(pu1_src[br_off] - pu1_src[br_off + 2 + src_strd])];

            if(0 != edge_idx)
            {
                au1_corner_br[plane] =
                    CLIP3(pu1_src[br_off] + api1_offset[plane][edge_idx], 0, max_pel);
            }
        }
    }

    if(0 == pu1_avail[0])
    {
        au1_pair_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_pair_mask[(wd - 1) >> 1] = 0;
    }

    /* Seed the "up" signs of columns 2.. for the first filtered row */
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: start at the second row; its upper-left */
        /* neighbours are in the block's own first row               */
        pu1_src += src_strd;
        pu1_left += 2;
        rows--;
        for(x = 2; x < wd; x++)
        {
            pi1_sign_cur[x] = SIGN(pu1_src[x] - pu1_src[x - 2 - src_strd]);
        }
    }
    else
    {
        for(x = 2; x < wd; x++)
        {
            pi1_sign_cur[x] = SIGN(pu1_src[x] - pu1_src_top[x - 2]);
        }
    }

    /* Bottom unavailable: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        rows--;
    }

    for(y = 0; y < rows; y++)
    {
        WORD8 *pi1_swap;

        /* The upper-left neighbours of the first pair are the left pair of */
        /* the previous row. With top available and y == 0 this reads the   */
        /* pair just before pu1_src_left; the samples computed from it are  */
        /* the top-left corner pair, which is overwritten after the loop.   */
        pi1_sign_cur[0] = SIGN(pu1_src[0] - pu1_left[2 * (y - 1)]);
        pi1_sign_cur[1] = SIGN(pu1_src[1] - pu1_left[2 * (y - 1) + 1]);

        for(x = 0; x < wd; x++)
        {
            const WORD32 sign_down = SIGN(pu1_src[x] - pu1_src[x + 2 + src_strd]);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + pi1_sign_cur[x] + sign_down] &
                au1_pair_mask[x >> 1];

            /* "Up" sign for the lower-right sample in the next row */
            pi1_sign_next[x + 2] = -sign_down;

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + api1_offset[x & 1][edge_idx], 0, max_pel);
            }
        }

        pi1_swap = pi1_sign_cur;
        pi1_sign_cur = pi1_sign_next;
        pi1_sign_next = pi1_swap;

        pu1_src += src_strd;
    }

    /* pu1_src now points 'rows' rows below the first filtered row; step */
    /* back to the block's first and last rows to install the corners    */
    pu1_src[-(pu1_avail[2] ? rows : rows + 1) * src_strd] = au1_corner_tl[0];
    pu1_src[-(pu1_avail[2] ? rows : rows + 1) * src_strd + 1] = au1_corner_tl[1];
    pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = au1_corner_br[0];
    pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = au1_corner_br[1];

    /* Publish the saved unfiltered neighbour data */
    pu1_src_top_left[0] = au1_old_top_last[0];
    pu1_src_top_left[1] = au1_old_top_last[1];
    for(y = 0; y < 2 * ht; y++)
    {
        pu1_src_left[y] = au1_right_cols[y];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
976
977
978
979
980 /* 45 degree filtering */
/**
 * @brief
 *  Applies the 45 degree (class 3) SAO edge offset to a luma block, in place.
 *
 * @par Description:
 *  Every sample is compared with its upper-right and lower-left neighbours.
 *  The two signs select an entry of gi4_ihevc_table_edge_idx, which selects
 *  the offset to add; a table result of 0 leaves the sample untouched.
 *
 *  The top-right and bottom-left corner samples are evaluated separately
 *  before the main pass (filtered only when pu1_avail[5] / pu1_avail[6] are
 *  non-zero, using pu1_src_top_right / pu1_src_bot_left as the outside
 *  neighbour) and written back after it, overriding whatever the main pass
 *  produced at those two positions. Column 0 is skipped when pu1_avail[0] is
 *  0, column wd - 1 when pu1_avail[1] is 0, the first row when pu1_avail[2]
 *  is 0 and the last row when pu1_avail[3] is 0.
 *
 *  The unfiltered rightmost column, bottom row and the old
 *  pu1_src_top[wd - 1] are captured first and written to pu1_src_left,
 *  pu1_src_top and pu1_src_top_left only at the end, since the incoming
 *  contents of pu1_src_left and pu1_src_top are read while filtering.
 *
 * @param[in,out] pu1_src            First sample of the block
 * @param[in]     src_strd           Distance between vertically adjacent samples
 * @param[in,out] pu1_src_left       ht samples: left neighbours on entry,
 *                                   unfiltered rightmost column on exit
 * @param[in,out] pu1_src_top        wd samples: upper neighbours on entry,
 *                                   unfiltered bottom row on exit
 * @param[out]    pu1_src_top_left   Receives the old pu1_src_top[wd - 1]
 * @param[in]     pu1_src_top_right  Upper-right neighbour of the top-right
 *                                   sample (read when pu1_avail[5] != 0)
 * @param[in]     pu1_src_bot_left   Lower-left neighbour of the bottom-left
 *                                   sample (read when pu1_avail[6] != 0)
 * @param[in]     pu1_avail          Availability flags; [0], [1], [2], [3],
 *                                   [5] and [6] are read
 * @param[in]     pi1_sao_offset     Offsets, indexed 1..4
 * @param[in]     wd                 Block width (local scratch arrays hold
 *                                   MAX_CTB_SIZE entries)
 * @param[in]     ht                 Block height in rows
 */
void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 max_pel = (1 << BIT_DEPTH_LUMA) - 1;
    const WORD32 last_row_off = (ht - 1) * src_strd;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_col[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    UWORD8 *pu1_left = pu1_src_left;
    UWORD8 u1_old_top_last;
    UWORD8 u1_corner_tr;
    UWORD8 u1_corner_bl;
    WORD32 rows = ht;
    WORD32 x, y;

    /* All columns enabled until the availability flags say otherwise */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture unfiltered neighbour data before any sample is modified */
    u1_old_top_last = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_col[y] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[last_row_off + x];
    }

    /* Top-right corner: filtered only when the top-right block is available */
    u1_corner_tr = pu1_src[wd - 1];
    if(0 != pu1_avail[5])
    {
        const WORD32 edge_idx =
            gi4_ihevc_table_edge_idx[2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) +
                                     SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd])];

        if(0 != edge_idx)
        {
            u1_corner_tr = CLIP3(pu1_src[wd - 1] + pi1_sao_offset[edge_idx], 0, max_pel);
        }
    }

    /* Bottom-left corner: filtered only when that block is available */
    u1_corner_bl = pu1_src[last_row_off];
    if(0 != pu1_avail[6])
    {
        const WORD32 edge_idx =
            gi4_ihevc_table_edge_idx[2 + SIGN(pu1_src[last_row_off] - pu1_src[last_row_off + 1 - src_strd]) +
                                     SIGN(pu1_src[last_row_off] - pu1_src_bot_left[0])];

        if(0 != edge_idx)
        {
            u1_corner_bl = CLIP3(pu1_src[last_row_off] + pi1_sao_offset[edge_idx], 0, max_pel);
        }
    }

    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    /* Seed the "up" signs of columns 0..wd-2 for the first filtered row */
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: start at the second row; its upper-right */
        /* neighbours are in the block's own first row                */
        pu1_src += src_strd;
        rows--;
        pu1_left += 1;
        for(x = 0; x < wd - 1; x++)
        {
            ai1_sign_up[x] = SIGN(pu1_src[x] - pu1_src[x + 1 - src_strd]);
        }
    }
    else
    {
        for(x = 0; x < wd - 1; x++)
        {
            ai1_sign_up[x] = SIGN(pu1_src[x] - pu1_src_top[x + 1]);
        }
    }

    /* Bottom unavailable: leave the last row alone */
    if(0 == pu1_avail[3])
    {
        rows--;
    }

    for(y = 0; y < rows; y++)
    {
        /* The upper-right neighbour of the last column lies outside the */
        /* block, in the previous row of pu1_src                          */
        ai1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 1 - src_strd]);

        for(x = 0; x < wd; x++)
        {
            /* Column 0 takes its lower-left neighbour from the left column */
            const WORD32 below_left =
                (0 == x) ? pu1_left[y + 1] : pu1_src[x - 1 + src_strd];
            const WORD32 sign_down = SIGN(pu1_src[x] - below_left);
            const WORD32 edge_idx =
                gi4_ihevc_table_edge_idx[2 + ai1_sign_up[x] + sign_down] & au1_col_mask[x];

            /* "Up" sign for the lower-left sample in the next row */
            if(x > 0)
            {
                ai1_sign_up[x - 1] = -sign_down;
            }

            if(0 != edge_idx)
            {
                pu1_src[x] = CLIP3(pu1_src[x] + pi1_sao_offset[edge_idx], 0, max_pel);
            }
        }

        pu1_src += src_strd;
    }

    /* pu1_src now points 'rows' rows below the first filtered row; step */
    /* back to the block's first and last rows to install the corners    */
    pu1_src[-(pu1_avail[2] ? rows : rows + 1) * src_strd + wd - 1] = u1_corner_tr;
    pu1_src[(pu1_avail[3] ? (-src_strd) : 0)] = u1_corner_bl;

    /* Publish the saved unfiltered neighbour data */
    *pu1_src_top_left = u1_old_top_last;
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = au1_right_col[y];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
1153
1154
1155
1156
/**
*******************************************************************************
*
* @brief
*  SAO edge offset, class 3, for interleaved chroma (U at even byte positions,
*  V at odd byte positions of every row).
*
* @par Description:
*  Each sample is compared with the top-right and the bottom-left sample of the
*  same plane (two bytes to the side, one row up / down). The two signs pick an
*  entry of gi4_ihevc_table_edge_idx, which selects the offset added to the
*  sample; the sum is clipped to [0, (1 << BIT_DEPTH_CHROMA) - 1]. Filtering is
*  done in place on pu1_src. Before filtering, the unfiltered right-most U/V
*  pair of every row, the unfiltered last row and the last U/V pair of
*  pu1_src_top are saved; on exit they are written to pu1_src_left,
*  pu1_src_top and pu1_src_top_left respectively.
*
* @param[in,out] pu1_src
*  Top-left sample of the block; filtered in place
*
* @param[in] src_strd
*  Distance in bytes between two rows of pu1_src
*
* @param[in,out] pu1_src_left
*  Left neighbours, one U/V pair per row; overwritten with 2 * ht saved bytes
*
* @param[in,out] pu1_src_top
*  Row above the block; overwritten with wd saved bytes
*
* @param[out] pu1_src_top_left
*  Receives the U/V pair previously stored at pu1_src_top[wd - 2 .. wd - 1]
*
* @param[in] pu1_src_top_right
*  U/V pair diagonally above-right of the block (read only if pu1_avail[5])
*
* @param[in] pu1_src_bot_left
*  U/V pair diagonally below-left of the block (read only if pu1_avail[6])
*
* @param[in] pu1_avail
*  Neighbour availability flags: [0] left, [1] right, [2] top, [3] bottom,
*  [5] top-right, [6] bottom-left
*
* @param[in] pi1_sao_offset_u
*  Offsets for U, indexed by the edge index
*
* @param[in] pi1_sao_offset_v
*  Offsets for V, indexed by the edge index
*
* @param[in] wd
*  Number of interleaved bytes per row to process
*
* @param[in] ht
*  Number of rows to process
*
* @returns
*  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    WORD32 i, j, plane;
    WORD32 num_rows;
    WORD32 max_pel;
    WORD32 first_row_off, last_row_off;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_left_bkp[2 * MAX_CTB_SIZE];
    UWORD8 au1_top_bkp[MAX_CTB_SIZE];
    UWORD8 au1_top_left_bkp[2];
    UWORD8 au1_top_right_pel[2];
    UWORD8 au1_bot_left_pel[2];
    WORD8 ai1_sign_top_right[MAX_CTB_SIZE];
    WORD8 *api1_offset[2];
    UWORD8 *pu1_row;
    UWORD8 *pu1_last_row;
    UWORD8 *pu1_left_nbr;

    max_pel = (1 << BIT_DEPTH_CHROMA) - 1;
    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;
    pu1_last_row = pu1_src + (ht - 1) * src_strd;

    /* Back up the unfiltered samples that are handed out through          */
    /* pu1_src_top_left, pu1_src_left and pu1_src_top at the end           */
    au1_top_left_bkp[0] = pu1_src_top[wd - 2];
    au1_top_left_bkp[1] = pu1_src_top[wd - 1];
    for(i = 0; i < ht; i++)
    {
        UWORD8 *pu1_tail = pu1_src + i * src_strd + wd - 2;

        au1_left_bkp[2 * i] = pu1_tail[0];
        au1_left_bkp[2 * i + 1] = pu1_tail[1];
    }
    for(j = 0; j < wd; j++)
    {
        au1_top_bkp[j] = pu1_last_row[j];
    }

    /* Top-right U/V pair of the block: filtered here only when the        */
    /* top-right neighbour is available, otherwise kept as is. The value   */
    /* is written back after the main loop.                                */
    for(plane = 0; plane < 2; plane++)
    {
        WORD32 cur = pu1_src[wd - 2 + plane];
        WORD32 out = cur;

        if(0 != pu1_avail[5])
        {
            WORD32 idx;

            idx = 2 + SIGN(cur - pu1_src_top_right[plane]) +
                            SIGN(cur - pu1_src[wd - 4 + plane + src_strd]);
            idx = gi4_ihevc_table_edge_idx[idx];

            if(0 != idx)
            {
                out = CLIP3(cur + api1_offset[plane][idx], 0, max_pel);
            }
        }
        au1_top_right_pel[plane] = (UWORD8)out;
    }

    /* Bottom-left U/V pair of the block: filtered here only when the      */
    /* bottom-left neighbour is available, otherwise kept as is. The value */
    /* is written back after the main loop.                                */
    for(plane = 0; plane < 2; plane++)
    {
        WORD32 cur = pu1_last_row[plane];
        WORD32 out = cur;

        if(0 != pu1_avail[6])
        {
            WORD32 idx;

            idx = 2 + SIGN(cur - pu1_last_row[plane + 2 - src_strd]) +
                            SIGN(cur - pu1_src_bot_left[plane]);
            idx = gi4_ihevc_table_edge_idx[idx];

            if(0 != idx)
            {
                out = CLIP3(cur + api1_offset[plane][idx], 0, max_pel);
            }
        }
        au1_bot_left_pel[plane] = (UWORD8)out;
    }

    /* One mask entry per U/V pair; a zero entry forces edge index 0,      */
    /* i.e. the pair is left untouched                                     */
    memset(au1_col_mask, 0xFF, sizeof(au1_col_mask));
    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_col_mask[(wd - 1) >> 1] = 0;
    }

    pu1_row = pu1_src;
    pu1_left_nbr = pu1_src_left;
    num_rows = ht;

    /* Signs against the top-right neighbour for the first filtered row.   */
    /* Without a top neighbour the first row is skipped and the row above  */
    /* the first filtered row is the (unfiltered) first row of the block.  */
    if(0 == pu1_avail[2])
    {
        pu1_row += src_strd;
        pu1_left_nbr += 2;
        num_rows--;
        for(j = 0; j < wd - 2; j++)
        {
            ai1_sign_top_right[j] = SIGN(pu1_row[j] - pu1_row[j + 2 - src_strd]);
        }
    }
    else
    {
        for(j = 0; j < wd - 2; j++)
        {
            ai1_sign_top_right[j] = SIGN(pu1_row[j] - pu1_src_top[j + 2]);
        }
    }

    /* Without a bottom neighbour the last row is skipped */
    if(0 == pu1_avail[3])
    {
        num_rows--;
    }

    for(i = 0; i < num_rows; i++)
    {
        /* The top-right neighbours of the last U/V pair lie to the right  */
        /* of the block in the previous row of the picture buffer          */
        ai1_sign_top_right[wd - 2] = SIGN(pu1_row[wd - 2] - pu1_row[wd - src_strd]);
        ai1_sign_top_right[wd - 1] = SIGN(pu1_row[wd - 1] - pu1_row[wd + 1 - src_strd]);

        for(j = 0; j < wd; j++)
        {
            WORD8 *pi1_offset;
            WORD32 bot_left;
            WORD32 sign_bot_left;
            WORD32 idx;

            /* Even bytes are U, odd bytes are V */
            if(j & 1)
            {
                pi1_offset = pi1_sao_offset_v;
            }
            else
            {
                pi1_offset = pi1_sao_offset_u;
            }

            /* The first U/V pair takes its bottom-left neighbour from the */
            /* left column array, all others from the next row.            */
            /* NOTE(review): on the last filtered row with bottom          */
            /* available this reads pu1_src_left[2 * ht + j]; the result   */
            /* only affects the bottom-left pair, which is overwritten     */
            /* after the loop - confirm the caller's buffer extends there. */
            if(j < 2)
            {
                bot_left = pu1_left_nbr[2 * (i + 1) + j];
            }
            else
            {
                bot_left = pu1_row[j - 2 + src_strd];
            }

            sign_bot_left = SIGN(pu1_row[j] - bot_left);
            idx = 2 + ai1_sign_top_right[j] + sign_bot_left;

            /* Seen from the bottom-left sample of the next row, the       */
            /* current (still unfiltered) sample is its top-right          */
            /* neighbour, so the negated sign is reused for the next row.  */
            if(j >= 2)
            {
                ai1_sign_top_right[j - 2] = -sign_bot_left;
            }

            idx = gi4_ihevc_table_edge_idx[idx] & au1_col_mask[j >> 1];

            if(0 != idx)
            {
                pu1_row[j] = CLIP3(pu1_row[j] + pi1_offset[idx], 0, max_pel);
            }
        }

        pu1_row += src_strd;
    }

    /* Write back the two corner pairs computed before the main loop.      */
    /* Offsets are relative to the row cursor, which now points just past  */
    /* the last filtered row.                                              */
    first_row_off = -(pu1_avail[2] ? num_rows : num_rows + 1) * src_strd;
    last_row_off = pu1_avail[3] ? (-src_strd) : 0;

    pu1_row[first_row_off + wd - 2] = au1_top_right_pel[0];
    pu1_row[first_row_off + wd - 1] = au1_top_right_pel[1];
    pu1_row[last_row_off] = au1_bot_left_pel[0];
    pu1_row[last_row_off + 1] = au1_bot_left_pel[1];

    /* Hand out the samples saved before filtering */
    pu1_src_top_left[0] = au1_top_left_bkp[0];
    pu1_src_top_left[1] = au1_top_left_bkp[1];
    for(i = 0; i < 2 * ht; i++)
    {
        pu1_src_left[i] = au1_left_bkp[i];
    }
    for(j = 0; j < wd; j++)
    {
        pu1_src_top[j] = au1_top_bkp[j];
    }

}
1375