1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include <stdlib.h>
13 #include "loopfilter.h"
14 #include "onyxc_int.h"
15 
16 typedef unsigned char uc;
17 
vp8_signed_char_clamp(int t)18 static signed char vp8_signed_char_clamp(int t)
19 {
20     t = (t < -128 ? -128 : t);
21     t = (t > 127 ? 127 : t);
22     return (signed char) t;
23 }
24 
25 
26 /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
vp8_filter_mask(uc limit,uc blimit,uc p3,uc p2,uc p1,uc p0,uc q0,uc q1,uc q2,uc q3)27 static signed char vp8_filter_mask(uc limit, uc blimit,
28                             uc p3, uc p2, uc p1, uc p0,
29                             uc q0, uc q1, uc q2, uc q3)
30 {
31     signed char mask = 0;
32     mask |= (abs(p3 - p2) > limit);
33     mask |= (abs(p2 - p1) > limit);
34     mask |= (abs(p1 - p0) > limit);
35     mask |= (abs(q1 - q0) > limit);
36     mask |= (abs(q2 - q1) > limit);
37     mask |= (abs(q3 - q2) > limit);
38     mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > blimit);
39     return mask - 1;
40 }
41 
42 /* is there high variance internal edge ( 11111111 yes, 00000000 no) */
vp8_hevmask(uc thresh,uc p1,uc p0,uc q0,uc q1)43 static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
44 {
45     signed char hev = 0;
46     hev  |= (abs(p1 - p0) > thresh) * -1;
47     hev  |= (abs(q1 - q0) > thresh) * -1;
48     return hev;
49 }
50 
vp8_filter(signed char mask,uc hev,uc * op1,uc * op0,uc * oq0,uc * oq1)51 static void vp8_filter(signed char mask, uc hev, uc *op1,
52         uc *op0, uc *oq0, uc *oq1)
53 
54 {
55     signed char ps0, qs0;
56     signed char ps1, qs1;
57     signed char filter_value, Filter1, Filter2;
58     signed char u;
59 
60     ps1 = (signed char) * op1 ^ 0x80;
61     ps0 = (signed char) * op0 ^ 0x80;
62     qs0 = (signed char) * oq0 ^ 0x80;
63     qs1 = (signed char) * oq1 ^ 0x80;
64 
65     /* add outer taps if we have high edge variance */
66     filter_value = vp8_signed_char_clamp(ps1 - qs1);
67     filter_value &= hev;
68 
69     /* inner taps */
70     filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
71     filter_value &= mask;
72 
73     /* save bottom 3 bits so that we round one side +4 and the other +3
74      * if it equals 4 we'll set to adjust by -1 to account for the fact
75      * we'd round 3 the other way
76      */
77     Filter1 = vp8_signed_char_clamp(filter_value + 4);
78     Filter2 = vp8_signed_char_clamp(filter_value + 3);
79     Filter1 >>= 3;
80     Filter2 >>= 3;
81     u = vp8_signed_char_clamp(qs0 - Filter1);
82     *oq0 = u ^ 0x80;
83     u = vp8_signed_char_clamp(ps0 + Filter2);
84     *op0 = u ^ 0x80;
85     filter_value = Filter1;
86 
87     /* outer tap adjustments */
88     filter_value += 1;
89     filter_value >>= 1;
90     filter_value &= ~hev;
91 
92     u = vp8_signed_char_clamp(qs1 - filter_value);
93     *oq1 = u ^ 0x80;
94     u = vp8_signed_char_clamp(ps1 + filter_value);
95     *op1 = u ^ 0x80;
96 
97 }
vp8_loop_filter_horizontal_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)98 void vp8_loop_filter_horizontal_edge_c
99 (
100     unsigned char *s,
101     int p, /* pitch */
102     const unsigned char *blimit,
103     const unsigned char *limit,
104     const unsigned char *thresh,
105     int count
106 )
107 {
108     int  hev = 0; /* high edge variance */
109     signed char mask = 0;
110     int i = 0;
111 
112     /* loop filter designed to work using chars so that we can make maximum use
113      * of 8 bit simd instructions.
114      */
115     do
116     {
117         mask = vp8_filter_mask(limit[0], blimit[0],
118                                s[-4*p], s[-3*p], s[-2*p], s[-1*p],
119                                s[0*p], s[1*p], s[2*p], s[3*p]);
120 
121         hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
122 
123         vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
124 
125         ++s;
126     }
127     while (++i < count * 8);
128 }
129 
vp8_loop_filter_vertical_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)130 void vp8_loop_filter_vertical_edge_c
131 (
132     unsigned char *s,
133     int p,
134     const unsigned char *blimit,
135     const unsigned char *limit,
136     const unsigned char *thresh,
137     int count
138 )
139 {
140     int  hev = 0; /* high edge variance */
141     signed char mask = 0;
142     int i = 0;
143 
144     /* loop filter designed to work using chars so that we can make maximum use
145      * of 8 bit simd instructions.
146      */
147     do
148     {
149         mask = vp8_filter_mask(limit[0], blimit[0],
150                                s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
151 
152         hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
153 
154         vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
155 
156         s += p;
157     }
158     while (++i < count * 8);
159 }
160 
vp8_mbfilter(signed char mask,uc hev,uc * op2,uc * op1,uc * op0,uc * oq0,uc * oq1,uc * oq2)161 static void vp8_mbfilter(signed char mask, uc hev,
162                            uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
163 {
164     signed char s, u;
165     signed char filter_value, Filter1, Filter2;
166     signed char ps2 = (signed char) * op2 ^ 0x80;
167     signed char ps1 = (signed char) * op1 ^ 0x80;
168     signed char ps0 = (signed char) * op0 ^ 0x80;
169     signed char qs0 = (signed char) * oq0 ^ 0x80;
170     signed char qs1 = (signed char) * oq1 ^ 0x80;
171     signed char qs2 = (signed char) * oq2 ^ 0x80;
172 
173     /* add outer taps if we have high edge variance */
174     filter_value = vp8_signed_char_clamp(ps1 - qs1);
175     filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
176     filter_value &= mask;
177 
178     Filter2 = filter_value;
179     Filter2 &= hev;
180 
181     /* save bottom 3 bits so that we round one side +4 and the other +3 */
182     Filter1 = vp8_signed_char_clamp(Filter2 + 4);
183     Filter2 = vp8_signed_char_clamp(Filter2 + 3);
184     Filter1 >>= 3;
185     Filter2 >>= 3;
186     qs0 = vp8_signed_char_clamp(qs0 - Filter1);
187     ps0 = vp8_signed_char_clamp(ps0 + Filter2);
188 
189 
190     /* only apply wider filter if not high edge variance */
191     filter_value &= ~hev;
192     Filter2 = filter_value;
193 
194     /* roughly 3/7th difference across boundary */
195     u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
196     s = vp8_signed_char_clamp(qs0 - u);
197     *oq0 = s ^ 0x80;
198     s = vp8_signed_char_clamp(ps0 + u);
199     *op0 = s ^ 0x80;
200 
201     /* roughly 2/7th difference across boundary */
202     u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
203     s = vp8_signed_char_clamp(qs1 - u);
204     *oq1 = s ^ 0x80;
205     s = vp8_signed_char_clamp(ps1 + u);
206     *op1 = s ^ 0x80;
207 
208     /* roughly 1/7th difference across boundary */
209     u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
210     s = vp8_signed_char_clamp(qs2 - u);
211     *oq2 = s ^ 0x80;
212     s = vp8_signed_char_clamp(ps2 + u);
213     *op2 = s ^ 0x80;
214 }
215 
vp8_mbloop_filter_horizontal_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)216 void vp8_mbloop_filter_horizontal_edge_c
217 (
218     unsigned char *s,
219     int p,
220     const unsigned char *blimit,
221     const unsigned char *limit,
222     const unsigned char *thresh,
223     int count
224 )
225 {
226     signed char hev = 0; /* high edge variance */
227     signed char mask = 0;
228     int i = 0;
229 
230     /* loop filter designed to work using chars so that we can make maximum use
231      * of 8 bit simd instructions.
232      */
233     do
234     {
235 
236         mask = vp8_filter_mask(limit[0], blimit[0],
237                                s[-4*p], s[-3*p], s[-2*p], s[-1*p],
238                                s[0*p], s[1*p], s[2*p], s[3*p]);
239 
240         hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
241 
242         vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
243 
244         ++s;
245     }
246     while (++i < count * 8);
247 
248 }
249 
250 
vp8_mbloop_filter_vertical_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)251 void vp8_mbloop_filter_vertical_edge_c
252 (
253     unsigned char *s,
254     int p,
255     const unsigned char *blimit,
256     const unsigned char *limit,
257     const unsigned char *thresh,
258     int count
259 )
260 {
261     signed char hev = 0; /* high edge variance */
262     signed char mask = 0;
263     int i = 0;
264 
265     do
266     {
267 
268         mask = vp8_filter_mask(limit[0], blimit[0],
269                                s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
270 
271         hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
272 
273         vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
274 
275         s += p;
276     }
277     while (++i < count * 8);
278 
279 }
280 
281 /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
vp8_simple_filter_mask(uc blimit,uc p1,uc p0,uc q0,uc q1)282 static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
283 {
284 /* Why does this cause problems for win32?
285  * error C2143: syntax error : missing ';' before 'type'
286  *  (void) limit;
287  */
288     signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= blimit) * -1;
289     return mask;
290 }
291 
vp8_simple_filter(signed char mask,uc * op1,uc * op0,uc * oq0,uc * oq1)292 static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
293 {
294     signed char filter_value, Filter1, Filter2;
295     signed char p1 = (signed char) * op1 ^ 0x80;
296     signed char p0 = (signed char) * op0 ^ 0x80;
297     signed char q0 = (signed char) * oq0 ^ 0x80;
298     signed char q1 = (signed char) * oq1 ^ 0x80;
299     signed char u;
300 
301     filter_value = vp8_signed_char_clamp(p1 - q1);
302     filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
303     filter_value &= mask;
304 
305     /* save bottom 3 bits so that we round one side +4 and the other +3 */
306     Filter1 = vp8_signed_char_clamp(filter_value + 4);
307     Filter1 >>= 3;
308     u = vp8_signed_char_clamp(q0 - Filter1);
309     *oq0  = u ^ 0x80;
310 
311     Filter2 = vp8_signed_char_clamp(filter_value + 3);
312     Filter2 >>= 3;
313     u = vp8_signed_char_clamp(p0 + Filter2);
314     *op0 = u ^ 0x80;
315 }
316 
vp8_loop_filter_simple_horizontal_edge_c(unsigned char * s,int p,const unsigned char * blimit)317 void vp8_loop_filter_simple_horizontal_edge_c
318 (
319     unsigned char *s,
320     int p,
321     const unsigned char *blimit
322 )
323 {
324     signed char mask = 0;
325     int i = 0;
326 
327     do
328     {
329         mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
330         vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
331         ++s;
332     }
333     while (++i < 16);
334 }
335 
vp8_loop_filter_simple_vertical_edge_c(unsigned char * s,int p,const unsigned char * blimit)336 void vp8_loop_filter_simple_vertical_edge_c
337 (
338     unsigned char *s,
339     int p,
340     const unsigned char *blimit
341 )
342 {
343     signed char mask = 0;
344     int i = 0;
345 
346     do
347     {
348         mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
349         vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
350         s += p;
351     }
352     while (++i < 16);
353 
354 }
355 
356 /* Horizontal MB filtering */
vp8_loop_filter_mbh_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)357 void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
358                            unsigned char *v_ptr, int y_stride, int uv_stride,
359                            loop_filter_info *lfi)
360 {
361     vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
362 
363     if (u_ptr)
364         vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
365 
366     if (v_ptr)
367         vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
368 }
369 
370 /* Vertical MB Filtering */
vp8_loop_filter_mbv_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)371 void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
372                            unsigned char *v_ptr, int y_stride, int uv_stride,
373                            loop_filter_info *lfi)
374 {
375     vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
376 
377     if (u_ptr)
378         vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
379 
380     if (v_ptr)
381         vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
382 }
383 
384 /* Horizontal B Filtering */
vp8_loop_filter_bh_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)385 void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
386                           unsigned char *v_ptr, int y_stride, int uv_stride,
387                           loop_filter_info *lfi)
388 {
389     vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
390     vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
391     vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
392 
393     if (u_ptr)
394         vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
395 
396     if (v_ptr)
397         vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
398 }
399 
vp8_loop_filter_bhs_c(unsigned char * y_ptr,int y_stride,const unsigned char * blimit)400 void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
401                            const unsigned char *blimit)
402 {
403     vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
404     vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
405     vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
406 }
407 
408 /* Vertical B Filtering */
vp8_loop_filter_bv_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)409 void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
410                           unsigned char *v_ptr, int y_stride, int uv_stride,
411                           loop_filter_info *lfi)
412 {
413     vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
414     vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
415     vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
416 
417     if (u_ptr)
418         vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
419 
420     if (v_ptr)
421         vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
422 }
423 
vp8_loop_filter_bvs_c(unsigned char * y_ptr,int y_stride,const unsigned char * blimit)424 void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
425                            const unsigned char *blimit)
426 {
427     vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
428     vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
429     vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
430 }
431