1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include "loopfilter.h"
13 #include "onyxc_int.h"
14 
15 typedef unsigned char uc;
16 
vp8_signed_char_clamp(int t)17 static signed char vp8_signed_char_clamp(int t) {
18   t = (t < -128 ? -128 : t);
19   t = (t > 127 ? 127 : t);
20   return (signed char)t;
21 }
22 
23 /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
vp8_filter_mask(uc limit,uc blimit,uc p3,uc p2,uc p1,uc p0,uc q0,uc q1,uc q2,uc q3)24 static signed char vp8_filter_mask(uc limit, uc blimit, uc p3, uc p2, uc p1,
25                                    uc p0, uc q0, uc q1, uc q2, uc q3) {
26   signed char mask = 0;
27   mask |= (abs(p3 - p2) > limit);
28   mask |= (abs(p2 - p1) > limit);
29   mask |= (abs(p1 - p0) > limit);
30   mask |= (abs(q1 - q0) > limit);
31   mask |= (abs(q2 - q1) > limit);
32   mask |= (abs(q3 - q2) > limit);
33   mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
34   return mask - 1;
35 }
36 
37 /* is there high variance internal edge ( 11111111 yes, 00000000 no) */
vp8_hevmask(uc thresh,uc p1,uc p0,uc q0,uc q1)38 static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) {
39   signed char hev = 0;
40   hev |= (abs(p1 - p0) > thresh) * -1;
41   hev |= (abs(q1 - q0) > thresh) * -1;
42   return hev;
43 }
44 
vp8_filter(signed char mask,uc hev,uc * op1,uc * op0,uc * oq0,uc * oq1)45 static void vp8_filter(signed char mask, uc hev, uc *op1, uc *op0, uc *oq0,
46                        uc *oq1) {
47   signed char ps0, qs0;
48   signed char ps1, qs1;
49   signed char filter_value, Filter1, Filter2;
50   signed char u;
51 
52   ps1 = (signed char)*op1 ^ 0x80;
53   ps0 = (signed char)*op0 ^ 0x80;
54   qs0 = (signed char)*oq0 ^ 0x80;
55   qs1 = (signed char)*oq1 ^ 0x80;
56 
57   /* add outer taps if we have high edge variance */
58   filter_value = vp8_signed_char_clamp(ps1 - qs1);
59   filter_value &= hev;
60 
61   /* inner taps */
62   filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
63   filter_value &= mask;
64 
65   /* save bottom 3 bits so that we round one side +4 and the other +3
66    * if it equals 4 we'll set it to adjust by -1 to account for the fact
67    * we'd round it by 3 the other way
68    */
69   Filter1 = vp8_signed_char_clamp(filter_value + 4);
70   Filter2 = vp8_signed_char_clamp(filter_value + 3);
71   Filter1 >>= 3;
72   Filter2 >>= 3;
73   u = vp8_signed_char_clamp(qs0 - Filter1);
74   *oq0 = u ^ 0x80;
75   u = vp8_signed_char_clamp(ps0 + Filter2);
76   *op0 = u ^ 0x80;
77   filter_value = Filter1;
78 
79   /* outer tap adjustments */
80   filter_value += 1;
81   filter_value >>= 1;
82   filter_value &= ~hev;
83 
84   u = vp8_signed_char_clamp(qs1 - filter_value);
85   *oq1 = u ^ 0x80;
86   u = vp8_signed_char_clamp(ps1 + filter_value);
87   *op1 = u ^ 0x80;
88 }
89 
loop_filter_horizontal_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)90 static void loop_filter_horizontal_edge_c(unsigned char *s, int p, /* pitch */
91                                           const unsigned char *blimit,
92                                           const unsigned char *limit,
93                                           const unsigned char *thresh,
94                                           int count) {
95   int hev = 0; /* high edge variance */
96   signed char mask = 0;
97   int i = 0;
98 
99   /* loop filter designed to work using chars so that we can make maximum use
100    * of 8 bit simd instructions.
101    */
102   do {
103     mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p],
104                            s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
105 
106     hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
107 
108     vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
109 
110     ++s;
111   } while (++i < count * 8);
112 }
113 
loop_filter_vertical_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)114 static void loop_filter_vertical_edge_c(unsigned char *s, int p,
115                                         const unsigned char *blimit,
116                                         const unsigned char *limit,
117                                         const unsigned char *thresh,
118                                         int count) {
119   int hev = 0; /* high edge variance */
120   signed char mask = 0;
121   int i = 0;
122 
123   /* loop filter designed to work using chars so that we can make maximum use
124    * of 8 bit simd instructions.
125    */
126   do {
127     mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1],
128                            s[0], s[1], s[2], s[3]);
129 
130     hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
131 
132     vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
133 
134     s += p;
135   } while (++i < count * 8);
136 }
137 
vp8_mbfilter(signed char mask,uc hev,uc * op2,uc * op1,uc * op0,uc * oq0,uc * oq1,uc * oq2)138 static void vp8_mbfilter(signed char mask, uc hev, uc *op2, uc *op1, uc *op0,
139                          uc *oq0, uc *oq1, uc *oq2) {
140   signed char s, u;
141   signed char filter_value, Filter1, Filter2;
142   signed char ps2 = (signed char)*op2 ^ 0x80;
143   signed char ps1 = (signed char)*op1 ^ 0x80;
144   signed char ps0 = (signed char)*op0 ^ 0x80;
145   signed char qs0 = (signed char)*oq0 ^ 0x80;
146   signed char qs1 = (signed char)*oq1 ^ 0x80;
147   signed char qs2 = (signed char)*oq2 ^ 0x80;
148 
149   /* add outer taps if we have high edge variance */
150   filter_value = vp8_signed_char_clamp(ps1 - qs1);
151   filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
152   filter_value &= mask;
153 
154   Filter2 = filter_value;
155   Filter2 &= hev;
156 
157   /* save bottom 3 bits so that we round one side +4 and the other +3 */
158   Filter1 = vp8_signed_char_clamp(Filter2 + 4);
159   Filter2 = vp8_signed_char_clamp(Filter2 + 3);
160   Filter1 >>= 3;
161   Filter2 >>= 3;
162   qs0 = vp8_signed_char_clamp(qs0 - Filter1);
163   ps0 = vp8_signed_char_clamp(ps0 + Filter2);
164 
165   /* only apply wider filter if not high edge variance */
166   filter_value &= ~hev;
167   Filter2 = filter_value;
168 
169   /* roughly 3/7th difference across boundary */
170   u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
171   s = vp8_signed_char_clamp(qs0 - u);
172   *oq0 = s ^ 0x80;
173   s = vp8_signed_char_clamp(ps0 + u);
174   *op0 = s ^ 0x80;
175 
176   /* roughly 2/7th difference across boundary */
177   u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
178   s = vp8_signed_char_clamp(qs1 - u);
179   *oq1 = s ^ 0x80;
180   s = vp8_signed_char_clamp(ps1 + u);
181   *op1 = s ^ 0x80;
182 
183   /* roughly 1/7th difference across boundary */
184   u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
185   s = vp8_signed_char_clamp(qs2 - u);
186   *oq2 = s ^ 0x80;
187   s = vp8_signed_char_clamp(ps2 + u);
188   *op2 = s ^ 0x80;
189 }
190 
mbloop_filter_horizontal_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)191 static void mbloop_filter_horizontal_edge_c(unsigned char *s, int p,
192                                             const unsigned char *blimit,
193                                             const unsigned char *limit,
194                                             const unsigned char *thresh,
195                                             int count) {
196   signed char hev = 0; /* high edge variance */
197   signed char mask = 0;
198   int i = 0;
199 
200   /* loop filter designed to work using chars so that we can make maximum use
201    * of 8 bit simd instructions.
202    */
203   do {
204     mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p],
205                            s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
206 
207     hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
208 
209     vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
210                  s + 2 * p);
211 
212     ++s;
213   } while (++i < count * 8);
214 }
215 
mbloop_filter_vertical_edge_c(unsigned char * s,int p,const unsigned char * blimit,const unsigned char * limit,const unsigned char * thresh,int count)216 static void mbloop_filter_vertical_edge_c(unsigned char *s, int p,
217                                           const unsigned char *blimit,
218                                           const unsigned char *limit,
219                                           const unsigned char *thresh,
220                                           int count) {
221   signed char hev = 0; /* high edge variance */
222   signed char mask = 0;
223   int i = 0;
224 
225   do {
226     mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1],
227                            s[0], s[1], s[2], s[3]);
228 
229     hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
230 
231     vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
232 
233     s += p;
234   } while (++i < count * 8);
235 }
236 
237 /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
vp8_simple_filter_mask(uc blimit,uc p1,uc p0,uc q0,uc q1)238 static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0,
239                                           uc q1) {
240   /* Why does this cause problems for win32?
241    * error C2143: syntax error : missing ';' before 'type'
242    *  (void) limit;
243    */
244   signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
245   return mask;
246 }
247 
vp8_simple_filter(signed char mask,uc * op1,uc * op0,uc * oq0,uc * oq1)248 static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0,
249                               uc *oq1) {
250   signed char filter_value, Filter1, Filter2;
251   signed char p1 = (signed char)*op1 ^ 0x80;
252   signed char p0 = (signed char)*op0 ^ 0x80;
253   signed char q0 = (signed char)*oq0 ^ 0x80;
254   signed char q1 = (signed char)*oq1 ^ 0x80;
255   signed char u;
256 
257   filter_value = vp8_signed_char_clamp(p1 - q1);
258   filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
259   filter_value &= mask;
260 
261   /* save bottom 3 bits so that we round one side +4 and the other +3 */
262   Filter1 = vp8_signed_char_clamp(filter_value + 4);
263   Filter1 >>= 3;
264   u = vp8_signed_char_clamp(q0 - Filter1);
265   *oq0 = u ^ 0x80;
266 
267   Filter2 = vp8_signed_char_clamp(filter_value + 3);
268   Filter2 >>= 3;
269   u = vp8_signed_char_clamp(p0 + Filter2);
270   *op0 = u ^ 0x80;
271 }
272 
vp8_loop_filter_simple_horizontal_edge_c(unsigned char * y_ptr,int y_stride,const unsigned char * blimit)273 void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y_ptr,
274                                               int y_stride,
275                                               const unsigned char *blimit) {
276   signed char mask = 0;
277   int i = 0;
278 
279   do {
280     mask = vp8_simple_filter_mask(blimit[0], y_ptr[-2 * y_stride],
281                                   y_ptr[-1 * y_stride], y_ptr[0 * y_stride],
282                                   y_ptr[1 * y_stride]);
283     vp8_simple_filter(mask, y_ptr - 2 * y_stride, y_ptr - 1 * y_stride, y_ptr,
284                       y_ptr + 1 * y_stride);
285     ++y_ptr;
286   } while (++i < 16);
287 }
288 
vp8_loop_filter_simple_vertical_edge_c(unsigned char * y_ptr,int y_stride,const unsigned char * blimit)289 void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y_ptr, int y_stride,
290                                             const unsigned char *blimit) {
291   signed char mask = 0;
292   int i = 0;
293 
294   do {
295     mask = vp8_simple_filter_mask(blimit[0], y_ptr[-2], y_ptr[-1], y_ptr[0],
296                                   y_ptr[1]);
297     vp8_simple_filter(mask, y_ptr - 2, y_ptr - 1, y_ptr, y_ptr + 1);
298     y_ptr += y_stride;
299   } while (++i < 16);
300 }
301 
302 /* Horizontal MB filtering */
vp8_loop_filter_mbh_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)303 void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
304                            unsigned char *v_ptr, int y_stride, int uv_stride,
305                            loop_filter_info *lfi) {
306   mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim,
307                                   lfi->hev_thr, 2);
308 
309   if (u_ptr) {
310     mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim,
311                                     lfi->hev_thr, 1);
312   }
313 
314   if (v_ptr) {
315     mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim,
316                                     lfi->hev_thr, 1);
317   }
318 }
319 
320 /* Vertical MB Filtering */
vp8_loop_filter_mbv_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)321 void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
322                            unsigned char *v_ptr, int y_stride, int uv_stride,
323                            loop_filter_info *lfi) {
324   mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim,
325                                 lfi->hev_thr, 2);
326 
327   if (u_ptr) {
328     mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim,
329                                   lfi->hev_thr, 1);
330   }
331 
332   if (v_ptr) {
333     mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim,
334                                   lfi->hev_thr, 1);
335   }
336 }
337 
338 /* Horizontal B Filtering */
vp8_loop_filter_bh_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)339 void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
340                           unsigned char *v_ptr, int y_stride, int uv_stride,
341                           loop_filter_info *lfi) {
342   loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim,
343                                 lfi->lim, lfi->hev_thr, 2);
344   loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim,
345                                 lfi->lim, lfi->hev_thr, 2);
346   loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim,
347                                 lfi->lim, lfi->hev_thr, 2);
348 
349   if (u_ptr) {
350     loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim,
351                                   lfi->lim, lfi->hev_thr, 1);
352   }
353 
354   if (v_ptr) {
355     loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim,
356                                   lfi->lim, lfi->hev_thr, 1);
357   }
358 }
359 
vp8_loop_filter_bhs_c(unsigned char * y_ptr,int y_stride,const unsigned char * blimit)360 void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
361                            const unsigned char *blimit) {
362   vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride,
363                                            blimit);
364   vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride,
365                                            blimit);
366   vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride,
367                                            blimit);
368 }
369 
370 /* Vertical B Filtering */
vp8_loop_filter_bv_c(unsigned char * y_ptr,unsigned char * u_ptr,unsigned char * v_ptr,int y_stride,int uv_stride,loop_filter_info * lfi)371 void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
372                           unsigned char *v_ptr, int y_stride, int uv_stride,
373                           loop_filter_info *lfi) {
374   loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
375                               lfi->hev_thr, 2);
376   loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
377                               lfi->hev_thr, 2);
378   loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
379                               lfi->hev_thr, 2);
380 
381   if (u_ptr) {
382     loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim,
383                                 lfi->hev_thr, 1);
384   }
385 
386   if (v_ptr) {
387     loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim,
388                                 lfi->hev_thr, 1);
389   }
390 }
391 
vp8_loop_filter_bvs_c(unsigned char * y_ptr,int y_stride,const unsigned char * blimit)392 void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
393                            const unsigned char *blimit) {
394   vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
395   vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
396   vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
397 }
398