1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "filter.h"
13 
14 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
15 {
16     { 128,   0 },
17     { 112,  16 },
18     {  96,  32 },
19     {  80,  48 },
20     {  64,  64 },
21     {  48,  80 },
22     {  32,  96 },
23     {  16, 112 }
24 };
25 
26 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
27 {
28 
29     { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
30     { 0, -6,  123,   12,  -1,  0 },
31     { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
32     { 0, -9,   93,   50,  -6,  0 },
33     { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
34     { 0, -6,   50,   93,  -9,  0 },
35     { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
36     { 0, -1,   12,  123,  -6,  0 },
37 };
38 
filter_block2d_first_pass(unsigned char * src_ptr,int * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const short * vp8_filter)39 static void filter_block2d_first_pass
40 (
41     unsigned char *src_ptr,
42     int *output_ptr,
43     unsigned int src_pixels_per_line,
44     unsigned int pixel_step,
45     unsigned int output_height,
46     unsigned int output_width,
47     const short *vp8_filter
48 )
49 {
50     unsigned int i, j;
51     int  Temp;
52 
53     for (i = 0; i < output_height; i++)
54     {
55         for (j = 0; j < output_width; j++)
56         {
57             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
58                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
59                    ((int)src_ptr[0]                 * vp8_filter[2]) +
60                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
61                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
62                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
63                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
64 
65             /* Normalize back to 0-255 */
66             Temp = Temp >> VP8_FILTER_SHIFT;
67 
68             if (Temp < 0)
69                 Temp = 0;
70             else if (Temp > 255)
71                 Temp = 255;
72 
73             output_ptr[j] = Temp;
74             src_ptr++;
75         }
76 
77         /* Next row... */
78         src_ptr    += src_pixels_per_line - output_width;
79         output_ptr += output_width;
80     }
81 }
82 
filter_block2d_second_pass(int * src_ptr,unsigned char * output_ptr,int output_pitch,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const short * vp8_filter)83 static void filter_block2d_second_pass
84 (
85     int *src_ptr,
86     unsigned char *output_ptr,
87     int output_pitch,
88     unsigned int src_pixels_per_line,
89     unsigned int pixel_step,
90     unsigned int output_height,
91     unsigned int output_width,
92     const short *vp8_filter
93 )
94 {
95     unsigned int i, j;
96     int  Temp;
97 
98     for (i = 0; i < output_height; i++)
99     {
100         for (j = 0; j < output_width; j++)
101         {
102             /* Apply filter */
103             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
104                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
105                    ((int)src_ptr[0]                 * vp8_filter[2]) +
106                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
107                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
108                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
109                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
110 
111             /* Normalize back to 0-255 */
112             Temp = Temp >> VP8_FILTER_SHIFT;
113 
114             if (Temp < 0)
115                 Temp = 0;
116             else if (Temp > 255)
117                 Temp = 255;
118 
119             output_ptr[j] = (unsigned char)Temp;
120             src_ptr++;
121         }
122 
123         /* Start next row */
124         src_ptr    += src_pixels_per_line - output_width;
125         output_ptr += output_pitch;
126     }
127 }
128 
129 
filter_block2d(unsigned char * src_ptr,unsigned char * output_ptr,unsigned int src_pixels_per_line,int output_pitch,const short * HFilter,const short * VFilter)130 static void filter_block2d
131 (
132     unsigned char  *src_ptr,
133     unsigned char  *output_ptr,
134     unsigned int src_pixels_per_line,
135     int output_pitch,
136     const short  *HFilter,
137     const short  *VFilter
138 )
139 {
140     int FData[9*4]; /* Temp data buffer used in filtering */
141 
142     /* First filter 1-D horizontally... */
143     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
144 
145     /* then filter verticaly... */
146     filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
147 }
148 
149 
vp8_sixtap_predict4x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)150 void vp8_sixtap_predict4x4_c
151 (
152     unsigned char  *src_ptr,
153     int   src_pixels_per_line,
154     int  xoffset,
155     int  yoffset,
156     unsigned char *dst_ptr,
157     int dst_pitch
158 )
159 {
160     const short  *HFilter;
161     const short  *VFilter;
162 
163     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
164     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
165 
166     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
167 }
vp8_sixtap_predict8x8_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)168 void vp8_sixtap_predict8x8_c
169 (
170     unsigned char  *src_ptr,
171     int  src_pixels_per_line,
172     int  xoffset,
173     int  yoffset,
174     unsigned char *dst_ptr,
175     int  dst_pitch
176 )
177 {
178     const short  *HFilter;
179     const short  *VFilter;
180     int FData[13*16];   /* Temp data buffer used in filtering */
181 
182     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
183     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
184 
185     /* First filter 1-D horizontally... */
186     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
187 
188 
189     /* then filter verticaly... */
190     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
191 
192 }
193 
vp8_sixtap_predict8x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)194 void vp8_sixtap_predict8x4_c
195 (
196     unsigned char  *src_ptr,
197     int  src_pixels_per_line,
198     int  xoffset,
199     int  yoffset,
200     unsigned char *dst_ptr,
201     int  dst_pitch
202 )
203 {
204     const short  *HFilter;
205     const short  *VFilter;
206     int FData[13*16];   /* Temp data buffer used in filtering */
207 
208     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
209     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
210 
211     /* First filter 1-D horizontally... */
212     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
213 
214 
215     /* then filter verticaly... */
216     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
217 
218 }
219 
vp8_sixtap_predict16x16_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)220 void vp8_sixtap_predict16x16_c
221 (
222     unsigned char  *src_ptr,
223     int  src_pixels_per_line,
224     int  xoffset,
225     int  yoffset,
226     unsigned char *dst_ptr,
227     int  dst_pitch
228 )
229 {
230     const short  *HFilter;
231     const short  *VFilter;
232     int FData[21*24];   /* Temp data buffer used in filtering */
233 
234 
235     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
236     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
237 
238     /* First filter 1-D horizontally... */
239     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
240 
241     /* then filter verticaly... */
242     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
243 
244 }
245 
246 
247 /****************************************************************************
248  *
249  *  ROUTINE       : filter_block2d_bil_first_pass
250  *
251  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
252  *                  UINT32  src_stride : Stride of source block.
253  *                  UINT32  height     : Block height.
254  *                  UINT32  width      : Block width.
255  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
256  *
257  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
258  *
259  *  RETURNS       : void
260  *
261  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
262  *                  in the horizontal direction to produce the filtered output
263  *                  block. Used to implement first-pass of 2-D separable filter.
264  *
265  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
266  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
267  *
268  ****************************************************************************/
filter_block2d_bil_first_pass(unsigned char * src_ptr,unsigned short * dst_ptr,unsigned int src_stride,unsigned int height,unsigned int width,const short * vp8_filter)269 static void filter_block2d_bil_first_pass
270 (
271     unsigned char  *src_ptr,
272     unsigned short *dst_ptr,
273     unsigned int    src_stride,
274     unsigned int    height,
275     unsigned int    width,
276     const short    *vp8_filter
277 )
278 {
279     unsigned int i, j;
280 
281     for (i = 0; i < height; i++)
282     {
283         for (j = 0; j < width; j++)
284         {
285             /* Apply bilinear filter */
286             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
287                           ((int)src_ptr[1] * vp8_filter[1]) +
288                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
289             src_ptr++;
290         }
291 
292         /* Next row... */
293         src_ptr += src_stride - width;
294         dst_ptr += width;
295     }
296 }
297 
298 /****************************************************************************
299  *
300  *  ROUTINE       : filter_block2d_bil_second_pass
301  *
302  *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
303  *                  UINT32  dst_pitch  : Destination block pitch.
304  *                  UINT32  height     : Block height.
305  *                  UINT32  width      : Block width.
306  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
307  *
308  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
309  *
310  *  RETURNS       : void
311  *
312  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
313  *                  in the vertical direction to produce the filtered output
314  *                  block. Used to implement second-pass of 2-D separable filter.
315  *
316  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
317  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
318  *
319  ****************************************************************************/
filter_block2d_bil_second_pass(unsigned short * src_ptr,unsigned char * dst_ptr,int dst_pitch,unsigned int height,unsigned int width,const short * vp8_filter)320 static void filter_block2d_bil_second_pass
321 (
322     unsigned short *src_ptr,
323     unsigned char  *dst_ptr,
324     int             dst_pitch,
325     unsigned int    height,
326     unsigned int    width,
327     const short    *vp8_filter
328 )
329 {
330     unsigned int  i, j;
331     int  Temp;
332 
333     for (i = 0; i < height; i++)
334     {
335         for (j = 0; j < width; j++)
336         {
337             /* Apply filter */
338             Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
339                    ((int)src_ptr[width] * vp8_filter[1]) +
340                    (VP8_FILTER_WEIGHT / 2);
341             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
342             src_ptr++;
343         }
344 
345         /* Next row... */
346         dst_ptr += dst_pitch;
347     }
348 }
349 
350 
351 /****************************************************************************
352  *
353  *  ROUTINE       : filter_block2d_bil
354  *
355  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
356  *                  UINT32  src_pitch        : Stride of source block.
357  *                  UINT32  dst_pitch        : Stride of destination block.
358  *                  INT32  *HFilter          : Array of 2 horizontal filter taps.
359  *                  INT32  *VFilter          : Array of 2 vertical filter taps.
360  *                  INT32  Width             : Block width
361  *                  INT32  Height            : Block height
362  *
363  *  OUTPUTS       : UINT16 *dst_ptr       : Pointer to filtered block.
364  *
365  *  RETURNS       : void
366  *
367  *  FUNCTION      : 2-D filters an input block by applying a 2-tap
368  *                  bi-linear filter horizontally followed by a 2-tap
369  *                  bi-linear filter vertically on the result.
370  *
371  *  SPECIAL NOTES : The largest block size can be handled here is 16x16
372  *
373  ****************************************************************************/
filter_block2d_bil(unsigned char * src_ptr,unsigned char * dst_ptr,unsigned int src_pitch,unsigned int dst_pitch,const short * HFilter,const short * VFilter,int Width,int Height)374 static void filter_block2d_bil
375 (
376     unsigned char *src_ptr,
377     unsigned char *dst_ptr,
378     unsigned int   src_pitch,
379     unsigned int   dst_pitch,
380     const short   *HFilter,
381     const short   *VFilter,
382     int            Width,
383     int            Height
384 )
385 {
386 
387     unsigned short FData[17*16];    /* Temp data buffer used in filtering */
388 
389     /* First filter 1-D horizontally... */
390     filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
391 
392     /* then 1-D vertically... */
393     filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
394 }
395 
396 
vp8_bilinear_predict4x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)397 void vp8_bilinear_predict4x4_c
398 (
399     unsigned char  *src_ptr,
400     int   src_pixels_per_line,
401     int  xoffset,
402     int  yoffset,
403     unsigned char *dst_ptr,
404     int dst_pitch
405 )
406 {
407     const short *HFilter;
408     const short *VFilter;
409 
410     HFilter = vp8_bilinear_filters[xoffset];
411     VFilter = vp8_bilinear_filters[yoffset];
412 #if 0
413     {
414         int i;
415         unsigned char temp1[16];
416         unsigned char temp2[16];
417 
418         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
419         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
420 
421         for (i = 0; i < 16; i++)
422         {
423             if (temp1[i] != temp2[i])
424             {
425                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
426                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
427             }
428         }
429     }
430 #endif
431     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
432 
433 }
434 
vp8_bilinear_predict8x8_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)435 void vp8_bilinear_predict8x8_c
436 (
437     unsigned char  *src_ptr,
438     int  src_pixels_per_line,
439     int  xoffset,
440     int  yoffset,
441     unsigned char *dst_ptr,
442     int  dst_pitch
443 )
444 {
445     const short *HFilter;
446     const short *VFilter;
447 
448     HFilter = vp8_bilinear_filters[xoffset];
449     VFilter = vp8_bilinear_filters[yoffset];
450 
451     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
452 
453 }
454 
vp8_bilinear_predict8x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)455 void vp8_bilinear_predict8x4_c
456 (
457     unsigned char  *src_ptr,
458     int  src_pixels_per_line,
459     int  xoffset,
460     int  yoffset,
461     unsigned char *dst_ptr,
462     int  dst_pitch
463 )
464 {
465     const short *HFilter;
466     const short *VFilter;
467 
468     HFilter = vp8_bilinear_filters[xoffset];
469     VFilter = vp8_bilinear_filters[yoffset];
470 
471     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
472 
473 }
474 
vp8_bilinear_predict16x16_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)475 void vp8_bilinear_predict16x16_c
476 (
477     unsigned char  *src_ptr,
478     int  src_pixels_per_line,
479     int  xoffset,
480     int  yoffset,
481     unsigned char *dst_ptr,
482     int  dst_pitch
483 )
484 {
485     const short *HFilter;
486     const short *VFilter;
487 
488     HFilter = vp8_bilinear_filters[xoffset];
489     VFilter = vp8_bilinear_filters[yoffset];
490 
491     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
492 }
493