1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "filter.h"
13 #include "./vp8_rtcd.h"
14 
15 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
16 {
17     { 128,   0 },
18     { 112,  16 },
19     {  96,  32 },
20     {  80,  48 },
21     {  64,  64 },
22     {  48,  80 },
23     {  32,  96 },
24     {  16, 112 }
25 };
26 
27 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
28 {
29 
30     { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
31     { 0, -6,  123,   12,  -1,  0 },
32     { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
33     { 0, -9,   93,   50,  -6,  0 },
34     { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
35     { 0, -6,   50,   93,  -9,  0 },
36     { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
37     { 0, -1,   12,  123,  -6,  0 },
38 };
39 
filter_block2d_first_pass(unsigned char * src_ptr,int * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const short * vp8_filter)40 static void filter_block2d_first_pass
41 (
42     unsigned char *src_ptr,
43     int *output_ptr,
44     unsigned int src_pixels_per_line,
45     unsigned int pixel_step,
46     unsigned int output_height,
47     unsigned int output_width,
48     const short *vp8_filter
49 )
50 {
51     unsigned int i, j;
52     int  Temp;
53 
54     for (i = 0; i < output_height; i++)
55     {
56         for (j = 0; j < output_width; j++)
57         {
58             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
59                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
60                    ((int)src_ptr[0]                 * vp8_filter[2]) +
61                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
62                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
63                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
64                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
65 
66             /* Normalize back to 0-255 */
67             Temp = Temp >> VP8_FILTER_SHIFT;
68 
69             if (Temp < 0)
70                 Temp = 0;
71             else if (Temp > 255)
72                 Temp = 255;
73 
74             output_ptr[j] = Temp;
75             src_ptr++;
76         }
77 
78         /* Next row... */
79         src_ptr    += src_pixels_per_line - output_width;
80         output_ptr += output_width;
81     }
82 }
83 
filter_block2d_second_pass(int * src_ptr,unsigned char * output_ptr,int output_pitch,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const short * vp8_filter)84 static void filter_block2d_second_pass
85 (
86     int *src_ptr,
87     unsigned char *output_ptr,
88     int output_pitch,
89     unsigned int src_pixels_per_line,
90     unsigned int pixel_step,
91     unsigned int output_height,
92     unsigned int output_width,
93     const short *vp8_filter
94 )
95 {
96     unsigned int i, j;
97     int  Temp;
98 
99     for (i = 0; i < output_height; i++)
100     {
101         for (j = 0; j < output_width; j++)
102         {
103             /* Apply filter */
104             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
105                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
106                    ((int)src_ptr[0]                 * vp8_filter[2]) +
107                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
108                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
109                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
110                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
111 
112             /* Normalize back to 0-255 */
113             Temp = Temp >> VP8_FILTER_SHIFT;
114 
115             if (Temp < 0)
116                 Temp = 0;
117             else if (Temp > 255)
118                 Temp = 255;
119 
120             output_ptr[j] = (unsigned char)Temp;
121             src_ptr++;
122         }
123 
124         /* Start next row */
125         src_ptr    += src_pixels_per_line - output_width;
126         output_ptr += output_pitch;
127     }
128 }
129 
130 
/*
 * 2-D 6-tap filtering of a 4x4 block.
 *
 * The horizontal pass is run over 9 source rows (the 4 block rows plus the
 * 2 rows above and 3 rows below that the vertical taps reach), giving a
 * 9x4 intermediate.  The vertical pass then starts at intermediate row 2
 * and writes the 4x4 result to output_ptr.
 *
 *   src_ptr             : top-left pixel of the 4x4 source block.
 *   output_ptr          : destination block.
 *   src_pixels_per_line : source stride.
 *   output_pitch        : destination stride.
 *   HFilter, VFilter    : six horizontal / vertical filter taps.
 */
static void filter_block2d
(
    unsigned char  *src_ptr,
    unsigned char  *output_ptr,
    unsigned int src_pixels_per_line,
    int output_pitch,
    const short  *HFilter,
    const short  *VFilter
)
{
    int intermediate[9*4];  /* horizontally filtered rows, 4 samples each */
    unsigned char *first_row = src_ptr - (2 * src_pixels_per_line);
    int *block_start = intermediate + 8;    /* skip the 2 leading rows */

    /* Horizontal pass: 9 rows of 4 samples, taps 1 pixel apart. */
    filter_block2d_first_pass(first_row, intermediate, src_pixels_per_line,
                              1, 9, 4, HFilter);

    /* Vertical pass: 4 rows of 4 pixels, taps one intermediate row apart. */
    filter_block2d_second_pass(block_start, output_ptr, output_pitch,
                               4, 4, 4, 4, VFilter);
}
149 
150 
vp8_sixtap_predict4x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)151 void vp8_sixtap_predict4x4_c
152 (
153     unsigned char  *src_ptr,
154     int   src_pixels_per_line,
155     int  xoffset,
156     int  yoffset,
157     unsigned char *dst_ptr,
158     int dst_pitch
159 )
160 {
161     const short  *HFilter;
162     const short  *VFilter;
163 
164     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
165     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
166 
167     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
168 }
vp8_sixtap_predict8x8_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)169 void vp8_sixtap_predict8x8_c
170 (
171     unsigned char  *src_ptr,
172     int  src_pixels_per_line,
173     int  xoffset,
174     int  yoffset,
175     unsigned char *dst_ptr,
176     int  dst_pitch
177 )
178 {
179     const short  *HFilter;
180     const short  *VFilter;
181     int FData[13*16];   /* Temp data buffer used in filtering */
182 
183     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
184     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
185 
186     /* First filter 1-D horizontally... */
187     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
188 
189 
190     /* then filter verticaly... */
191     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
192 
193 }
194 
vp8_sixtap_predict8x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)195 void vp8_sixtap_predict8x4_c
196 (
197     unsigned char  *src_ptr,
198     int  src_pixels_per_line,
199     int  xoffset,
200     int  yoffset,
201     unsigned char *dst_ptr,
202     int  dst_pitch
203 )
204 {
205     const short  *HFilter;
206     const short  *VFilter;
207     int FData[13*16];   /* Temp data buffer used in filtering */
208 
209     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
210     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
211 
212     /* First filter 1-D horizontally... */
213     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
214 
215 
216     /* then filter verticaly... */
217     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
218 
219 }
220 
vp8_sixtap_predict16x16_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)221 void vp8_sixtap_predict16x16_c
222 (
223     unsigned char  *src_ptr,
224     int  src_pixels_per_line,
225     int  xoffset,
226     int  yoffset,
227     unsigned char *dst_ptr,
228     int  dst_pitch
229 )
230 {
231     const short  *HFilter;
232     const short  *VFilter;
233     int FData[21*24];   /* Temp data buffer used in filtering */
234 
235 
236     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
237     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
238 
239     /* First filter 1-D horizontally... */
240     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
241 
242     /* then filter verticaly... */
243     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
244 
245 }
246 
247 
248 /****************************************************************************
249  *
250  *  ROUTINE       : filter_block2d_bil_first_pass
251  *
252  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
253  *                  UINT32  src_stride : Stride of source block.
254  *                  UINT32  height     : Block height.
255  *                  UINT32  width      : Block width.
256  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
257  *
258  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
259  *
260  *  RETURNS       : void
261  *
262  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
263  *                  in the horizontal direction to produce the filtered output
264  *                  block. Used to implement first-pass of 2-D separable filter.
265  *
266  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
267  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
268  *
269  ****************************************************************************/
filter_block2d_bil_first_pass(unsigned char * src_ptr,unsigned short * dst_ptr,unsigned int src_stride,unsigned int height,unsigned int width,const short * vp8_filter)270 static void filter_block2d_bil_first_pass
271 (
272     unsigned char  *src_ptr,
273     unsigned short *dst_ptr,
274     unsigned int    src_stride,
275     unsigned int    height,
276     unsigned int    width,
277     const short    *vp8_filter
278 )
279 {
280     unsigned int i, j;
281 
282     for (i = 0; i < height; i++)
283     {
284         for (j = 0; j < width; j++)
285         {
286             /* Apply bilinear filter */
287             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
288                           ((int)src_ptr[1] * vp8_filter[1]) +
289                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
290             src_ptr++;
291         }
292 
293         /* Next row... */
294         src_ptr += src_stride - width;
295         dst_ptr += width;
296     }
297 }
298 
299 /****************************************************************************
300  *
301  *  ROUTINE       : filter_block2d_bil_second_pass
302  *
303  *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
304  *                  UINT32  dst_pitch  : Destination block pitch.
305  *                  UINT32  height     : Block height.
306  *                  UINT32  width      : Block width.
307  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
308  *
309  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
310  *
311  *  RETURNS       : void
312  *
313  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
314  *                  in the vertical direction to produce the filtered output
315  *                  block. Used to implement second-pass of 2-D separable filter.
316  *
317  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
318  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
319  *
320  ****************************************************************************/
filter_block2d_bil_second_pass(unsigned short * src_ptr,unsigned char * dst_ptr,int dst_pitch,unsigned int height,unsigned int width,const short * vp8_filter)321 static void filter_block2d_bil_second_pass
322 (
323     unsigned short *src_ptr,
324     unsigned char  *dst_ptr,
325     int             dst_pitch,
326     unsigned int    height,
327     unsigned int    width,
328     const short    *vp8_filter
329 )
330 {
331     unsigned int  i, j;
332     int  Temp;
333 
334     for (i = 0; i < height; i++)
335     {
336         for (j = 0; j < width; j++)
337         {
338             /* Apply filter */
339             Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
340                    ((int)src_ptr[width] * vp8_filter[1]) +
341                    (VP8_FILTER_WEIGHT / 2);
342             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
343             src_ptr++;
344         }
345 
346         /* Next row... */
347         dst_ptr += dst_pitch;
348     }
349 }
350 
351 
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : unsigned char *src_ptr   : Pointer to source block.
 *                  unsigned int   src_pitch : Stride of source block.
 *                  unsigned int   dst_pitch : Stride of destination block.
 *                  const short   *HFilter   : Array of 2 horizontal filter
 *                                             taps.
 *                  const short   *VFilter   : Array of 2 vertical filter taps.
 *                  int            Width     : Block width.
 *                  int            Height    : Block height.
 *
 *  OUTPUTS       : unsigned char *dst_ptr   : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The intermediate buffer holds 17 rows of 16 samples, so
 *                  the largest block that can be handled here is 16x16.
 *                  Width and Height are not checked against that limit.
 *
 ****************************************************************************/
static void filter_block2d_bil
(
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
    unsigned int   src_pitch,
    unsigned int   dst_pitch,
    const short   *HFilter,
    const short   *VFilter,
    int            Width,
    int            Height
)
{
    /* Horizontally filtered rows; one more row than the block is tall. */
    unsigned short intermediate[17*16];

    /* Horizontal pass over Height + 1 rows (the vertical taps need the
     * row below the block). */
    filter_block2d_bil_first_pass(src_ptr, intermediate, src_pitch,
                                  Height + 1, Width, HFilter);

    /* Vertical pass producing the final Height x Width block. */
    filter_block2d_bil_second_pass(intermediate, dst_ptr, dst_pitch,
                                   Height, Width, VFilter);
}
396 
397 
vp8_bilinear_predict4x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)398 void vp8_bilinear_predict4x4_c
399 (
400     unsigned char  *src_ptr,
401     int   src_pixels_per_line,
402     int  xoffset,
403     int  yoffset,
404     unsigned char *dst_ptr,
405     int dst_pitch
406 )
407 {
408     const short *HFilter;
409     const short *VFilter;
410 
411     HFilter = vp8_bilinear_filters[xoffset];
412     VFilter = vp8_bilinear_filters[yoffset];
413 #if 0
414     {
415         int i;
416         unsigned char temp1[16];
417         unsigned char temp2[16];
418 
419         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
420         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
421 
422         for (i = 0; i < 16; i++)
423         {
424             if (temp1[i] != temp2[i])
425             {
426                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
427                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
428             }
429         }
430     }
431 #endif
432     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
433 
434 }
435 
vp8_bilinear_predict8x8_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)436 void vp8_bilinear_predict8x8_c
437 (
438     unsigned char  *src_ptr,
439     int  src_pixels_per_line,
440     int  xoffset,
441     int  yoffset,
442     unsigned char *dst_ptr,
443     int  dst_pitch
444 )
445 {
446     const short *HFilter;
447     const short *VFilter;
448 
449     HFilter = vp8_bilinear_filters[xoffset];
450     VFilter = vp8_bilinear_filters[yoffset];
451 
452     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
453 
454 }
455 
vp8_bilinear_predict8x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)456 void vp8_bilinear_predict8x4_c
457 (
458     unsigned char  *src_ptr,
459     int  src_pixels_per_line,
460     int  xoffset,
461     int  yoffset,
462     unsigned char *dst_ptr,
463     int  dst_pitch
464 )
465 {
466     const short *HFilter;
467     const short *VFilter;
468 
469     HFilter = vp8_bilinear_filters[xoffset];
470     VFilter = vp8_bilinear_filters[yoffset];
471 
472     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
473 
474 }
475 
vp8_bilinear_predict16x16_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)476 void vp8_bilinear_predict16x16_c
477 (
478     unsigned char  *src_ptr,
479     int  src_pixels_per_line,
480     int  xoffset,
481     int  yoffset,
482     unsigned char *dst_ptr,
483     int  dst_pitch
484 )
485 {
486     const short *HFilter;
487     const short *VFilter;
488 
489     HFilter = vp8_bilinear_filters[xoffset];
490     VFilter = vp8_bilinear_filters[yoffset];
491 
492     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
493 }
494