1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "vpx_config.h"
13 #include "vp8_rtcd.h"
14 #include "vpx_ports/mem.h"
15 #include "filter_x86.h"
16 
/* Six-tap coefficient table, defined outside this file.  The MMX and SSE2
 * predict functions in this file index its first dimension with xoffset or
 * yoffset and hand the selected row to the filter routines. */
extern const short vp8_six_tap_mmx[8][6*8];
18 
/* MMX first-pass (h6) six-tap routine, defined outside this file.
 * Takes an unsigned char source and an unsigned short output buffer.
 * Every call in this file passes a row of vp8_six_tap_mmx as vp8_filter
 * and 1 as pixel_step. */
extern void vp8_filter_block1d_h6_mmx(unsigned char   *src_ptr,
                                      unsigned short  *output_ptr,
                                      unsigned int    src_pixels_per_line,
                                      unsigned int    pixel_step,
                                      unsigned int    output_height,
                                      unsigned int    output_width,
                                      const short     *vp8_filter);
/* MMX second-pass (v6) six-tap routine, defined outside this file.
 * Takes the unsigned short intermediate as src_ptr and an unsigned char
 * destination with its own output_pitch.  Every call in this file passes
 * a row of vp8_six_tap_mmx as vp8_filter. */
extern void vp8_filter_block1dc_v6_mmx(unsigned short *src_ptr,
                                       unsigned char  *output_ptr,
                                       int             output_pitch,
                                       unsigned int    pixels_per_line,
                                       unsigned int    pixel_step,
                                       unsigned int    output_height,
                                       unsigned int    output_width,
                                       const short    *vp8_filter);
/* SSE2 first-pass (h6) six-tap routine used by the 8-wide predictors in
 * this file; defined outside this file.  Takes an unsigned char source
 * and an unsigned short output buffer. */
extern void vp8_filter_block1d8_h6_sse2(unsigned char  *src_ptr,
                                        unsigned short *output_ptr,
                                        unsigned int    src_pixels_per_line,
                                        unsigned int    pixel_step,
                                        unsigned int    output_height,
                                        unsigned int    output_width,
                                        const short    *vp8_filter);
/* SSE2 first-pass (h6) six-tap routine used by the 16x16 predictor in
 * this file; defined outside this file.  Takes an unsigned char source
 * and an unsigned short output buffer. */
extern void vp8_filter_block1d16_h6_sse2(unsigned char  *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int    src_pixels_per_line,
                                         unsigned int    pixel_step,
                                         unsigned int    output_height,
                                         unsigned int    output_width,
                                         const short    *vp8_filter);
/* SSE2 second-pass (v6) six-tap routine used by the 8-wide predictors in
 * this file; defined outside this file.  Takes the unsigned short
 * intermediate as src_ptr and an unsigned char destination.  Callers in
 * this file pass dst_pitch for both dst_ptich and output_width. */
extern void vp8_filter_block1d8_v6_sse2(unsigned short *src_ptr,
                                        unsigned char  *output_ptr,
                                        int             dst_ptich,
                                        unsigned int    pixels_per_line,
                                        unsigned int    pixel_step,
                                        unsigned int    output_height,
                                        unsigned int    output_width,
                                        const short    *vp8_filter);
/* SSE2 second-pass (v6) six-tap routine used by the 16x16 predictor in
 * this file; defined outside this file.  Takes the unsigned short
 * intermediate as src_ptr and an unsigned char destination.  Callers in
 * this file pass dst_pitch for both dst_ptich and output_width. */
extern void vp8_filter_block1d16_v6_sse2(unsigned short *src_ptr,
                                         unsigned char  *output_ptr,
                                         int             dst_ptich,
                                         unsigned int    pixels_per_line,
                                         unsigned int    pixel_step,
                                         unsigned int    output_height,
                                         unsigned int    output_width,
                                         const short    *vp8_filter);
/* SSE2 routine, defined outside this file, that fills the unsigned short
 * intermediate buffer from an unsigned char source without taking a filter
 * argument.  The 16x16 SSE2 predictor calls it in place of the h6 first
 * pass when xoffset is zero. */
extern void vp8_unpack_block1d16_h6_sse2(unsigned char  *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int    src_pixels_per_line,
                                         unsigned int    output_height,
                                         unsigned int    output_width);
/* SSE2 first-pass-only (h6) routine for the 8-wide predictors; defined
 * outside this file.  Source and output are both unsigned char, so callers
 * point output_ptr directly at the destination block. */
extern void vp8_filter_block1d8_h6_only_sse2(unsigned char  *src_ptr,
                                             unsigned int    src_pixels_per_line,
                                             unsigned char  *output_ptr,
                                             int             dst_ptich,
                                             unsigned int    output_height,
                                             const short    *vp8_filter);
/* SSE2 first-pass-only (h6) routine for the 16x16 predictor; defined
 * outside this file.  Source and output are both unsigned char, so the
 * caller points output_ptr directly at the destination block. */
extern void vp8_filter_block1d16_h6_only_sse2(unsigned char  *src_ptr,
                                              unsigned int    src_pixels_per_line,
                                              unsigned char  *output_ptr,
                                              int             dst_ptich,
                                              unsigned int    output_height,
                                              const short    *vp8_filter);
/* SSE2 second-pass-only (v6) routine for the 8-wide predictors; defined
 * outside this file.  Source and output are both unsigned char.  Callers
 * in this file pass a source pointer two lines before the block. */
extern void vp8_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
                                             unsigned int   src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int            dst_ptich,
                                             unsigned int   output_height,
                                             const short   *vp8_filter);
117 
118 
119 #if HAVE_MMX
vp8_sixtap_predict4x4_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)120 void vp8_sixtap_predict4x4_mmx
121 (
122     unsigned char  *src_ptr,
123     int   src_pixels_per_line,
124     int  xoffset,
125     int  yoffset,
126     unsigned char *dst_ptr,
127     int dst_pitch
128 )
129 {
130     DECLARE_ALIGNED(16, unsigned short, FData2[16*16]);  /* Temp data bufffer used in filtering */
131     const short *HFilter, *VFilter;
132     HFilter = vp8_six_tap_mmx[xoffset];
133     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
134     VFilter = vp8_six_tap_mmx[yoffset];
135     vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter);
136 
137 }
138 
139 
vp8_sixtap_predict16x16_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)140 void vp8_sixtap_predict16x16_mmx
141 (
142     unsigned char  *src_ptr,
143     int   src_pixels_per_line,
144     int  xoffset,
145     int  yoffset,
146     unsigned char *dst_ptr,
147     int dst_pitch
148 )
149 {
150 
151     DECLARE_ALIGNED(16, unsigned short, FData2[24*24]);  /* Temp data bufffer used in filtering */
152 
153     const short *HFilter, *VFilter;
154 
155 
156     HFilter = vp8_six_tap_mmx[xoffset];
157 
158     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),    FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
159     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,  FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter);
160     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8,  FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter);
161     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter);
162 
163     VFilter = vp8_six_tap_mmx[yoffset];
164     vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, 16, VFilter);
165     vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter);
166     vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
167     vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter);
168 
169 }
170 
171 
vp8_sixtap_predict8x8_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)172 void vp8_sixtap_predict8x8_mmx
173 (
174     unsigned char  *src_ptr,
175     int   src_pixels_per_line,
176     int  xoffset,
177     int  yoffset,
178     unsigned char *dst_ptr,
179     int dst_pitch
180 )
181 {
182 
183     DECLARE_ALIGNED(16, unsigned short, FData2[256]);    /* Temp data bufffer used in filtering */
184 
185     const short *HFilter, *VFilter;
186 
187     HFilter = vp8_six_tap_mmx[xoffset];
188     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),    FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
189     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,  FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter);
190 
191     VFilter = vp8_six_tap_mmx[yoffset];
192     vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, 8, VFilter);
193     vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter);
194 
195 }
196 
197 
vp8_sixtap_predict8x4_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)198 void vp8_sixtap_predict8x4_mmx
199 (
200     unsigned char  *src_ptr,
201     int   src_pixels_per_line,
202     int  xoffset,
203     int  yoffset,
204     unsigned char *dst_ptr,
205     int dst_pitch
206 )
207 {
208 
209     DECLARE_ALIGNED(16, unsigned short, FData2[256]);    /* Temp data bufffer used in filtering */
210 
211     const short *HFilter, *VFilter;
212 
213     HFilter = vp8_six_tap_mmx[xoffset];
214     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),    FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
215     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,  FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter);
216 
217     VFilter = vp8_six_tap_mmx[yoffset];
218     vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, 8, VFilter);
219     vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter);
220 
221 }
222 
223 
224 
/* Bilinear 16x16 prediction, MMX path.
 *
 * Delegates to vp8_bilinear_predict8x8_mmx once per 8x8 quadrant, visiting
 * them in the order top-left, top-right, bottom-left, bottom-right.  The
 * same xoffset/yoffset are forwarded to every call. */
void vp8_bilinear_predict16x16_mmx(unsigned char *src_ptr,
                                   int            src_pixels_per_line,
                                   int            xoffset,
                                   int            yoffset,
                                   unsigned char *dst_ptr,
                                   int            dst_pitch)
{
    int row;
    int col;

    for (row = 0; row < 16; row += 8)
    {
        for (col = 0; col < 16; col += 8)
        {
            vp8_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                        src_pixels_per_line, xoffset, yoffset,
                                        dst_ptr + dst_pitch * row + col,
                                        dst_pitch);
        }
    }
}
240 #endif
241 
242 
243 #if HAVE_SSE2
vp8_sixtap_predict16x16_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)244 void vp8_sixtap_predict16x16_sse2
245 (
246     unsigned char  *src_ptr,
247     int   src_pixels_per_line,
248     int  xoffset,
249     int  yoffset,
250     unsigned char *dst_ptr,
251     int dst_pitch
252 
253 )
254 {
255     DECLARE_ALIGNED(16, unsigned short, FData2[24*24]);    /* Temp data bufffer used in filtering */
256 
257     const short *HFilter, *VFilter;
258 
259     if (xoffset)
260     {
261         if (yoffset)
262         {
263             HFilter = vp8_six_tap_mmx[xoffset];
264             vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
265             VFilter = vp8_six_tap_mmx[yoffset];
266             vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
267         }
268         else
269         {
270             /* First-pass only */
271             HFilter = vp8_six_tap_mmx[xoffset];
272             vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
273         }
274     }
275     else
276     {
277         /* Second-pass only */
278         VFilter = vp8_six_tap_mmx[yoffset];
279         vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
280         vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
281     }
282 }
283 
284 
vp8_sixtap_predict8x8_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)285 void vp8_sixtap_predict8x8_sse2
286 (
287     unsigned char  *src_ptr,
288     int   src_pixels_per_line,
289     int  xoffset,
290     int  yoffset,
291     unsigned char *dst_ptr,
292     int dst_pitch
293 )
294 {
295     DECLARE_ALIGNED(16, unsigned short, FData2[256]);  /* Temp data bufffer used in filtering */
296     const short *HFilter, *VFilter;
297 
298     if (xoffset)
299     {
300         if (yoffset)
301         {
302             HFilter = vp8_six_tap_mmx[xoffset];
303             vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
304             VFilter = vp8_six_tap_mmx[yoffset];
305             vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
306         }
307         else
308         {
309             /* First-pass only */
310             HFilter = vp8_six_tap_mmx[xoffset];
311             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
312         }
313     }
314     else
315     {
316         /* Second-pass only */
317         VFilter = vp8_six_tap_mmx[yoffset];
318         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
319     }
320 }
321 
322 
vp8_sixtap_predict8x4_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)323 void vp8_sixtap_predict8x4_sse2
324 (
325     unsigned char  *src_ptr,
326     int   src_pixels_per_line,
327     int  xoffset,
328     int  yoffset,
329     unsigned char *dst_ptr,
330     int dst_pitch
331 )
332 {
333     DECLARE_ALIGNED(16, unsigned short, FData2[256]);  /* Temp data bufffer used in filtering */
334     const short *HFilter, *VFilter;
335 
336     if (xoffset)
337     {
338         if (yoffset)
339         {
340             HFilter = vp8_six_tap_mmx[xoffset];
341             vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
342             VFilter = vp8_six_tap_mmx[yoffset];
343             vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
344         }
345         else
346         {
347             /* First-pass only */
348             HFilter = vp8_six_tap_mmx[xoffset];
349             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
350         }
351     }
352     else
353     {
354         /* Second-pass only */
355         VFilter = vp8_six_tap_mmx[yoffset];
356         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
357     }
358 }
359 
360 #endif
361 
362 #if HAVE_SSSE3
363 
/* SSSE3 first-pass (h6) routine for the 8-wide predictors; defined outside
 * this file.  Source and output are both unsigned char, and the filter is
 * chosen by index (callers pass xoffset) rather than by pointer. */
extern void vp8_filter_block1d8_h6_ssse3(unsigned char  *src_ptr,
                                         unsigned int    src_pixels_per_line,
                                         unsigned char  *output_ptr,
                                         unsigned int    output_pitch,
                                         unsigned int    output_height,
                                         unsigned int    vp8_filter_index);
373 
/* SSSE3 first-pass (h6) routine for the 16x16 predictor; defined outside
 * this file.  Source and output are both unsigned char, and the filter is
 * chosen by index (callers pass xoffset) rather than by pointer. */
extern void vp8_filter_block1d16_h6_ssse3(unsigned char  *src_ptr,
                                          unsigned int    src_pixels_per_line,
                                          unsigned char  *output_ptr,
                                          unsigned int    output_pitch,
                                          unsigned int    output_height,
                                          unsigned int    vp8_filter_index);
383 
/* SSSE3 second-pass (v6) routine for the 16x16 predictor; defined outside
 * this file.  Source and output are both unsigned char, and the filter is
 * chosen by index (callers pass yoffset) rather than by pointer. */
extern void vp8_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
                                          unsigned int   src_pitch,
                                          unsigned char *output_ptr,
                                          unsigned int   out_pitch,
                                          unsigned int   output_height,
                                          unsigned int   vp8_filter_index);
393 
/* SSSE3 second-pass (v6) routine for the 8-wide predictors; defined outside
 * this file.  Source and output are both unsigned char, and the filter is
 * chosen by index (callers pass yoffset) rather than by pointer. */
extern void vp8_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int   src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int   out_pitch,
                                         unsigned int   output_height,
                                         unsigned int   vp8_filter_index);
403 
/* SSSE3 first-pass (h6) routine for the 4x4 predictor; defined outside
 * this file.  Source and output are both unsigned char, and the filter is
 * chosen by index (callers pass xoffset) rather than by pointer. */
extern void vp8_filter_block1d4_h6_ssse3(unsigned char  *src_ptr,
                                         unsigned int    src_pixels_per_line,
                                         unsigned char  *output_ptr,
                                         unsigned int    output_pitch,
                                         unsigned int    output_height,
                                         unsigned int    vp8_filter_index);
413 
/* SSSE3 second-pass (v6) routine for the 4x4 predictor; defined outside
 * this file.  Source and output are both unsigned char, and the filter is
 * chosen by index (callers pass yoffset) rather than by pointer. */
extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int   src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int   out_pitch,
                                         unsigned int   output_height,
                                         unsigned int   vp8_filter_index);
423 
vp8_sixtap_predict16x16_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)424 void vp8_sixtap_predict16x16_ssse3
425 (
426     unsigned char  *src_ptr,
427     int   src_pixels_per_line,
428     int  xoffset,
429     int  yoffset,
430     unsigned char *dst_ptr,
431     int dst_pitch
432 
433 )
434 {
435     DECLARE_ALIGNED(16, unsigned char, FData2[24*24]);
436 
437     if (xoffset)
438     {
439         if (yoffset)
440         {
441             vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
442                                           src_pixels_per_line, FData2,
443                                           16, 21, xoffset);
444             vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
445                                           16, yoffset);
446         }
447         else
448         {
449             /* First-pass only */
450             vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
451                                           dst_ptr, dst_pitch, 16, xoffset);
452         }
453     }
454     else
455     {
456         if (yoffset)
457         {
458             /* Second-pass only */
459             vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
460                                           src_pixels_per_line,
461                                           dst_ptr, dst_pitch, 16, yoffset);
462         }
463         else
464         {
465             /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
466              * yoffset==0) case correctly. Add copy function here to guarantee
467              * six-tap function handles all possible offsets. */
468             vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
469         }
470     }
471 }
472 
vp8_sixtap_predict8x8_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)473 void vp8_sixtap_predict8x8_ssse3
474 (
475     unsigned char  *src_ptr,
476     int   src_pixels_per_line,
477     int  xoffset,
478     int  yoffset,
479     unsigned char *dst_ptr,
480     int dst_pitch
481 )
482 {
483     DECLARE_ALIGNED(16, unsigned char, FData2[256]);
484 
485     if (xoffset)
486     {
487         if (yoffset)
488         {
489             vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
490                                          src_pixels_per_line, FData2,
491                                          8, 13, xoffset);
492             vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
493                                          8, yoffset);
494         }
495         else
496         {
497             vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
498                                          dst_ptr, dst_pitch, 8, xoffset);
499         }
500     }
501     else
502     {
503         if (yoffset)
504         {
505             /* Second-pass only */
506             vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
507                                          src_pixels_per_line,
508                                          dst_ptr, dst_pitch, 8, yoffset);
509         }
510         else
511         {
512             /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
513              * yoffset==0) case correctly. Add copy function here to guarantee
514              * six-tap function handles all possible offsets. */
515             vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
516         }
517     }
518 }
519 
520 
vp8_sixtap_predict8x4_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)521 void vp8_sixtap_predict8x4_ssse3
522 (
523     unsigned char  *src_ptr,
524     int   src_pixels_per_line,
525     int  xoffset,
526     int  yoffset,
527     unsigned char *dst_ptr,
528     int dst_pitch
529 )
530 {
531     DECLARE_ALIGNED(16, unsigned char, FData2[256]);
532 
533     if (xoffset)
534     {
535         if (yoffset)
536         {
537             vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
538                                          src_pixels_per_line, FData2,
539                                          8, 9, xoffset);
540             vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
541                                          4, yoffset);
542         }
543         else
544         {
545             /* First-pass only */
546             vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
547                                          dst_ptr, dst_pitch, 4, xoffset);
548         }
549     }
550     else
551     {
552         if (yoffset)
553         {
554             /* Second-pass only */
555             vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
556                                          src_pixels_per_line,
557                                          dst_ptr, dst_pitch, 4, yoffset);
558         }
559         else
560         {
561             /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
562              * yoffset==0) case correctly. Add copy function here to guarantee
563              * six-tap function handles all possible offsets. */
564             vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
565         }
566     }
567 }
568 
vp8_sixtap_predict4x4_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)569 void vp8_sixtap_predict4x4_ssse3
570 (
571     unsigned char  *src_ptr,
572     int   src_pixels_per_line,
573     int  xoffset,
574     int  yoffset,
575     unsigned char *dst_ptr,
576     int dst_pitch
577 )
578 {
579   DECLARE_ALIGNED(16, unsigned char, FData2[4*9]);
580 
581   if (xoffset)
582   {
583       if (yoffset)
584       {
585           vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
586                                        src_pixels_per_line,
587                                        FData2, 4, 9, xoffset);
588           vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
589                                        4, yoffset);
590       }
591       else
592       {
593           vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
594                                        dst_ptr, dst_pitch, 4, xoffset);
595       }
596   }
597   else
598   {
599       if (yoffset)
600       {
601           vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
602                                        src_pixels_per_line,
603                                        dst_ptr, dst_pitch, 4, yoffset);
604       }
605       else
606       {
607         /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
608           * yoffset==0) case correctly. Add copy function here to guarantee
609           * six-tap function handles all possible offsets. */
610           int r;
611 
612           for (r = 0; r < 4; r++)
613           {
614             dst_ptr[0]  = src_ptr[0];
615             dst_ptr[1]  = src_ptr[1];
616             dst_ptr[2]  = src_ptr[2];
617             dst_ptr[3]  = src_ptr[3];
618             dst_ptr     += dst_pitch;
619             src_ptr     += src_pixels_per_line;
620           }
621       }
622   }
623 }
624 
625 #endif
626