1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "vpx_config.h"
13 #include "vp8_rtcd.h"
14 #include "vpx_ports/mem.h"
15 #include "filter_x86.h"
16
/* Six-tap filter coefficients. The predictors in this file pick a row of the
 * table with the x or y offset they are given. */
extern const short vp8_six_tap_mmx[8][6*8];

/*
 * First-pass (h6) kernels: take 8-bit source pixels and write 16-bit
 * intermediate values to output_ptr.
 */
extern void vp8_filter_block1d_h6_mmx
(
    unsigned char   *src_ptr,
    unsigned short  *output_ptr,
    unsigned int     src_pixels_per_line,
    unsigned int     pixel_step,
    unsigned int     output_height,
    unsigned int     output_width,
    const short     *vp8_filter
);

/*
 * Second-pass (v6) MMX kernel: takes the 16-bit intermediate values and
 * writes 8-bit pixels to output_ptr.
 */
extern void vp8_filter_block1dc_v6_mmx
(
    unsigned short  *src_ptr,
    unsigned char   *output_ptr,
    int              output_pitch,
    unsigned int     pixels_per_line,
    unsigned int     pixel_step,
    unsigned int     output_height,
    unsigned int     output_width,
    const short     *vp8_filter
);

/* SSE2 first-pass kernels for 8- and 16-wide blocks. */
extern void vp8_filter_block1d8_h6_sse2
(
    unsigned char   *src_ptr,
    unsigned short  *output_ptr,
    unsigned int     src_pixels_per_line,
    unsigned int     pixel_step,
    unsigned int     output_height,
    unsigned int     output_width,
    const short     *vp8_filter
);
extern void vp8_filter_block1d16_h6_sse2
(
    unsigned char   *src_ptr,
    unsigned short  *output_ptr,
    unsigned int     src_pixels_per_line,
    unsigned int     pixel_step,
    unsigned int     output_height,
    unsigned int     output_width,
    const short     *vp8_filter
);

/* SSE2 second-pass kernels for 8- and 16-wide blocks. */
extern void vp8_filter_block1d8_v6_sse2
(
    unsigned short  *src_ptr,
    unsigned char   *output_ptr,
    int              dst_pitch,
    unsigned int     pixels_per_line,
    unsigned int     pixel_step,
    unsigned int     output_height,
    unsigned int     output_width,
    const short     *vp8_filter
);
extern void vp8_filter_block1d16_v6_sse2
(
    unsigned short  *src_ptr,
    unsigned char   *output_ptr,
    int              dst_pitch,
    unsigned int     pixels_per_line,
    unsigned int     pixel_step,
    unsigned int     output_height,
    unsigned int     output_width,
    const short     *vp8_filter
);

/*
 * Fills the 16-bit intermediate buffer from 8-bit source pixels; takes no
 * filter argument. Used in place of the first pass when xoffset is zero.
 */
extern void vp8_unpack_block1d16_h6_sse2
(
    unsigned char   *src_ptr,
    unsigned short  *output_ptr,
    unsigned int     src_pixels_per_line,
    unsigned int     output_height,
    unsigned int     output_width
);

/*
 * Single-pass SSE2 kernels: 8-bit source straight to 8-bit destination,
 * used when only one of the two offsets is non-zero.
 */
extern void vp8_filter_block1d8_h6_only_sse2
(
    unsigned char   *src_ptr,
    unsigned int     src_pixels_per_line,
    unsigned char   *output_ptr,
    int              dst_pitch,
    unsigned int     output_height,
    const short     *vp8_filter
);
extern void vp8_filter_block1d16_h6_only_sse2
(
    unsigned char   *src_ptr,
    unsigned int     src_pixels_per_line,
    unsigned char   *output_ptr,
    int              dst_pitch,
    unsigned int     output_height,
    const short     *vp8_filter
);
extern void vp8_filter_block1d8_v6_only_sse2
(
    unsigned char   *src_ptr,
    unsigned int     src_pixels_per_line,
    unsigned char   *output_ptr,
    int              dst_pitch,
    unsigned int     output_height,
    const short     *vp8_filter
);
117
118
119 #if HAVE_MMX
vp8_sixtap_predict4x4_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)120 void vp8_sixtap_predict4x4_mmx
121 (
122 unsigned char *src_ptr,
123 int src_pixels_per_line,
124 int xoffset,
125 int yoffset,
126 unsigned char *dst_ptr,
127 int dst_pitch
128 )
129 {
130 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
131 const short *HFilter, *VFilter;
132 HFilter = vp8_six_tap_mmx[xoffset];
133 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
134 VFilter = vp8_six_tap_mmx[yoffset];
135 vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter);
136
137 }
138
139
vp8_sixtap_predict16x16_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)140 void vp8_sixtap_predict16x16_mmx
141 (
142 unsigned char *src_ptr,
143 int src_pixels_per_line,
144 int xoffset,
145 int yoffset,
146 unsigned char *dst_ptr,
147 int dst_pitch
148 )
149 {
150
151 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
152
153 const short *HFilter, *VFilter;
154
155
156 HFilter = vp8_six_tap_mmx[xoffset];
157
158 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
159 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter);
160 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter);
161 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter);
162
163 VFilter = vp8_six_tap_mmx[yoffset];
164 vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
165 vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter);
166 vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
167 vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter);
168
169 }
170
171
vp8_sixtap_predict8x8_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)172 void vp8_sixtap_predict8x8_mmx
173 (
174 unsigned char *src_ptr,
175 int src_pixels_per_line,
176 int xoffset,
177 int yoffset,
178 unsigned char *dst_ptr,
179 int dst_pitch
180 )
181 {
182
183 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
184
185 const short *HFilter, *VFilter;
186
187 HFilter = vp8_six_tap_mmx[xoffset];
188 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
189 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter);
190
191 VFilter = vp8_six_tap_mmx[yoffset];
192 vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter);
193 vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter);
194
195 }
196
197
vp8_sixtap_predict8x4_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)198 void vp8_sixtap_predict8x4_mmx
199 (
200 unsigned char *src_ptr,
201 int src_pixels_per_line,
202 int xoffset,
203 int yoffset,
204 unsigned char *dst_ptr,
205 int dst_pitch
206 )
207 {
208
209 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
210
211 const short *HFilter, *VFilter;
212
213 HFilter = vp8_six_tap_mmx[xoffset];
214 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
215 vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter);
216
217 VFilter = vp8_six_tap_mmx[yoffset];
218 vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter);
219 vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter);
220
221 }
222
223
224
/*
 * 16x16 bilinear prediction, MMX version.
 *
 * Delegates to vp8_bilinear_predict8x8_mmx once per 8x8 quadrant, in the
 * order top-left, top-right, bottom-left, bottom-right.
 */
void vp8_bilinear_predict16x16_mmx(unsigned char *src_ptr,
                                   int src_pixels_per_line,
                                   int xoffset,
                                   int yoffset,
                                   unsigned char *dst_ptr,
                                   int dst_pitch)
{
    int row;
    int col;

    for (row = 0; row < 16; row += 8)
    {
        unsigned char *src_row = src_ptr + row * src_pixels_per_line;
        unsigned char *dst_row = dst_ptr + dst_pitch * row;

        for (col = 0; col < 16; col += 8)
        {
            vp8_bilinear_predict8x8_mmx(src_row + col, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_row + col, dst_pitch);
        }
    }
}
240 #endif
241
242
243 #if HAVE_SSE2
vp8_sixtap_predict16x16_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)244 void vp8_sixtap_predict16x16_sse2
245 (
246 unsigned char *src_ptr,
247 int src_pixels_per_line,
248 int xoffset,
249 int yoffset,
250 unsigned char *dst_ptr,
251 int dst_pitch
252
253 )
254 {
255 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
256
257 const short *HFilter, *VFilter;
258
259 if (xoffset)
260 {
261 if (yoffset)
262 {
263 HFilter = vp8_six_tap_mmx[xoffset];
264 vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
265 VFilter = vp8_six_tap_mmx[yoffset];
266 vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
267 }
268 else
269 {
270 /* First-pass only */
271 HFilter = vp8_six_tap_mmx[xoffset];
272 vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
273 }
274 }
275 else
276 {
277 /* Second-pass only */
278 VFilter = vp8_six_tap_mmx[yoffset];
279 vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
280 vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
281 }
282 }
283
284
vp8_sixtap_predict8x8_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)285 void vp8_sixtap_predict8x8_sse2
286 (
287 unsigned char *src_ptr,
288 int src_pixels_per_line,
289 int xoffset,
290 int yoffset,
291 unsigned char *dst_ptr,
292 int dst_pitch
293 )
294 {
295 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
296 const short *HFilter, *VFilter;
297
298 if (xoffset)
299 {
300 if (yoffset)
301 {
302 HFilter = vp8_six_tap_mmx[xoffset];
303 vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
304 VFilter = vp8_six_tap_mmx[yoffset];
305 vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
306 }
307 else
308 {
309 /* First-pass only */
310 HFilter = vp8_six_tap_mmx[xoffset];
311 vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
312 }
313 }
314 else
315 {
316 /* Second-pass only */
317 VFilter = vp8_six_tap_mmx[yoffset];
318 vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
319 }
320 }
321
322
vp8_sixtap_predict8x4_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)323 void vp8_sixtap_predict8x4_sse2
324 (
325 unsigned char *src_ptr,
326 int src_pixels_per_line,
327 int xoffset,
328 int yoffset,
329 unsigned char *dst_ptr,
330 int dst_pitch
331 )
332 {
333 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
334 const short *HFilter, *VFilter;
335
336 if (xoffset)
337 {
338 if (yoffset)
339 {
340 HFilter = vp8_six_tap_mmx[xoffset];
341 vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
342 VFilter = vp8_six_tap_mmx[yoffset];
343 vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
344 }
345 else
346 {
347 /* First-pass only */
348 HFilter = vp8_six_tap_mmx[xoffset];
349 vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
350 }
351 }
352 else
353 {
354 /* Second-pass only */
355 VFilter = vp8_six_tap_mmx[yoffset];
356 vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
357 }
358 }
359
360 #endif
361
362 #if HAVE_SSSE3
363
/*
 * SSSE3 six-tap kernels used by the SSSE3 predictors. Each one takes a
 * filter index as its last argument (the callers pass xoffset or yoffset)
 * and works on 8-bit data for both input and output.
 */

/* First-pass (h6) kernels for 4-, 8- and 16-wide blocks. */
extern void vp8_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned char *output_ptr,
                                         unsigned int output_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned char *output_ptr,
                                         unsigned int output_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
                                          unsigned int src_pixels_per_line,
                                          unsigned char *output_ptr,
                                          unsigned int output_pitch,
                                          unsigned int output_height,
                                          unsigned int vp8_filter_index);

/* Second-pass (v6) kernels for 4-, 8- and 16-wide blocks. */
extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int out_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int out_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
                                          unsigned int src_pitch,
                                          unsigned char *output_ptr,
                                          unsigned int out_pitch,
                                          unsigned int output_height,
                                          unsigned int vp8_filter_index);
423
vp8_sixtap_predict16x16_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)424 void vp8_sixtap_predict16x16_ssse3
425 (
426 unsigned char *src_ptr,
427 int src_pixels_per_line,
428 int xoffset,
429 int yoffset,
430 unsigned char *dst_ptr,
431 int dst_pitch
432
433 )
434 {
435 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
436
437 if (xoffset)
438 {
439 if (yoffset)
440 {
441 vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
442 src_pixels_per_line, FData2,
443 16, 21, xoffset);
444 vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
445 16, yoffset);
446 }
447 else
448 {
449 /* First-pass only */
450 vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
451 dst_ptr, dst_pitch, 16, xoffset);
452 }
453 }
454 else
455 {
456 if (yoffset)
457 {
458 /* Second-pass only */
459 vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
460 src_pixels_per_line,
461 dst_ptr, dst_pitch, 16, yoffset);
462 }
463 else
464 {
465 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
466 * yoffset==0) case correctly. Add copy function here to guarantee
467 * six-tap function handles all possible offsets. */
468 vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
469 }
470 }
471 }
472
vp8_sixtap_predict8x8_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)473 void vp8_sixtap_predict8x8_ssse3
474 (
475 unsigned char *src_ptr,
476 int src_pixels_per_line,
477 int xoffset,
478 int yoffset,
479 unsigned char *dst_ptr,
480 int dst_pitch
481 )
482 {
483 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
484
485 if (xoffset)
486 {
487 if (yoffset)
488 {
489 vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
490 src_pixels_per_line, FData2,
491 8, 13, xoffset);
492 vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
493 8, yoffset);
494 }
495 else
496 {
497 vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
498 dst_ptr, dst_pitch, 8, xoffset);
499 }
500 }
501 else
502 {
503 if (yoffset)
504 {
505 /* Second-pass only */
506 vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
507 src_pixels_per_line,
508 dst_ptr, dst_pitch, 8, yoffset);
509 }
510 else
511 {
512 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
513 * yoffset==0) case correctly. Add copy function here to guarantee
514 * six-tap function handles all possible offsets. */
515 vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
516 }
517 }
518 }
519
520
vp8_sixtap_predict8x4_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)521 void vp8_sixtap_predict8x4_ssse3
522 (
523 unsigned char *src_ptr,
524 int src_pixels_per_line,
525 int xoffset,
526 int yoffset,
527 unsigned char *dst_ptr,
528 int dst_pitch
529 )
530 {
531 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
532
533 if (xoffset)
534 {
535 if (yoffset)
536 {
537 vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
538 src_pixels_per_line, FData2,
539 8, 9, xoffset);
540 vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
541 4, yoffset);
542 }
543 else
544 {
545 /* First-pass only */
546 vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
547 dst_ptr, dst_pitch, 4, xoffset);
548 }
549 }
550 else
551 {
552 if (yoffset)
553 {
554 /* Second-pass only */
555 vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
556 src_pixels_per_line,
557 dst_ptr, dst_pitch, 4, yoffset);
558 }
559 else
560 {
561 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
562 * yoffset==0) case correctly. Add copy function here to guarantee
563 * six-tap function handles all possible offsets. */
564 vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
565 }
566 }
567 }
568
vp8_sixtap_predict4x4_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)569 void vp8_sixtap_predict4x4_ssse3
570 (
571 unsigned char *src_ptr,
572 int src_pixels_per_line,
573 int xoffset,
574 int yoffset,
575 unsigned char *dst_ptr,
576 int dst_pitch
577 )
578 {
579 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
580
581 if (xoffset)
582 {
583 if (yoffset)
584 {
585 vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
586 src_pixels_per_line,
587 FData2, 4, 9, xoffset);
588 vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
589 4, yoffset);
590 }
591 else
592 {
593 vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
594 dst_ptr, dst_pitch, 4, xoffset);
595 }
596 }
597 else
598 {
599 if (yoffset)
600 {
601 vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
602 src_pixels_per_line,
603 dst_ptr, dst_pitch, 4, yoffset);
604 }
605 else
606 {
607 /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
608 * yoffset==0) case correctly. Add copy function here to guarantee
609 * six-tap function handles all possible offsets. */
610 int r;
611
612 for (r = 0; r < 4; r++)
613 {
614 dst_ptr[0] = src_ptr[0];
615 dst_ptr[1] = src_ptr[1];
616 dst_ptr[2] = src_ptr[2];
617 dst_ptr[3] = src_ptr[3];
618 dst_ptr += dst_pitch;
619 src_ptr += src_pixels_per_line;
620 }
621 }
622 }
623 }
624
625 #endif
626