1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "vpx_config.h"
12 #include "vp8/common/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
15 
/*
 * Prototypes for the SIMD kernels used by the wrappers in this file.
 * Only the declarations live here; the definitions are provided elsewhere
 * (NOTE(review): presumably in the x86 assembly sources -- confirm).
 */

/* Returns an unsigned int and takes two output pointers (SSE, Sum). */
extern unsigned int vp8_get16x16var_sse2(const unsigned char *src_ptr,
                                         int source_stride,
                                         const unsigned char *ref_ptr,
                                         int recon_stride,
                                         unsigned int *SSE,
                                         int *Sum);

/*
 * Fixed-offset kernels.  The wrappers in this file select them when
 * (xoffset, yoffset) is (4,4), (4,0) and (0,4) respectively, and read
 * *sum and *sumsquared after the call returns.
 * NOTE(review): the names suggest half-sample interpolation on
 * 16-pixel-wide blocks of 'Height' rows -- confirm against the kernels.
 */
extern void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                                   int ref_pixels_per_line,
                                                   const unsigned char *src_ptr,
                                                   int src_pixels_per_line,
                                                   unsigned int Height,
                                                   int *sum,
                                                   unsigned int *sumsquared);

extern void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              unsigned int Height,
                                              int *sum,
                                              unsigned int *sumsquared);

extern void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int *sum,
                                             unsigned int *sumsquared);

/*
 * General kernel taking explicit x/y offsets; the wrappers in this file
 * fall back to it for every offset pair not covered by the fixed-offset
 * kernels above.
 */
extern void vp8_filter_block2d_bil_var_ssse3(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int  xoffset,
                                             int  yoffset,
                                             int *sum,
                                             unsigned int *sumsquared);
67 
/*
 * Sub-pixel variance wrapper for a 16-row block.
 *
 * Picks a kernel from (xoffset, yoffset), lets it fill in a signed sum and
 * an unsigned sum of squares, then combines the two.
 *
 *   src_ptr / src_pixels_per_line  forwarded as the kernel's first
 *                                  pointer/stride pair (its "ref" slot)
 *   xoffset / yoffset              offset pair; (4,0), (0,4) and (4,4) go
 *                                  to dedicated kernels, everything else
 *                                  to the general bilinear kernel
 *                                  (NOTE(review): 4 is presumably the
 *                                  half-sample position -- confirm)
 *   dst_ptr / dst_pixels_per_line  forwarded as the kernel's second
 *                                  pointer/stride pair (its "src" slot)
 *   sse                            out: receives the sum of squares
 *
 * Returns sum_of_squares - ((sum * sum) >> 8), evaluated in unsigned int.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    /* 1 << MEAN_SHIFT == 256 == 16 * 16. */
    enum { BLOCK_ROWS = 16, MEAN_SHIFT = 8 };

    int diff_sum;
    unsigned int diff_sq_sum;
    unsigned int wrapped_sum;

    /* The special cases are mutually exclusive, so their order is
     * irrelevant; exactly one kernel runs and writes both accumulators.
     */
    if (xoffset == 4 && yoffset == 4)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               BLOCK_ROWS,
                                               &diff_sum, &diff_sq_sum);
    }
    else if (xoffset == 4 && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          BLOCK_ROWS,
                                          &diff_sum, &diff_sq_sum);
    }
    else if (xoffset == 0 && yoffset == 4)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         BLOCK_ROWS,
                                         &diff_sum, &diff_sq_sum);
    }
    else
    {
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         BLOCK_ROWS,
                                         xoffset, yoffset,
                                         &diff_sum, &diff_sq_sum);
    }

    *sse = diff_sq_sum;

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int.
     */
    wrapped_sum = (unsigned int)diff_sum;
    return diff_sq_sum - ((wrapped_sum * wrapped_sum) >> MEAN_SHIFT);
}
118 
/*
 * Sub-pixel variance wrapper for an 8-row block.
 *
 * Picks a kernel from (xoffset, yoffset), lets it fill in a signed sum and
 * an unsigned sum of squares, then combines the two.
 *
 *   src_ptr / src_pixels_per_line  forwarded as the kernel's first
 *                                  pointer/stride pair (its "ref" slot)
 *   xoffset / yoffset              offset pair; (4,0), (0,4) and (4,4) go
 *                                  to dedicated kernels, everything else
 *                                  to the general bilinear kernel
 *                                  (NOTE(review): 4 is presumably the
 *                                  half-sample position -- confirm)
 *   dst_ptr / dst_pixels_per_line  forwarded as the kernel's second
 *                                  pointer/stride pair (its "src" slot)
 *   sse                            out: receives the sum of squares
 *
 * Returns sum_of_squares - ((sum * sum) >> 7), evaluated in unsigned int.
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    /* 1 << MEAN_SHIFT == 128 == 16 * 8. */
    enum { BLOCK_ROWS = 8, MEAN_SHIFT = 7 };

    int diff_sum;
    unsigned int diff_sq_sum;
    unsigned int wrapped_sum;

    /* The special cases are mutually exclusive, so their order is
     * irrelevant; exactly one kernel runs and writes both accumulators.
     */
    if (xoffset == 4 && yoffset == 4)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               BLOCK_ROWS,
                                               &diff_sum, &diff_sq_sum);
    }
    else if (xoffset == 4 && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          BLOCK_ROWS,
                                          &diff_sum, &diff_sq_sum);
    }
    else if (xoffset == 0 && yoffset == 4)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         BLOCK_ROWS,
                                         &diff_sum, &diff_sq_sum);
    }
    else
    {
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         BLOCK_ROWS,
                                         xoffset, yoffset,
                                         &diff_sum, &diff_sq_sum);
    }

    *sse = diff_sq_sum;

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int.
     */
    wrapped_sum = (unsigned int)diff_sum;
    return diff_sq_sum - ((wrapped_sum * wrapped_sum) >> MEAN_SHIFT);
}
167