1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_config.h"
12 #include "vp8/common/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
15
/*
 * Prototypes for variance kernels whose definitions are not in this file
 * (presumably the SSE2/SSSE3 assembly sources -- TODO confirm against the
 * build).
 *
 * The four void kernels hand their results back through the trailing
 * `sum` and `sumsquared` pointers: the callers in this file read both
 * values right after the call without initialising them first.
 *
 * NOTE(review): the callers in this file pass their own src_ptr/stride as
 * the FIRST pointer/stride pair and their dst_ptr/stride as the SECOND
 * pair, so the "ref"/"src" parameter names below are swapped relative to
 * the callers' naming.
 */

/* Declared here but not called anywhere in the visible part of this file. */
extern unsigned int vp8_get16x16var_sse2(const unsigned char *src_ptr,
                                         int source_stride,
                                         const unsigned char *ref_ptr,
                                         int recon_stride,
                                         unsigned int *SSE,
                                         int *Sum);

/* Selected by the callers below when xoffset == 4 and yoffset == 4. */
extern void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                                   int ref_pixels_per_line,
                                                   const unsigned char *src_ptr,
                                                   int src_pixels_per_line,
                                                   unsigned int Height,
                                                   int *sum,
                                                   unsigned int *sumsquared);

/* Selected by the callers below when xoffset == 4 and yoffset == 0. */
extern void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              unsigned int Height,
                                              int *sum,
                                              unsigned int *sumsquared);

/* Selected by the callers below when xoffset == 0 and yoffset == 4. */
extern void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int *sum,
                                             unsigned int *sumsquared);

/*
 * Fallback used by the callers below for every other offset pair; unlike
 * the three kernels above it also receives xoffset and yoffset.
 */
extern void vp8_filter_block2d_bil_var_ssse3(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int xoffset,
                                             int yoffset,
                                             int *sum,
                                             unsigned int *sumsquared);
67
/*
 * Sub-pixel variance wrapper for a block of 16 rows.
 *
 * Picks one of four external kernels from the (xoffset, yoffset) pair,
 * runs it with Height = 16, and derives the return value from the sum and
 * sum-of-squares the kernel reports.
 *
 *   src_ptr, src_pixels_per_line  forwarded as the kernel's first
 *                                 pointer/stride pair
 *   xoffset, yoffset              choose the kernel; only forwarded to the
 *                                 bilinear kernel
 *                                 (presumably 1/8-pel units, so 4 would be
 *                                 the half-pel position -- TODO confirm)
 *   dst_ptr, dst_pixels_per_line  forwarded as the kernel's second
 *                                 pointer/stride pair
 *   sse                           out: receives the kernel's sum-of-squares
 *
 * Returns sum_squared - ((sum * sum) >> 8). The shift divides by 256,
 * which equals 16 * 16.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int sum;                  /* written by the selected kernel */
    unsigned int sum_squared; /* written by the selected kernel */
    unsigned int scaled_sum_square;

    /*
     * The three special cases are mutually exclusive, so their order does
     * not matter. As the original note says, the caller could skip this
     * dispatch by invoking the right kernel directly.
     */
    if (xoffset == 4 && yoffset == 4) {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &sum, &sum_squared);
    } else if (xoffset == 4 && yoffset == 0) {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &sum, &sum_squared);
    } else if (xoffset == 0 && yoffset == 4) {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &sum, &sum_squared);
    } else {
        /* Every other offset pair, including (0, 0). */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, xoffset, yoffset,
                                         &sum, &sum_squared);
    }

    *sse = sum_squared;

    /*
     * The cast makes this an unsigned multiplication (the second operand
     * is converted too), so the square is never formed as a signed int
     * product.
     */
    scaled_sum_square = ((unsigned int)sum * sum) >> 8;
    return sum_squared - scaled_sum_square;
}
118
/*
 * Sub-pixel variance wrapper for a block of 8 rows.
 *
 * Picks one of four external kernels from the (xoffset, yoffset) pair,
 * runs it with Height = 8, and derives the return value from the sum and
 * sum-of-squares the kernel reports.
 *
 *   src_ptr, src_pixels_per_line  forwarded as the kernel's first
 *                                 pointer/stride pair
 *   xoffset, yoffset              choose the kernel; only forwarded to the
 *                                 bilinear kernel
 *                                 (presumably 1/8-pel units, so 4 would be
 *                                 the half-pel position -- TODO confirm)
 *   dst_ptr, dst_pixels_per_line  forwarded as the kernel's second
 *                                 pointer/stride pair
 *   sse                           out: receives the kernel's sum-of-squares
 *
 * Returns sum_squared - ((sum * sum) >> 7). The shift divides by 128,
 * which equals 16 * 8.
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int sum;                  /* written by the selected kernel */
    unsigned int sum_squared; /* written by the selected kernel */
    unsigned int scaled_sum_square;

    /* The three special cases are mutually exclusive; order is irrelevant. */
    if (xoffset == 4 && yoffset == 4) {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &sum, &sum_squared);
    } else if (xoffset == 4 && yoffset == 0) {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &sum, &sum_squared);
    } else if (xoffset == 0 && yoffset == 4) {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &sum, &sum_squared);
    } else {
        /* Every other offset pair, including (0, 0). */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, xoffset, yoffset,
                                         &sum, &sum_squared);
    }

    *sse = sum_squared;

    /*
     * The cast makes this an unsigned multiplication (the second operand
     * is converted too), so the square is never formed as a signed int
     * product.
     */
    scaled_sum_square = ((unsigned int)sum * sum) >> 7;
    return sum_squared - scaled_sum_square;
}
167