1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_config.h"
12 #include "vp8/common/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
15 #include "vp8/common/x86/filter_x86.h"
16
17 extern void filter_block1d_h6_mmx
18 (
19 const unsigned char *src_ptr,
20 unsigned short *output_ptr,
21 unsigned int src_pixels_per_line,
22 unsigned int pixel_step,
23 unsigned int output_height,
24 unsigned int output_width,
25 short *filter
26 );
27 extern void filter_block1d_v6_mmx
28 (
29 const short *src_ptr,
30 unsigned char *output_ptr,
31 unsigned int pixels_per_line,
32 unsigned int pixel_step,
33 unsigned int output_height,
34 unsigned int output_width,
35 short *filter
36 );
37
38 extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
39 extern unsigned int vp8_get8x8var_mmx
40 (
41 const unsigned char *src_ptr,
42 int source_stride,
43 const unsigned char *ref_ptr,
44 int recon_stride,
45 unsigned int *SSE,
46 int *Sum
47 );
48 extern unsigned int vp8_get4x4var_mmx
49 (
50 const unsigned char *src_ptr,
51 int source_stride,
52 const unsigned char *ref_ptr,
53 int recon_stride,
54 unsigned int *SSE,
55 int *Sum
56 );
57 extern void vp8_filter_block2d_bil4x4_var_mmx
58 (
59 const unsigned char *ref_ptr,
60 int ref_pixels_per_line,
61 const unsigned char *src_ptr,
62 int src_pixels_per_line,
63 const short *HFilter,
64 const short *VFilter,
65 int *sum,
66 unsigned int *sumsquared
67 );
68 extern void vp8_filter_block2d_bil_var_mmx
69 (
70 const unsigned char *ref_ptr,
71 int ref_pixels_per_line,
72 const unsigned char *src_ptr,
73 int src_pixels_per_line,
74 unsigned int Height,
75 const short *HFilter,
76 const short *VFilter,
77 int *sum,
78 unsigned int *sumsquared
79 );
80
81
/* Variance of a 4x4 block: SSE minus squared mean.
 * The SSE/sum pair is produced by the MMX assembly helper. */
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sum_sq;
    int sum;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sum_sq, &sum);
    *sse = sum_sq;

    /* >> 4 divides sum*sum by the 16 pixels of a 4x4 block; the
     * unsigned cast keeps the multiply free of signed overflow. */
    return (sum_sq - (((unsigned int)sum * sum) >> 4));
}
97
/* Variance of an 8x8 block: SSE minus squared mean. */
unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sum_sq;
    int sum;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sum_sq, &sum);
    *sse = sum_sq;

    /* >> 6 divides sum*sum by the 64 pixels of an 8x8 block. */
    return (sum_sq - (((unsigned int)sum * sum) >> 6));
}
114
/* Sum of squared errors over a 16x16 block, computed as four 8x8
 * quadrants.  The per-quadrant sums are gathered but deliberately
 * unused: MSE does not subtract the mean. */
unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_tl, sse_tr, sse_bl, sse_br, total;
    int sum_tl, sum_tr, sum_bl, sum_br;

    /* top-left, top-right, bottom-left, bottom-right 8x8 quadrants */
    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_tl, &sum_tl);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse_tr, &sum_tr);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride,
                      &sse_bl, &sum_bl);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
                      ref_ptr + 8 * recon_stride + 8, recon_stride,
                      &sse_br, &sum_br);

    total = sse_tl + sse_tr + sse_bl + sse_br;
    *sse = total;
    return total;
}
135
136
/* Variance of a 16x16 block, accumulated from four 8x8 quadrants. */
unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_tl, sse_tr, sse_bl, sse_br, total_sse;
    int sum_tl, sum_tr, sum_bl, sum_br, total_sum;

    /* top-left, top-right, bottom-left, bottom-right 8x8 quadrants */
    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_tl, &sum_tl);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse_tr, &sum_tr);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride,
                      &sse_bl, &sum_bl);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
                      ref_ptr + 8 * recon_stride + 8, recon_stride,
                      &sse_br, &sum_br);

    total_sse = sse_tl + sse_tr + sse_bl + sse_br;
    total_sum = sum_tl + sum_tr + sum_bl + sum_br;
    *sse = total_sse;

    /* >> 8 divides sum*sum by the 256 pixels of a 16x16 block; the
     * unsigned cast avoids signed overflow when |sum| is large. */
    return (total_sse - (((unsigned int)total_sum * total_sum) >> 8));
}
158
/* Variance of a 16x8 block, accumulated from two side-by-side 8x8 halves. */
unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_l, sse_r, total_sse;
    int sum_l, sum_r, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_l, &sum_l);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse_r, &sum_r);

    total_sse = sse_l + sse_r;
    total_sum = sum_l + sum_r;
    *sse = total_sse;

    /* >> 7 divides sum*sum by the 128 pixels of a 16x8 block. */
    return (total_sse - (((unsigned int)total_sum * total_sum) >> 7));
}
178
179
/* Variance of an 8x16 block, accumulated from two stacked 8x8 halves. */
unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_top, sse_bot, total_sse;
    int sum_top, sum_bot, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_top, &sum_top);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride,
                      &sse_bot, &sum_bot);

    total_sse = sse_top + sse_bot;
    total_sum = sum_top + sum_bot;
    *sse = total_sse;

    /* >> 7 divides sum*sum by the 128 pixels of an 8x16 block. */
    return (total_sse - (((unsigned int)total_sum * total_sum) >> 7));
}
200
201
vp8_sub_pixel_variance4x4_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)202 unsigned int vp8_sub_pixel_variance4x4_mmx
203 (
204 const unsigned char *src_ptr,
205 int src_pixels_per_line,
206 int xoffset,
207 int yoffset,
208 const unsigned char *dst_ptr,
209 int dst_pixels_per_line,
210 unsigned int *sse)
211
212 {
213 int xsum;
214 unsigned int xxsum;
215 vp8_filter_block2d_bil4x4_var_mmx(
216 src_ptr, src_pixels_per_line,
217 dst_ptr, dst_pixels_per_line,
218 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
219 &xsum, &xxsum
220 );
221 *sse = xxsum;
222 return (xxsum - (((unsigned int)xsum * xsum) >> 4));
223 }
224
225
vp8_sub_pixel_variance8x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)226 unsigned int vp8_sub_pixel_variance8x8_mmx
227 (
228 const unsigned char *src_ptr,
229 int src_pixels_per_line,
230 int xoffset,
231 int yoffset,
232 const unsigned char *dst_ptr,
233 int dst_pixels_per_line,
234 unsigned int *sse
235 )
236 {
237
238 int xsum;
239 unsigned int xxsum;
240 vp8_filter_block2d_bil_var_mmx(
241 src_ptr, src_pixels_per_line,
242 dst_ptr, dst_pixels_per_line, 8,
243 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
244 &xsum, &xxsum
245 );
246 *sse = xxsum;
247 return (xxsum - (((unsigned int)xsum * xsum) >> 6));
248 }
249
vp8_sub_pixel_variance16x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)250 unsigned int vp8_sub_pixel_variance16x16_mmx
251 (
252 const unsigned char *src_ptr,
253 int src_pixels_per_line,
254 int xoffset,
255 int yoffset,
256 const unsigned char *dst_ptr,
257 int dst_pixels_per_line,
258 unsigned int *sse
259 )
260 {
261
262 int xsum0, xsum1;
263 unsigned int xxsum0, xxsum1;
264
265
266 vp8_filter_block2d_bil_var_mmx(
267 src_ptr, src_pixels_per_line,
268 dst_ptr, dst_pixels_per_line, 16,
269 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
270 &xsum0, &xxsum0
271 );
272
273
274 vp8_filter_block2d_bil_var_mmx(
275 src_ptr + 8, src_pixels_per_line,
276 dst_ptr + 8, dst_pixels_per_line, 16,
277 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
278 &xsum1, &xxsum1
279 );
280
281 xsum0 += xsum1;
282 xxsum0 += xxsum1;
283
284 *sse = xxsum0;
285 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
286
287
288 }
289
/* Sub-pixel 16x16 MSE: reuse the variance routine for its SSE side
 * effect and return the SSE it stored, ignoring the variance value. */
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    (void)vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);
    return *sse;
}
303
vp8_sub_pixel_variance16x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)304 unsigned int vp8_sub_pixel_variance16x8_mmx
305 (
306 const unsigned char *src_ptr,
307 int src_pixels_per_line,
308 int xoffset,
309 int yoffset,
310 const unsigned char *dst_ptr,
311 int dst_pixels_per_line,
312 unsigned int *sse
313 )
314 {
315 int xsum0, xsum1;
316 unsigned int xxsum0, xxsum1;
317
318
319 vp8_filter_block2d_bil_var_mmx(
320 src_ptr, src_pixels_per_line,
321 dst_ptr, dst_pixels_per_line, 8,
322 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
323 &xsum0, &xxsum0
324 );
325
326
327 vp8_filter_block2d_bil_var_mmx(
328 src_ptr + 8, src_pixels_per_line,
329 dst_ptr + 8, dst_pixels_per_line, 8,
330 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
331 &xsum1, &xxsum1
332 );
333
334 xsum0 += xsum1;
335 xxsum0 += xxsum1;
336
337 *sse = xxsum0;
338 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
339 }
340
vp8_sub_pixel_variance8x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)341 unsigned int vp8_sub_pixel_variance8x16_mmx
342 (
343 const unsigned char *src_ptr,
344 int src_pixels_per_line,
345 int xoffset,
346 int yoffset,
347 const unsigned char *dst_ptr,
348 int dst_pixels_per_line,
349 unsigned int *sse
350 )
351 {
352 int xsum;
353 unsigned int xxsum;
354 vp8_filter_block2d_bil_var_mmx(
355 src_ptr, src_pixels_per_line,
356 dst_ptr, dst_pixels_per_line, 16,
357 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
358 &xsum, &xxsum
359 );
360 *sse = xxsum;
361 return (xxsum - (((unsigned int)xsum * xsum) >> 7));
362 }
363
364
/* Half-pel horizontal 16x16 variance: sub-pixel variance with the
 * x offset fixed at 4/8 (half pel) and no vertical offset. */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           4 /* x = half pel */, 0,
                                           ref_ptr, recon_stride, sse);
}
375
376
/* Half-pel vertical 16x16 variance: sub-pixel variance with the
 * y offset fixed at 4/8 (half pel) and no horizontal offset. */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           0, 4 /* y = half pel */,
                                           ref_ptr, recon_stride, sse);
}
387
388
/* Half-pel diagonal 16x16 variance: sub-pixel variance with both the
 * x and y offsets fixed at 4/8 (half pel). */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           4, 4 /* x, y = half pel */,
                                           ref_ptr, recon_stride, sse);
}
399