1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp9_rtcd.h"
12 
13 #include "vpx_ports/mem.h"
14 #include "vpx/vpx_integer.h"
15 
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_filter.h"
18 
19 #include "vp9/encoder/vp9_variance.h"
20 
// Compares a w x h block of src_ptr against ref_ptr.
//   src_ptr / source_stride : first block and the distance between its rows.
//   ref_ptr / recon_stride  : second block and the distance between its rows.
//   *sum : receives the signed sum of (src - ref) over the block.
//   *sse : receives the sum of (src - ref)^2 over the block.
// Both outputs are reset to zero first, so an empty block (w <= 0 or h <= 0)
// yields zeros.
void variance(const uint8_t *src_ptr, int source_stride,
              const uint8_t *ref_ptr, int recon_stride,
              int w, int h, unsigned int *sse, int *sum) {
  int rows_left;

  *sse = 0;
  *sum = 0;

  for (rows_left = h; rows_left > 0; --rows_left) {
    int col;

    for (col = 0; col < w; ++col) {
      const int delta = src_ptr[col] - ref_ptr[col];

      *sum += delta;
      *sse += delta * delta;
    }

    // Step both blocks down to their next row.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }
}
46 
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : uint8_t  *src_ptr          : Pointer to source block.
 *                  uint32_t src_pixels_per_line : Stride of input block.
 *                  int      pixel_step        : Offset between filter input
 *                                               samples (see notes).
 *                  uint32_t output_height     : Output block height.
 *                  uint32_t output_width      : Output block width.
 *                  int16_t  *vp9_filter       : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement first-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces uint16_t output for the next pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
var_filter_block2d_bil_first_pass(const uint8_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const int16_t * vp9_filter)78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79                                               uint16_t *output_ptr,
80                                               unsigned int src_pixels_per_line,
81                                               int pixel_step,
82                                               unsigned int output_height,
83                                               unsigned int output_width,
84                                               const int16_t *vp9_filter) {
85   unsigned int i, j;
86 
87   for (i = 0; i < output_height; i++) {
88     for (j = 0; j < output_width; j++) {
89       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90                           (int)src_ptr[pixel_step] * vp9_filter[1],
91                           FILTER_BITS);
92 
93       src_ptr++;
94     }
95 
96     // Next row...
97     src_ptr    += src_pixels_per_line - output_width;
98     output_ptr += output_width;
99   }
100 }
101 
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : uint16_t *src_ptr          : Pointer to source block.
 *                  uint32_t src_pixels_per_line : Stride of input block.
 *                  uint32_t pixel_step        : Offset between filter input
 *                                               samples (see notes).
 *                  uint32_t output_height     : Output block height.
 *                  uint32_t output_width      : Output block width.
 *                  int16_t  *vp9_filter       : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint8_t *output_ptr        : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement second-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Requires 16-bit (uint16_t) input as produced by
 *                  var_filter_block2d_bil_first_pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint8_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const int16_t * vp9_filter)134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135                                                uint8_t *output_ptr,
136                                                unsigned int src_pixels_per_line,
137                                                unsigned int pixel_step,
138                                                unsigned int output_height,
139                                                unsigned int output_width,
140                                                const int16_t *vp9_filter) {
141   unsigned int  i, j;
142 
143   for (i = 0; i < output_height; i++) {
144     for (j = 0; j < output_width; j++) {
145       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146                           (int)src_ptr[pixel_step] * vp9_filter[1],
147                           FILTER_BITS);
148       src_ptr++;
149     }
150 
151     src_ptr += src_pixels_per_line - output_width;
152     output_ptr += output_width;
153   }
154 }
155 
// Returns the sum of squares of the first 256 int16_t values at src_ptr.
// The total is accumulated in an unsigned int.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr < end) {
    const int value = *src_ptr++;

    total += value * value;
  }

  return total;
}
165 
// 64x32 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 2048, where 2048 (1 << 11) is the pixel count.
// The square of the sum is formed in 64 bits.
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 11));
}
178 
// Sub-pixel variance for a 64x32 block.
// The source is run through the 2-tap bilinear filters selected by xoffset
// (horizontal pass) and yoffset (vertical pass); the filtered block is then
// handed to vp9_variance64x32() together with dst_ptr and sse.
// The horizontal pass produces 33 rows because the vertical 2-tap filter
// reads one row beyond the 32 output rows.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[65 * 64];    // output of the horizontal pass
  uint8_t filtered[68 * 64];  // output of the vertical pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 64, 64, 32, 64,
                                     vfilter);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
199 
vp9_sub_pixel_avg_variance64x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201                                                int  src_pixels_per_line,
202                                                int  xoffset,
203                                                int  yoffset,
204                                                const uint8_t *dst_ptr,
205                                                int dst_pixels_per_line,
206                                                unsigned int *sse,
207                                                const uint8_t *second_pred) {
208   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
209   uint8_t temp2[68 * 64];
210   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
211   const int16_t *hfilter, *vfilter;
212 
213   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215 
216   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217                                     1, 33, 64, hfilter);
218   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219   vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220   return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221 }
222 
// 32x64 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 2048, where 2048 (1 << 11) is the pixel count.
// The square of the sum is formed in 64 bits.
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 11));
}
235 
// Sub-pixel variance for a 32x64 block.
// The source is run through the 2-tap bilinear filters selected by xoffset
// (horizontal pass) and yoffset (vertical pass); the filtered block is then
// handed to vp9_variance32x64() together with dst_ptr and sse.
// The horizontal pass produces 65 rows because the vertical 2-tap filter
// reads one row beyond the 64 output rows.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[65 * 64];    // output of the horizontal pass
  uint8_t filtered[68 * 64];  // output of the vertical pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 64, 32,
                                     vfilter);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
256 
vp9_sub_pixel_avg_variance32x64_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258                                                int  src_pixels_per_line,
259                                                int  xoffset,
260                                                int  yoffset,
261                                                const uint8_t *dst_ptr,
262                                                int dst_pixels_per_line,
263                                                unsigned int *sse,
264                                                const uint8_t *second_pred) {
265   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266   uint8_t temp2[68 * 64];
267   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
268   const int16_t *hfilter, *vfilter;
269 
270   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272 
273   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
274                                     1, 65, 32, hfilter);
275   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276   vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277   return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278 }
279 
// 32x16 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 512, where 512 (1 << 9) is the pixel count.
// The square of the sum is formed in 64 bits.
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 9));
}
292 
// Sub-pixel variance for a 32x16 block.
// The source is run through the 2-tap bilinear filters selected by xoffset
// (horizontal pass) and yoffset (vertical pass); the filtered block is then
// handed to vp9_variance32x16() together with dst_ptr and sse.
// The horizontal pass produces 17 rows because the vertical 2-tap filter
// reads one row beyond the 16 output rows.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[33 * 32];    // output of the horizontal pass
  uint8_t filtered[36 * 32];  // output of the vertical pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 16, 32,
                                     vfilter);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
313 
vp9_sub_pixel_avg_variance32x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315                                                int  src_pixels_per_line,
316                                                int  xoffset,
317                                                int  yoffset,
318                                                const uint8_t *dst_ptr,
319                                                int dst_pixels_per_line,
320                                                unsigned int *sse,
321                                                const uint8_t *second_pred) {
322   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
323   uint8_t temp2[36 * 32];
324   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
325   const int16_t *hfilter, *vfilter;
326 
327   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329 
330   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331                                     1, 17, 32, hfilter);
332   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333   vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334   return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335 }
336 
// 16x32 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 512, where 512 (1 << 9) is the pixel count.
// The square of the sum is formed in 64 bits.
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 9));
}
349 
// Sub-pixel variance for a 16x32 block.
// The source is run through the 2-tap bilinear filters selected by xoffset
// (horizontal pass) and yoffset (vertical pass); the filtered block is then
// handed to vp9_variance16x32() together with dst_ptr and sse.
// The horizontal pass produces 33 rows because the vertical 2-tap filter
// reads one row beyond the 32 output rows.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[33 * 32];    // output of the horizontal pass
  uint8_t filtered[36 * 32];  // output of the vertical pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 16, 16, 32, 16,
                                     vfilter);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
370 
vp9_sub_pixel_avg_variance16x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372                                                int  src_pixels_per_line,
373                                                int  xoffset,
374                                                int  yoffset,
375                                                const uint8_t *dst_ptr,
376                                                int dst_pixels_per_line,
377                                                unsigned int *sse,
378                                                const uint8_t *second_pred) {
379   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380   uint8_t temp2[36 * 32];
381   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
382   const int16_t *hfilter, *vfilter;
383 
384   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386 
387   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
388                                     1, 33, 16, hfilter);
389   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390   vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391   return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392 }
393 
// 64x64 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 4096, where 4096 (1 << 12) is the pixel count.
// The square of the sum is formed in 64 bits.
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 12));
}
406 
// 32x32 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 1024, where 1024 (1 << 10) is the pixel count.
// The square of the sum is formed in 64 bits.
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 10));
}
419 
// 16x16 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 256, where 256 (1 << 8) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 8);
}
432 
// 8x16 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 128, where 128 (1 << 7) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 7);
}
445 
// 16x8 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 128, where 128 (1 << 7) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 7);
}
458 
// Forwards to variance() for an 8x8 block: *sse receives the sum of squared
// differences and *sum the signed sum of differences between src_ptr and
// ref_ptr.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int ref_stride,
                           unsigned int *sse,
                           int *sum) {
  variance(src_ptr, source_stride,
           ref_ptr, ref_stride,
           8, 8,
           sse, sum);
}
464 
// 8x8 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 64, where 64 (1 << 6) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 6);
}
477 
// 8x4 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 32, where 32 (1 << 5) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 5);
}
490 
// 4x8 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 32, where 32 (1 << 5) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 5);
}
503 
// 4x4 block variance.
// Stores the sum of squared differences in *sse and returns
// SSE - (sum * sum) / 16, where 16 (1 << 4) is the pixel count.
// The square of the sum is formed in unsigned int arithmetic.
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int sum_sq;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 4);
}
516 
517 
// Sum of squared differences over a 16x16 block.
// The value is stored in *sse and also returned; the sum of differences that
// variance() produces is discarded.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride,
                            const uint8_t *ref_ptr, int recon_stride,
                            unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &unused_sum);

  return (*sse = sq_err);
}
530 
// Sum of squared differences over a 16x8 block.
// The value is stored in *sse and also returned; the sum of differences that
// variance() produces is discarded.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &unused_sum);

  return (*sse = sq_err);
}
543 
// Sum of squared differences over an 8x16 block.
// The value is stored in *sse and also returned; the sum of differences that
// variance() produces is discarded.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &unused_sum);

  return (*sse = sq_err);
}
556 
// Sum of squared differences over an 8x8 block.
// The value is stored in *sse and also returned; the sum of differences that
// variance() produces is discarded.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride,
                          const uint8_t *ref_ptr, int recon_stride,
                          unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &unused_sum);

  return (*sse = sq_err);
}
569 
570 
// Sub-pixel variance for a 4x4 block.
// The source is run through the 2-tap bilinear filters selected by xoffset
// (horizontal pass) and yoffset (vertical pass); the filtered block is then
// handed to vp9_variance4x4() together with dst_ptr and sse.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint8_t filtered[20 * 16];  // output of the vertical pass
  uint16_t horiz[5 * 4];      // output of the horizontal pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass: 5 rows, since the vertical 2-tap filter reads one row
  // beyond the 4 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 5, 4, hfilter);

  // Vertical pass: taps are one 4-entry row apart.
  var_filter_block2d_bil_second_pass(horiz, filtered, 4, 4, 4, 4, vfilter);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
594 
vp9_sub_pixel_avg_variance4x4_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)595 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
596                                              int  src_pixels_per_line,
597                                              int  xoffset,
598                                              int  yoffset,
599                                              const uint8_t *dst_ptr,
600                                              int dst_pixels_per_line,
601                                              unsigned int *sse,
602                                              const uint8_t *second_pred) {
603   uint8_t temp2[20 * 16];
604   const int16_t *hfilter, *vfilter;
605   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
606   uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
607 
608   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
609   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
610 
611   // First filter 1d Horizontal
612   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
613                                     1, 5, 4, hfilter);
614 
615   // Now filter Verticaly
616   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
617   vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
618   return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
619 }
620 
// Sub-pixel variance for an 8x8 block.
// The source is run through the 2-tap bilinear filters selected by xoffset
// (horizontal pass) and yoffset (vertical pass); the filtered block is then
// handed to vp9_variance8x8() together with dst_ptr and sse.
// The horizontal pass produces 9 rows because the vertical 2-tap filter
// reads one row beyond the 8 output rows.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t horiz[9 * 8];      // output of the horizontal pass
  uint8_t filtered[20 * 16];  // output of the vertical pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 9, 8, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 8, 8, 8, 8, vfilter);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
641 
vp9_sub_pixel_avg_variance8x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)642 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
643                                              int  src_pixels_per_line,
644                                              int  xoffset,
645                                              int  yoffset,
646                                              const uint8_t *dst_ptr,
647                                              int dst_pixels_per_line,
648                                              unsigned int *sse,
649                                              const uint8_t *second_pred) {
650   uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
651   uint8_t temp2[20 * 16];
652   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
653   const int16_t *hfilter, *vfilter;
654 
655   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
656   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
657 
658   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
659                                     1, 9, 8, hfilter);
660   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
661   vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
662   return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
663 }
664 
// Sub-pixel variance for a 16x16 block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance16x16() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  uint16_t hpass_out[17 * 16];  // first-pass output: 17 rows of 16 samples
  uint8_t filtered[20 * 16];    // second-pass output, consumed with stride 16

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 17 output rows, 16 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 17, 16, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 16, 16, 16, 16,
                                     vert_taps);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
685 
vp9_sub_pixel_avg_variance16x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)686 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
687                                                int  src_pixels_per_line,
688                                                int  xoffset,
689                                                int  yoffset,
690                                                const uint8_t *dst_ptr,
691                                                int dst_pixels_per_line,
692                                                unsigned int *sse,
693                                                const uint8_t *second_pred) {
694   uint16_t fdata3[17 * 16];
695   uint8_t temp2[20 * 16];
696   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
697   const int16_t *hfilter, *vfilter;
698 
699   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
700   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
701 
702   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
703                                     1, 17, 16, hfilter);
704   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
705 
706   vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
707   return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
708 }
709 
// Sub-pixel variance for a 64x64 block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance64x64() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  uint16_t hpass_out[65 * 64];  // first-pass output: 65 rows of 64 samples
  uint8_t filtered[68 * 64];    // second-pass output, consumed with stride 64

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 65 output rows, 64 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 65, 64, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 64, 64, 64, 64,
                                     vert_taps);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
730 
vp9_sub_pixel_avg_variance64x64_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)731 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
732                                                int  src_pixels_per_line,
733                                                int  xoffset,
734                                                int  yoffset,
735                                                const uint8_t *dst_ptr,
736                                                int dst_pixels_per_line,
737                                                unsigned int *sse,
738                                                const uint8_t *second_pred) {
739   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
740   uint8_t temp2[68 * 64];
741   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
742   const int16_t *hfilter, *vfilter;
743 
744   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
745   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
746 
747   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
748                                     1, 65, 64, hfilter);
749   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
750   vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
751   return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
752 }
753 
// Sub-pixel variance for a 32x32 block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance32x32() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  uint16_t hpass_out[33 * 32];  // first-pass output: 33 rows of 32 samples
  uint8_t filtered[36 * 32];    // second-pass output, consumed with stride 32

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 33 output rows, 32 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 33, 32, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 32, 32, 32, 32,
                                     vert_taps);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
774 
vp9_sub_pixel_avg_variance32x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)775 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
776                                                int  src_pixels_per_line,
777                                                int  xoffset,
778                                                int  yoffset,
779                                                const uint8_t *dst_ptr,
780                                                int dst_pixels_per_line,
781                                                unsigned int *sse,
782                                                const uint8_t *second_pred) {
783   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
784   uint8_t temp2[36 * 32];
785   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
786   const int16_t *hfilter, *vfilter;
787 
788   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
789   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
790 
791   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
792                                     1, 33, 32, hfilter);
793   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
794   vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
795   return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
796 }
797 
// Thin wrapper: 16x16 sub-pixel variance with x offset 8 and y offset 0.
// NOTE(review): going by the function name, offset 8 is presumably the
// horizontal half-pixel position; the filter table is not visible here.
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
806 
// Thin wrapper: 32x32 sub-pixel variance with x offset 8 and y offset 0.
// NOTE(review): going by the function name, offset 8 is presumably the
// horizontal half-pixel position; the filter table is not visible here.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
815 
// Thin wrapper: 64x64 sub-pixel variance with x offset 8 and y offset 0.
// NOTE(review): going by the function name, offset 8 is presumably the
// horizontal half-pixel position; the filter table is not visible here.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
824 
// Thin wrapper: 16x16 sub-pixel variance with x offset 0 and y offset 8.
// NOTE(review): going by the function name, offset 8 is presumably the
// vertical half-pixel position; the filter table is not visible here.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
833 
// Thin wrapper: 32x32 sub-pixel variance with x offset 0 and y offset 8.
// NOTE(review): going by the function name, offset 8 is presumably the
// vertical half-pixel position; the filter table is not visible here.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
842 
// Thin wrapper: 64x64 sub-pixel variance with x offset 0 and y offset 8.
// NOTE(review): going by the function name, offset 8 is presumably the
// vertical half-pixel position; the filter table is not visible here.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
851 
// Thin wrapper: 16x16 sub-pixel variance with both x and y offsets set to 8.
// NOTE(review): going by the function name, offset 8 is presumably the
// half-pixel position in each direction; the filter table is not visible
// here.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
860 
// Thin wrapper: 32x32 sub-pixel variance with both x and y offsets set to 8.
// NOTE(review): going by the function name, offset 8 is presumably the
// half-pixel position in each direction; the filter table is not visible
// here.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
869 
// Thin wrapper: 64x64 sub-pixel variance with both x and y offsets set to 8.
// NOTE(review): going by the function name, offset 8 is presumably the
// half-pixel position in each direction; the filter table is not visible
// here.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
878 
// Runs the 16x16 sub-pixel variance with the caller's arguments and returns
// the value left in *sse afterwards; the variance call's own return value
// is discarded. The callee is expected to store its result through sse.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the write through sse matters here.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
891 
// Runs the 32x32 sub-pixel variance with the caller's arguments and returns
// the value left in *sse afterwards; the variance call's own return value
// is discarded. The callee is expected to store its result through sse.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the write through sse matters here.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
904 
// Runs the 64x64 sub-pixel variance with the caller's arguments and returns
// the value left in *sse afterwards; the variance call's own return value
// is discarded. The callee is expected to store its result through sse.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the write through sse matters here.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
917 
// Sub-pixel variance for a 16-wide, 8-high block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance16x8() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int  src_pixels_per_line,
                                          int  xoffset,
                                          int  yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  uint16_t hpass_out[16 * 9];  // first-pass output: 9 rows of 16 samples
  uint8_t filtered[20 * 16];   // second-pass output, consumed with stride 16

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 9 output rows, 16 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 9, 16, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 16, 16, 8, 16,
                                     vert_taps);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
938 
vp9_sub_pixel_avg_variance16x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)939 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
940                                               int  src_pixels_per_line,
941                                               int  xoffset,
942                                               int  yoffset,
943                                               const uint8_t *dst_ptr,
944                                               int dst_pixels_per_line,
945                                               unsigned int *sse,
946                                               const uint8_t *second_pred) {
947   uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
948   uint8_t temp2[20 * 16];
949   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
950   const int16_t *hfilter, *vfilter;
951 
952   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
953   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
954 
955   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
956                                     1, 9, 16, hfilter);
957   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
958   vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
959   return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
960 }
961 
// Sub-pixel variance for an 8-wide, 16-high block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance8x16() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int  src_pixels_per_line,
                                          int  xoffset,
                                          int  yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  // First-pass output: 17 rows of 8 samples (136 of the 144 entries used).
  uint16_t hpass_out[9 * 16];
  uint8_t filtered[20 * 16];  // second-pass output, consumed with stride 8

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 17 output rows, 8 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 17, 8, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 8, 8, 16, 8,
                                     vert_taps);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
982 
vp9_sub_pixel_avg_variance8x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)983 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
984                                               int  src_pixels_per_line,
985                                               int  xoffset,
986                                               int  yoffset,
987                                               const uint8_t *dst_ptr,
988                                               int dst_pixels_per_line,
989                                               unsigned int *sse,
990                                               const uint8_t *second_pred) {
991   uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
992   uint8_t temp2[20 * 16];
993   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
994   const int16_t *hfilter, *vfilter;
995 
996   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
997   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
998 
999   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1000                                     1, 17, 8, hfilter);
1001   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1002   vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1003   return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1004 }
1005 
// Sub-pixel variance for an 8-wide, 4-high block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance8x4() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  uint16_t hpass_out[8 * 5];  // first-pass output: 5 rows of 8 samples
  uint8_t filtered[20 * 16];  // second-pass output, consumed with stride 8

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 5 output rows, 8 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 5, 8, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 8, 8, 4, 8,
                                     vert_taps);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
1026 
vp9_sub_pixel_avg_variance8x4_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)1027 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1028                                              int  src_pixels_per_line,
1029                                              int  xoffset,
1030                                              int  yoffset,
1031                                              const uint8_t *dst_ptr,
1032                                              int dst_pixels_per_line,
1033                                              unsigned int *sse,
1034                                              const uint8_t *second_pred) {
1035   uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1036   uint8_t temp2[20 * 16];
1037   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
1038   const int16_t *hfilter, *vfilter;
1039 
1040   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1041   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1042 
1043   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1044                                     1, 5, 8, hfilter);
1045   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1046   vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1047   return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1048 }
1049 
// Sub-pixel variance for a 4-wide, 8-high block.
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (horizontal) and yoffset (vertical); the filtered block is handed to
// vp9_variance4x8() together with dst_ptr. That call's return value is
// returned and sse is forwarded to it.
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *horiz_taps;
  const int16_t *vert_taps;
  // First-pass output: 9 rows of 4 samples (36 of the 40 entries used).
  uint16_t hpass_out[5 * 8];
  // FIXME(jingning,rbultje): this buffer probably does not need to be this
  // large; the same applies to the other block-size variants.
  uint8_t filtered[20 * 16];

  horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  vert_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D pass: pixel step 1, 9 output rows, 4 samples per row.
  var_filter_block2d_bil_first_pass(src_ptr, hpass_out, src_pixels_per_line,
                                    1, 9, 4, horiz_taps);

  // Vertical 1-D pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(hpass_out, filtered, 4, 4, 8, 4,
                                     vert_taps);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
1072 
vp9_sub_pixel_avg_variance4x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)1073 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1074                                              int  src_pixels_per_line,
1075                                              int  xoffset,
1076                                              int  yoffset,
1077                                              const uint8_t *dst_ptr,
1078                                              int dst_pixels_per_line,
1079                                              unsigned int *sse,
1080                                              const uint8_t *second_pred) {
1081   uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1082   uint8_t temp2[20 * 16];
1083   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
1084   const int16_t *hfilter, *vfilter;
1085 
1086   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1087   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1088 
1089   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1090                                     1, 9, 4, hfilter);
1091   var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1092   vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1093   return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1094 }
1095 
1096 
// Writes the rounded-up average of two predictors into comp_pred.
//   comp_pred  : output block, rows stored back to back (stride == width).
//   pred       : first predictor, rows stored back to back (stride == width).
//   width      : samples per row.
//   height     : number of rows.
//   ref        : second predictor.
//   ref_stride : distance between consecutive rows of ref.
// Each output sample is (pred + ref + 1) >> 1.
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      // Sum in int so that the +1 rounding term cannot wrap at 8 bits.
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = (uint8_t)((pair_sum + 1) >> 1);
    }

    // comp_pred and pred are packed; only ref has its own stride.
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
1112