1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vpx_config.h"
12 #include "./vpx_dsp_rtcd.h"
13 
14 #include "vpx_ports/mem.h"
15 #include "vpx/vpx_integer.h"
16 
17 #include "vpx_dsp/variance.h"
18 
/* 2-tap bilinear filter kernels, one row per offset index 0..7. The two taps
 * of every row add up to 128; row k is { 128 - 16 * k, 16 * k }. */
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
};
29 
/* Returns the sum of squared differences between two 4x4 blocks of 8-bit
 * samples. a_stride and b_stride are the distances, in samples, between the
 * starts of consecutive rows of a and b respectively. */
uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride,
                            const uint8_t *b, int b_stride) {
  int sse = 0;
  int row;

  for (row = 0; row < 4; ++row, a += a_stride, b += b_stride) {
    int col;

    for (col = 0; col < 4; ++col) {
      const int delta = a[col] - b[col];
      sse += delta * delta;
    }
  }

  return sse;
}
47 
/* Returns the sum of the squares of the 256 int16_t values starting at a.
 * The total is accumulated in an unsigned int. */
uint32_t vpx_get_mb_ss_c(const int16_t *a) {
  const int16_t *const end = a + 256;
  unsigned int total = 0;

  while (a != end) {
    const int v = *a++;
    total += v * v;
  }

  return total;
}
57 
/* 16x16 variance with a filtered at offset index 4 horizontally only:
 * forwards to vpx_sub_pixel_variance16x16_c with xoffset = 4, yoffset = 0
 * (bilinear_filters[4] is { 64, 64 }, bilinear_filters[0] is { 128, 0 }). */
uint32_t vpx_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  const int xoffset = 4;
  const int yoffset = 0;

  return vpx_sub_pixel_variance16x16_c(a, a_stride, xoffset, yoffset,
                                       b, b_stride, sse);
}
64 
65 
/* 16x16 variance with a filtered at offset index 4 vertically only:
 * forwards to vpx_sub_pixel_variance16x16_c with xoffset = 0, yoffset = 4
 * (bilinear_filters[0] is { 128, 0 }, bilinear_filters[4] is { 64, 64 }). */
uint32_t vpx_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  const int xoffset = 0;
  const int yoffset = 4;

  return vpx_sub_pixel_variance16x16_c(a, a_stride, xoffset, yoffset,
                                       b, b_stride, sse);
}
72 
/* 16x16 variance with a filtered at offset index 4 in both directions:
 * forwards to vpx_sub_pixel_variance16x16_c with xoffset = yoffset = 4
 * (bilinear_filters[4] is { 64, 64 }). */
uint32_t vpx_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  const int xoffset = 4;
  const int yoffset = 4;

  return vpx_sub_pixel_variance16x16_c(a, a_stride, xoffset, yoffset,
                                       b, b_stride, sse);
}
79 
/* Walks a w x h block and accumulates the per-sample differences a - b into
 * *sum and their squares into *sse. Both outputs are reset to zero before
 * accumulation starts. a_stride and b_stride are the row pitches, in samples,
 * of a and b. */
static void variance(const uint8_t *a, int a_stride,
                     const uint8_t *b, int b_stride,
                     int w, int h, uint32_t *sse, int *sum) {
  int row, col;

  *sum = 0;
  *sse = 0;

  for (row = 0; row < h; ++row, a += a_stride, b += b_stride) {
    for (col = 0; col < w; ++col) {
      const int delta = a[col] - b[col];
      *sum += delta;
      *sse += delta * delta;
    }
  }
}
99 
100 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
101 // or vertical direction to produce the filtered output block. Used to implement
102 // the first-pass of 2-D separable filter.
103 //
104 // Produces int16_t output to retain precision for the next pass. Two filter
105 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
106 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
107 // It defines the offset required to move from one input to the next.
var_filter_block2d_bil_first_pass(const uint8_t * a,uint16_t * b,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)108 static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
109                                               unsigned int src_pixels_per_line,
110                                               int pixel_step,
111                                               unsigned int output_height,
112                                               unsigned int output_width,
113                                               const uint8_t *filter) {
114   unsigned int i, j;
115 
116   for (i = 0; i < output_height; ++i) {
117     for (j = 0; j < output_width; ++j) {
118       b[j] = ROUND_POWER_OF_TWO((int)a[0] * filter[0] +
119                           (int)a[pixel_step] * filter[1],
120                           FILTER_BITS);
121 
122       ++a;
123     }
124 
125     a += src_pixels_per_line - output_width;
126     b += output_width;
127   }
128 }
129 
130 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
131 // or vertical direction to produce the filtered output block. Used to implement
132 // the second-pass of 2-D separable filter.
133 //
134 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
135 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
136 // filter is applied horizontally (pixel_step = 1) or vertically
137 // (pixel_step = stride). It defines the offset required to move from one input
138 // to the next. Output is 8-bit.
var_filter_block2d_bil_second_pass(const uint16_t * a,uint8_t * b,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)139 static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
140                                                unsigned int src_pixels_per_line,
141                                                unsigned int pixel_step,
142                                                unsigned int output_height,
143                                                unsigned int output_width,
144                                                const uint8_t *filter) {
145   unsigned int  i, j;
146 
147   for (i = 0; i < output_height; ++i) {
148     for (j = 0; j < output_width; ++j) {
149       b[j] = ROUND_POWER_OF_TWO((int)a[0] * filter[0] +
150                           (int)a[pixel_step] * filter[1],
151                           FILTER_BITS);
152       ++a;
153     }
154 
155     a += src_pixels_per_line - output_width;
156     b += output_width;
157   }
158 }
159 
/* Defines vpx_variance<W>x<H>_c(): stores the sum of squared differences of
 * the W x H blocks a and b in *sse and returns
 * *sse - (sum * sum) / (W * H), where sum is the sum of the differences. The
 * squared sum is formed in 64 bits before the division. */
#define VAR(W, H) \
uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                   const uint8_t *b, int b_stride, \
                                   uint32_t *sse) { \
  int diff_sum; \
  int64_t mean_term; \
\
  variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
  mean_term = ((int64_t)diff_sum * diff_sum) / (W * H); \
  return *sse - mean_term; \
}
168 
/* Defines vpx_sub_pixel_variance<W>x<H>_c(): bilinearly filters block a and
 * returns the W x H variance of the filtered block against b.
 *
 * The first pass filters horizontally (pixel_step 1) with
 * bilinear_filters[xoffset] and produces H + 1 rows, because the second pass
 * combines each entry with the one W entries later (the next row of the
 * packed intermediate) using bilinear_filters[yoffset]. xoffset and yoffset
 * index bilinear_filters directly and are not range checked here. */
#define SUBPIX_VAR(W, H) \
uint32_t vpx_sub_pixel_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                             int xoffset, int yoffset, \
                                             const uint8_t *b, int b_stride, \
                                             uint32_t *sse) { \
  const uint8_t *const h_taps = bilinear_filters[xoffset]; \
  const uint8_t *const v_taps = bilinear_filters[yoffset]; \
  uint16_t first_pass[(H + 1) * W]; \
  uint8_t filtered[H * W]; \
\
  var_filter_block2d_bil_first_pass(a, first_pass, a_stride, 1, H + 1, W, \
                                    h_taps); \
  var_filter_block2d_bil_second_pass(first_pass, filtered, W, W, H, W, \
                                     v_taps); \
\
  return vpx_variance##W##x##H##_c(filtered, W, b, b_stride, sse); \
}
184 
/* Defines vpx_sub_pixel_avg_variance<W>x<H>_c(): bilinearly filters block a
 * (horizontal pass with bilinear_filters[xoffset], then vertical pass with
 * bilinear_filters[yoffset]), combines the filtered block with second_pred
 * through vpx_comp_avg_pred, and returns the W x H variance of that result
 * against b. second_pred is read with a row pitch of W. */
#define SUBPIX_AVG_VAR(W, H) \
uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(const uint8_t *a, \
                                                 int a_stride, \
                                                 int xoffset, int yoffset, \
                                                 const uint8_t *b, \
                                                 int b_stride, \
                                                 uint32_t *sse, \
                                                 const uint8_t *second_pred) { \
  const uint8_t *const h_taps = bilinear_filters[xoffset]; \
  const uint8_t *const v_taps = bilinear_filters[yoffset]; \
  uint16_t first_pass[(H + 1) * W]; \
  uint8_t filtered[H * W]; \
  DECLARE_ALIGNED(16, uint8_t, averaged[H * W]); \
\
  var_filter_block2d_bil_first_pass(a, first_pass, a_stride, 1, H + 1, W, \
                                    h_taps); \
  var_filter_block2d_bil_second_pass(first_pass, filtered, W, W, H, W, \
                                     v_taps); \
\
  vpx_comp_avg_pred(averaged, second_pred, W, H, filtered, W); \
\
  return vpx_variance##W##x##H##_c(averaged, W, b, b_stride, sse); \
}
206 
/* Defines vpx_get<W>x<H>var_c(): like the variance functions, but instead of
 * returning sse - sum^2 / (w * h) it hands back the two raw statistics
 * through the sse and sum out-parameters and returns nothing.
 */
#define GET_VAR(W, H) \
void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
                             const uint8_t *b, int b_stride, \
                             uint32_t *sse, int *sum) { \
  variance(a, a_stride, b, b_stride, (W), (H), sse, sum); \
}
217 
/* Defines vpx_mse<W>x<H>_c(): like the variance functions, but the
 * sum^2 / (w * h) term is not subtracted. The sum of squared differences is
 * written to *sse and also returned; the sum of differences is computed and
 * discarded.
 */
#define MSE(W, H) \
uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                              const uint8_t *b, int b_stride, \
                              uint32_t *sse) { \
  int unused_sum; \
\
  variance(a, a_stride, b, b_stride, W, H, sse, &unused_sum); \
  return *sse; \
}
230 
231 /* All three forms of the variance are available in the same sizes. */
232 #define VARIANCES(W, H) \
233     VAR(W, H) \
234     SUBPIX_VAR(W, H) \
235     SUBPIX_AVG_VAR(W, H)
236 
237 VARIANCES(64, 64)
238 VARIANCES(64, 32)
239 VARIANCES(32, 64)
240 VARIANCES(32, 32)
241 VARIANCES(32, 16)
242 VARIANCES(16, 32)
243 VARIANCES(16, 16)
244 VARIANCES(16, 8)
245 VARIANCES(8, 16)
246 VARIANCES(8, 8)
247 VARIANCES(8, 4)
248 VARIANCES(4, 8)
249 VARIANCES(4, 4)
250 
251 GET_VAR(16, 16)
252 GET_VAR(8, 8)
253 
254 MSE(16, 16)
255 MSE(16, 8)
256 MSE(8, 16)
257 MSE(8, 8)
258 
/* Writes to comp_pred, for every sample of a width x height block,
 * ROUND_POWER_OF_TWO(pred + ref, 1). comp_pred and pred are read/written with
 * a row pitch of width; ref uses a row pitch of ref_stride. */
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                         int width, int height,
                         const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
274 
275 #if CONFIG_VP9_HIGHBITDEPTH
/* High-bit-depth counterpart of variance(): a8 and b8 are converted with
 * CONVERT_TO_SHORTPTR and read as 16-bit samples. Over the w x h block the
 * per-sample differences are accumulated into *sum and their squares into
 * *sse, both 64-bit and both reset to zero first.
 *
 * NOTE(review): the signed difference is added to an unsigned 64-bit total,
 * so a negative total is held modulo 2^64; callers narrow it back to int. */
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride,
                              int w, int h, uint64_t *sse, uint64_t *sum) {
  uint16_t *src16 = CONVERT_TO_SHORTPTR(a8);
  uint16_t *ref16 = CONVERT_TO_SHORTPTR(b8);
  int row, col;

  *sum = 0;
  *sse = 0;

  for (row = 0; row < h; ++row, src16 += a_stride, ref16 += b_stride) {
    for (col = 0; col < w; ++col) {
      const int delta = src16[col] - ref16[col];
      *sum += delta;
      *sse += delta * delta;
    }
  }
}
296 
/* 8-bit flavour of the high-bit-depth statistics: runs highbd_variance64 and
 * narrows its 64-bit totals unchanged to uint32_t (*sse) and int (*sum). */
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride,
                              int w, int h, uint32_t *sse, int *sum) {
  uint64_t sse64 = 0;
  uint64_t sum64 = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);

  *sse = (uint32_t)sse64;
  *sum = (int)sum64;
}
306 
/* 10-bit flavour of the high-bit-depth statistics: runs highbd_variance64,
 * then stores ROUND_POWER_OF_TWO(sse, 4) in *sse and
 * ROUND_POWER_OF_TWO(sum, 2) in *sum (presumably to bring both back to the
 * 8-bit scale -- confirm against callers). */
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               int w, int h, uint32_t *sse, int *sum) {
  uint64_t sse64 = 0;
  uint64_t sum64 = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);

  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
}
316 
/* 12-bit flavour of the high-bit-depth statistics: runs highbd_variance64,
 * then stores ROUND_POWER_OF_TWO(sse, 8) in *sse and
 * ROUND_POWER_OF_TWO(sum, 4) in *sum (presumably to bring both back to the
 * 8-bit scale -- confirm against callers). */
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               int w, int h, uint32_t *sse, int *sum) {
  uint64_t sse64 = 0;
  uint64_t sum64 = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);

  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
}
326 
/* Defines vpx_highbd_<BD>_variance<W>x<H>_c() for one bit depth BD. The
 * function gets *sse and sum from highbd_<BD>_variance and returns
 * *sse - (sum * sum) / (W * H).
 *
 * NOTE(review): for BD 10 and 12 the helper rounds *sse and sum separately
 * before this subtraction, so the difference may go below zero and wrap in
 * the uint32_t return value -- confirm whether callers tolerate that. */
#define HIGHBD_VAR_BD(BD, W, H) \
uint32_t vpx_highbd_##BD##_variance##W##x##H##_c(const uint8_t *a, \
                                                 int a_stride, \
                                                 const uint8_t *b, \
                                                 int b_stride, \
                                                 uint32_t *sse) { \
  int diff_sum; \
  int64_t mean_term; \
\
  highbd_##BD##_variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
  mean_term = ((int64_t)diff_sum * diff_sum) / (W * H); \
  return *sse - mean_term; \
}

/* Defines the 8-, 10- and 12-bit variance functions for a W x H block. */
#define HIGHBD_VAR(W, H) \
  HIGHBD_VAR_BD(8, W, H) \
  HIGHBD_VAR_BD(10, W, H) \
  HIGHBD_VAR_BD(12, W, H)
357 
/* Defines vpx_highbd_<BD>_get<S>x<S>var_c() for one bit depth BD: forwards
 * to highbd_<BD>_variance for an S x S block and leaves the results in the
 * sse and sum out-parameters. */
#define HIGHBD_GET_VAR_BD(BD, S) \
void vpx_highbd_##BD##_get##S##x##S##var_c(const uint8_t *src, \
                                           int src_stride, \
                                           const uint8_t *ref, \
                                           int ref_stride, \
                                           uint32_t *sse, int *sum) { \
  highbd_##BD##_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
}

/* Defines the 8-, 10- and 12-bit sse/sum getters for an S x S block. */
#define HIGHBD_GET_VAR(S) \
  HIGHBD_GET_VAR_BD(8, S) \
  HIGHBD_GET_VAR_BD(10, S) \
  HIGHBD_GET_VAR_BD(12, S)
376 
/* Defines vpx_highbd_<BD>_mse<W>x<H>_c() for one bit depth BD: runs
 * highbd_<BD>_variance on the W x H block, writes the resulting sum of
 * squared differences to *sse and returns it. The sum of differences is
 * computed and discarded. */
#define HIGHBD_MSE_BD(BD, W, H) \
uint32_t vpx_highbd_##BD##_mse##W##x##H##_c(const uint8_t *src, \
                                            int src_stride, \
                                            const uint8_t *ref, \
                                            int ref_stride, \
                                            uint32_t *sse) { \
  int unused_sum; \
\
  highbd_##BD##_variance(src, src_stride, ref, ref_stride, W, H, sse, \
                         &unused_sum); \
  return *sse; \
}

/* Defines the 8-, 10- and 12-bit MSE functions for a W x H block. */
#define HIGHBD_MSE(W, H) \
  HIGHBD_MSE_BD(8, W, H) \
  HIGHBD_MSE_BD(10, W, H) \
  HIGHBD_MSE_BD(12, W, H)
407 
highbd_var_filter_block2d_bil_first_pass(const uint8_t * src_ptr8,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)408 static void highbd_var_filter_block2d_bil_first_pass(
409     const uint8_t *src_ptr8,
410     uint16_t *output_ptr,
411     unsigned int src_pixels_per_line,
412     int pixel_step,
413     unsigned int output_height,
414     unsigned int output_width,
415     const uint8_t *filter) {
416   unsigned int i, j;
417   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
418   for (i = 0; i < output_height; ++i) {
419     for (j = 0; j < output_width; ++j) {
420       output_ptr[j] =
421           ROUND_POWER_OF_TWO((int)src_ptr[0] * filter[0] +
422                              (int)src_ptr[pixel_step] * filter[1],
423                              FILTER_BITS);
424 
425       ++src_ptr;
426     }
427 
428     // Next row...
429     src_ptr += src_pixels_per_line - output_width;
430     output_ptr += output_width;
431   }
432 }
433 
highbd_var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)434 static void highbd_var_filter_block2d_bil_second_pass(
435     const uint16_t *src_ptr,
436     uint16_t *output_ptr,
437     unsigned int src_pixels_per_line,
438     unsigned int pixel_step,
439     unsigned int output_height,
440     unsigned int output_width,
441     const uint8_t *filter) {
442   unsigned int  i, j;
443 
444   for (i = 0; i < output_height; ++i) {
445     for (j = 0; j < output_width; ++j) {
446       output_ptr[j] =
447           ROUND_POWER_OF_TWO((int)src_ptr[0] * filter[0] +
448                              (int)src_ptr[pixel_step] * filter[1],
449                              FILTER_BITS);
450       ++src_ptr;
451     }
452 
453     src_ptr += src_pixels_per_line - output_width;
454     output_ptr += output_width;
455   }
456 }
457 
/* Defines vpx_highbd_<BD>_sub_pixel_variance<W>x<H>_c() for one bit depth
 * BD. src is filtered horizontally with bilinear_filters[xoffset] into
 * H + 1 rows, then vertically with bilinear_filters[yoffset] into H rows, and
 * the W x H result is passed (via CONVERT_TO_BYTEPTR) to
 * vpx_highbd_<BD>_variance<W>x<H>_c together with dst. xoffset and yoffset
 * index bilinear_filters directly and are not range checked here. */
#define HIGHBD_SUBPIX_VAR_BD(BD, W, H) \
uint32_t vpx_highbd_##BD##_sub_pixel_variance##W##x##H##_c( \
    const uint8_t *src, int src_stride, \
    int xoffset, int yoffset, \
    const uint8_t *dst, int dst_stride, \
    uint32_t *sse) { \
  uint16_t first_pass[(H + 1) * W]; \
  uint16_t filtered[H * W]; \
\
  highbd_var_filter_block2d_bil_first_pass(src, first_pass, src_stride, 1, \
                                           H + 1, W, \
                                           bilinear_filters[xoffset]); \
  highbd_var_filter_block2d_bil_second_pass(first_pass, filtered, W, W, H, W, \
                                            bilinear_filters[yoffset]); \
\
  return vpx_highbd_##BD##_variance##W##x##H##_c( \
      CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, sse); \
}

/* Defines the 8-, 10- and 12-bit sub-pixel variance functions for W x H. */
#define HIGHBD_SUBPIX_VAR(W, H) \
  HIGHBD_SUBPIX_VAR_BD(8, W, H) \
  HIGHBD_SUBPIX_VAR_BD(10, W, H) \
  HIGHBD_SUBPIX_VAR_BD(12, W, H)
509 
/* Defines vpx_highbd_<BD>_sub_pixel_avg_variance<W>x<H>_c() for one bit
 * depth BD. src is filtered horizontally with bilinear_filters[xoffset] and
 * vertically with bilinear_filters[yoffset]; the filtered block is combined
 * with second_pred through vpx_highbd_comp_avg_pred, and the W x H result is
 * passed (via CONVERT_TO_BYTEPTR) to vpx_highbd_<BD>_variance<W>x<H>_c
 * together with dst. */
#define HIGHBD_SUBPIX_AVG_VAR_BD(BD, W, H) \
uint32_t vpx_highbd_##BD##_sub_pixel_avg_variance##W##x##H##_c( \
    const uint8_t *src, int src_stride, \
    int xoffset, int yoffset, \
    const uint8_t *dst, int dst_stride, \
    uint32_t *sse, \
    const uint8_t *second_pred) { \
  uint16_t first_pass[(H + 1) * W]; \
  uint16_t filtered[H * W]; \
  DECLARE_ALIGNED(16, uint16_t, averaged[H * W]); \
\
  highbd_var_filter_block2d_bil_first_pass(src, first_pass, src_stride, 1, \
                                           H + 1, W, \
                                           bilinear_filters[xoffset]); \
  highbd_var_filter_block2d_bil_second_pass(first_pass, filtered, W, W, H, W, \
                                            bilinear_filters[yoffset]); \
\
  vpx_highbd_comp_avg_pred(averaged, second_pred, W, H, \
                           CONVERT_TO_BYTEPTR(filtered), W); \
\
  return vpx_highbd_##BD##_variance##W##x##H##_c( \
      CONVERT_TO_BYTEPTR(averaged), W, dst, dst_stride, sse); \
}

/* Defines the 8-, 10- and 12-bit averaging sub-pixel variance functions for
 * a W x H block. */
#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(8, W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(10, W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(12, W, H)
576 
577 /* All three forms of the variance are available in the same sizes. */
578 #define HIGHBD_VARIANCES(W, H) \
579     HIGHBD_VAR(W, H) \
580     HIGHBD_SUBPIX_VAR(W, H) \
581     HIGHBD_SUBPIX_AVG_VAR(W, H)
582 
583 HIGHBD_VARIANCES(64, 64)
584 HIGHBD_VARIANCES(64, 32)
585 HIGHBD_VARIANCES(32, 64)
586 HIGHBD_VARIANCES(32, 32)
587 HIGHBD_VARIANCES(32, 16)
588 HIGHBD_VARIANCES(16, 32)
589 HIGHBD_VARIANCES(16, 16)
590 HIGHBD_VARIANCES(16, 8)
591 HIGHBD_VARIANCES(8, 16)
592 HIGHBD_VARIANCES(8, 8)
593 HIGHBD_VARIANCES(8, 4)
594 HIGHBD_VARIANCES(4, 8)
595 HIGHBD_VARIANCES(4, 4)
596 
597 HIGHBD_GET_VAR(8)
598 HIGHBD_GET_VAR(16)
599 
600 HIGHBD_MSE(16, 16)
601 HIGHBD_MSE(16, 8)
602 HIGHBD_MSE(8, 16)
603 HIGHBD_MSE(8, 8)
604 
/* High-bit-depth counterpart of vpx_comp_avg_pred_c(): pred8 and ref8 are
 * converted with CONVERT_TO_SHORTPTR and read as 16-bit samples. For every
 * sample of the width x height block, ROUND_POWER_OF_TWO(pred + ref, 1) is
 * written to comp_pred. comp_pred and pred use a row pitch of width; ref uses
 * a row pitch of ref_stride. */
void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                              int width, int height, const uint8_t *ref8,
                              int ref_stride) {
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
621 #endif  // CONFIG_VP9_HIGHBITDEPTH
622