1 /*
2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstdlib>
13 #include <new>
14 
15 #include "config/aom_config.h"
16 #include "config/aom_dsp_rtcd.h"
17 
18 #include "aom/aom_codec.h"
19 #include "aom/aom_integer.h"
20 #include "aom_dsp/variance.h"
21 #include "aom_mem/aom_mem.h"
22 #include "aom_ports/aom_timer.h"
23 #include "aom_ports/mem.h"
24 #include "av1/common/reconinter.h"
25 #include "test/acm_random.h"
26 #include "test/clear_system_state.h"
27 #include "test/register_state_check.h"
28 #include "test/util.h"
29 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
30 
31 namespace AV1CompMaskVariance {
// Function-pointer type for the 8-bit masked compound predictors used in this
// file; both the C reference and the optimized versions are called through
// it.
typedef void (*comp_mask_pred_func)(uint8_t *comp_pred,   // output block
                                    const uint8_t *pred,  // first predictor
                                    int width,            // block width
                                    int height,           // block height
                                    const uint8_t *ref,   // second predictor
                                    int ref_stride,       // stride of ref
                                    const uint8_t *mask,  // blend mask
                                    int mask_stride,      // stride of mask
                                    int invert_mask);     // 0 or 1
36 
37 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AV2
38 const BLOCK_SIZE kValidBlockSize[] = {
39   BLOCK_8X8,   BLOCK_8X16, BLOCK_8X32,  BLOCK_16X8,  BLOCK_16X16,
40   BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32,
41 };
42 #endif
43 typedef ::testing::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
44 
45 class AV1CompMaskVarianceTest
46     : public ::testing::TestWithParam<CompMaskPredParam> {
47  public:
48   ~AV1CompMaskVarianceTest();
49   void SetUp();
50 
51   void TearDown();
52 
53  protected:
54   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
55   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)56   bool CheckResult(int width, int height) {
57     for (int y = 0; y < height; ++y) {
58       for (int x = 0; x < width; ++x) {
59         const int idx = y * width + x;
60         if (comp_pred1_[idx] != comp_pred2_[idx]) {
61           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
62           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
63           return false;
64         }
65       }
66     }
67     return true;
68   }
69 
70   libaom_test::ACMRandom rnd_;
71   uint8_t *comp_pred1_;
72   uint8_t *comp_pred2_;
73   uint8_t *pred_;
74   uint8_t *ref_buffer_;
75   uint8_t *ref_;
76 };
77 
~AV1CompMaskVarianceTest()78 AV1CompMaskVarianceTest::~AV1CompMaskVarianceTest() { ; }
79 
SetUp()80 void AV1CompMaskVarianceTest::SetUp() {
81   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
82   av1_init_wedge_masks();
83   comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
84   comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
85   pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
86   ref_buffer_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (8 * MAX_SB_SIZE));
87   ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
88   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
89     pred_[i] = rnd_.Rand8();
90   }
91   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
92     ref_buffer_[i] = rnd_.Rand8();
93   }
94 }
95 
TearDown()96 void AV1CompMaskVarianceTest::TearDown() {
97   aom_free(comp_pred1_);
98   aom_free(comp_pred2_);
99   aom_free(pred_);
100   aom_free(ref_buffer_);
101   libaom_test::ClearSystemState();
102 }
103 
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)104 void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
105                                              BLOCK_SIZE bsize, int inv) {
106   const int w = block_size_wide[bsize];
107   const int h = block_size_high[bsize];
108 
109   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
110   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
111     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
112 
113     aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
114                          inv);
115     test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
116 
117     ASSERT_EQ(CheckResult(w, h), true)
118         << " wedge " << wedge_index << " inv " << inv;
119   }
120 }
121 
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize)122 void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
123                                            BLOCK_SIZE bsize) {
124   const int w = block_size_wide[bsize];
125   const int h = block_size_high[bsize];
126 
127   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
128   int wedge_index = wedge_types / 2;
129   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
130   const int num_loops = 1000000000 / (w + h);
131 
132   comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
133   double elapsed_time[2] = { 0 };
134   for (int i = 0; i < 2; ++i) {
135     aom_usec_timer timer;
136     aom_usec_timer_start(&timer);
137     comp_mask_pred_func func = funcs[i];
138     for (int j = 0; j < num_loops; ++j) {
139       func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
140     }
141     aom_usec_timer_mark(&timer);
142     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
143     elapsed_time[i] = 1000.0 * time / num_loops;
144   }
145   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
146          elapsed_time[1]);
147   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
148 }
149 
// Verifies the parameterized implementation with invert_mask = 0 and then
// invert_mask = 1.
TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(GET_PARAM(0), GET_PARAM(1), inv);
  }
}
155 
// Benchmark; the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
  const comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize);
}
159 
// Instantiate the 8-bit suite once per SIMD flavor that is compiled in, pairing
// the flavor's implementation with every entry of kValidBlockSize.
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
    SSSE3, AV1CompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_ssse3),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1CompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2
173 
174 #ifndef aom_comp_mask_pred
175 // can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
176 class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
177  public:
178   ~AV1CompMaskUpVarianceTest();
179 
180  protected:
181   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
182   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
183                     int havSub);
184 };
185 
~AV1CompMaskUpVarianceTest()186 AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
187 
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)188 void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
189                                                BLOCK_SIZE bsize, int inv) {
190   const int w = block_size_wide[bsize];
191   const int h = block_size_high[bsize];
192   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
193   int subpel_search;
194   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
195        ++subpel_search) {
196     // loop through subx and suby
197     for (int sub = 0; sub < 8 * 8; ++sub) {
198       int subx = sub & 0x7;
199       int suby = (sub >> 3);
200       for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
201         const uint8_t *mask =
202             av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
203 
204         // ref
205         aom_comp_mask_upsampled_pred_c(
206             NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
207             MAX_SB_SIZE, mask, w, inv, subpel_search);
208 
209         aom_comp_mask_pred = test_impl;  // test
210         aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
211                                      w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
212                                      w, inv, subpel_search);
213         ASSERT_EQ(CheckResult(w, h), true)
214             << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
215             << "," << suby << ")";
216       }
217     }
218   }
219 }
220 
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int havSub)221 void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
222                                              BLOCK_SIZE bsize, int havSub) {
223   const int w = block_size_wide[bsize];
224   const int h = block_size_high[bsize];
225   const int subx = havSub ? 3 : 0;
226   const int suby = havSub ? 4 : 0;
227 
228   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
229   int wedge_index = wedge_types / 2;
230   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
231 
232   const int num_loops = 1000000000 / (w + h);
233   comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
234   double elapsed_time[2] = { 0 };
235   int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
236   for (int i = 0; i < 2; ++i) {
237     aom_usec_timer timer;
238     aom_usec_timer_start(&timer);
239     aom_comp_mask_pred = funcs[i];
240     for (int j = 0; j < num_loops; ++j) {
241       aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
242                                    w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
243                                    0, subpel_search);
244     }
245     aom_usec_timer_mark(&timer);
246     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
247     elapsed_time[i] = 1000.0 * time / num_loops;
248   }
249   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
250          elapsed_time[1]);
251   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
252 }
253 
// Verifies the upsampled path with invert_mask = 0 and then invert_mask = 1.
TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(GET_PARAM(0), GET_PARAM(1), inv);
  }
}
259 
// Benchmark of the upsampled path with a non-zero sub-pixel offset
// (havSub = 1); the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
  const comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize, 1);
}
263 
// Instantiate the upsampled 8-bit suite once per SIMD flavor that is compiled
// in, pairing the flavor's implementation with every entry of kValidBlockSize.
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
    SSSE3, AV1CompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_ssse3),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1CompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2
277 
278 #endif  // ifndef aom_comp_mask_pred
279 
// Function-pointer type for the high bit-depth masked compound predictors used
// in this file. Callers in this file pass the sample buffers through
// CONVERT_TO_BYTEPTR().
typedef void (*highbd_comp_mask_pred_func)(
    uint8_t *comp_pred8,   // output block
    const uint8_t *pred8,  // first predictor
    int width,             // block width
    int height,            // block height
    const uint8_t *ref8,   // second predictor
    int ref_stride,        // stride of ref8
    const uint8_t *mask,   // blend mask
    int mask_stride,       // stride of mask
    int invert_mask);      // 0 or 1
285 
286 typedef ::testing::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
287     HighbdCompMaskPredParam;
288 
289 class AV1HighbdCompMaskVarianceTest
290     : public ::testing::TestWithParam<HighbdCompMaskPredParam> {
291  public:
292   ~AV1HighbdCompMaskVarianceTest();
293   void SetUp();
294 
295   void TearDown();
296 
297  protected:
298   void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
299                       int inv);
300   void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)301   bool CheckResult(int width, int height) {
302     for (int y = 0; y < height; ++y) {
303       for (int x = 0; x < width; ++x) {
304         const int idx = y * width + x;
305         if (comp_pred1_[idx] != comp_pred2_[idx]) {
306           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
307           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
308           return false;
309         }
310       }
311     }
312     return true;
313   }
314 
315   libaom_test::ACMRandom rnd_;
316   uint16_t *comp_pred1_;
317   uint16_t *comp_pred2_;
318   uint16_t *pred_;
319   uint16_t *ref_buffer_;
320   uint16_t *ref_;
321 };
322 
~AV1HighbdCompMaskVarianceTest()323 AV1HighbdCompMaskVarianceTest::~AV1HighbdCompMaskVarianceTest() { ; }
324 
SetUp()325 void AV1HighbdCompMaskVarianceTest::SetUp() {
326   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
327   av1_init_wedge_masks();
328 
329   comp_pred1_ =
330       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
331   comp_pred2_ =
332       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
333   pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
334   ref_buffer_ = (uint16_t *)aom_memalign(
335       16, (MAX_SB_SQUARE + (8 * MAX_SB_SIZE)) * sizeof(*ref_buffer_));
336   ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
337 }
338 
TearDown()339 void AV1HighbdCompMaskVarianceTest::TearDown() {
340   aom_free(comp_pred1_);
341   aom_free(comp_pred2_);
342   aom_free(pred_);
343   aom_free(ref_buffer_);
344   libaom_test::ClearSystemState();
345 }
346 
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)347 void AV1HighbdCompMaskVarianceTest::RunCheckOutput(
348     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
349   int bd_ = GET_PARAM(2);
350 
351   const int w = block_size_wide[bsize];
352   const int h = block_size_high[bsize];
353 
354   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
355 
356   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
357     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
358   }
359   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
360     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
361   }
362 
363   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
364     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
365 
366     aom_highbd_comp_mask_pred_c(
367         CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
368         CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
369 
370     test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
371               CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
372 
373     ASSERT_EQ(CheckResult(w, h), true)
374         << " wedge " << wedge_index << " inv " << inv;
375   }
376 }
377 
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize)378 void AV1HighbdCompMaskVarianceTest::RunSpeedTest(
379     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
380   int bd_ = GET_PARAM(2);
381 
382   const int w = block_size_wide[bsize];
383   const int h = block_size_high[bsize];
384 
385   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
386   int wedge_index = wedge_types / 2;
387 
388   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
389     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
390   }
391   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
392     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
393   }
394 
395   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
396   const int num_loops = 1000000000 / (w + h);
397 
398   highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
399                                           test_impl };
400   double elapsed_time[2] = { 0 };
401   for (int i = 0; i < 2; ++i) {
402     aom_usec_timer timer;
403     aom_usec_timer_start(&timer);
404     highbd_comp_mask_pred_func func = funcs[i];
405     for (int j = 0; j < num_loops; ++j) {
406       func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
407            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
408     }
409     aom_usec_timer_mark(&timer);
410     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
411     elapsed_time[i] = 1000.0 * time / num_loops;
412   }
413   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
414          elapsed_time[1]);
415   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
416 }
417 
// Verifies the parameterized implementation with invert_mask = 0 and then
// invert_mask = 1.
TEST_P(AV1HighbdCompMaskVarianceTest, CheckOutput) {
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(GET_PARAM(0), GET_PARAM(1), inv);
  }
}
423 
// Benchmark; the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1HighbdCompMaskVarianceTest, DISABLED_Speed) {
  const highbd_comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize);
}
427 
// Instantiate the high bit-depth suite once per SIMD flavor that is compiled
// in, over every entry of kValidBlockSize and the bit depths produced by
// Range(8, 13, 2), i.e. 8, 10 and 12.
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1HighbdCompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, AV1HighbdCompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_sse2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
443 
444 #ifndef aom_highbd_comp_mask_pred
445 // can't run this test if aom_highbd_comp_mask_pred is defined to
446 // aom_highbd_comp_mask_pred_c
447 class AV1HighbdCompMaskUpVarianceTest : public AV1HighbdCompMaskVarianceTest {
448  public:
449   ~AV1HighbdCompMaskUpVarianceTest();
450 
451  protected:
452   void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
453                       int inv);
454   void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
455                     int havSub);
456 };
457 
~AV1HighbdCompMaskUpVarianceTest()458 AV1HighbdCompMaskUpVarianceTest::~AV1HighbdCompMaskUpVarianceTest() { ; }
459 
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)460 void AV1HighbdCompMaskUpVarianceTest::RunCheckOutput(
461     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
462   (void)test_impl;
463   int bd_ = GET_PARAM(2);
464   const int w = block_size_wide[bsize];
465   const int h = block_size_high[bsize];
466   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
467 
468   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
469     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
470   }
471   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
472     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
473   }
474 
475   int subpel_search;
476   for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
477     // loop through subx and suby
478     for (int sub = 0; sub < 8 * 8; ++sub) {
479       int subx = sub & 0x7;
480       int suby = (sub >> 3);
481       for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
482         const uint8_t *mask =
483             av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
484 
485         // ref
486         aom_highbd_upsampled_pred_c(
487             NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
488             suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
489 
490         aom_highbd_comp_mask_pred_c(
491             CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
492             CONVERT_TO_BYTEPTR(comp_pred1_), w, mask, w, inv);
493 
494         // test
495         aom_highbd_upsampled_pred(
496             NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx,
497             suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
498 
499         aom_highbd_comp_mask_pred(
500             CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
501             CONVERT_TO_BYTEPTR(comp_pred2_), w, mask, w, inv);
502 
503         ASSERT_EQ(CheckResult(w, h), true)
504             << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
505             << "," << suby << ")";
506       }
507     }
508   }
509 }
510 
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int havSub)511 void AV1HighbdCompMaskUpVarianceTest::RunSpeedTest(
512     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
513   int bd_ = GET_PARAM(2);
514   const int w = block_size_wide[bsize];
515   const int h = block_size_high[bsize];
516   const int subx = havSub ? 3 : 0;
517   const int suby = havSub ? 4 : 0;
518 
519   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
520   int wedge_index = wedge_types / 2;
521   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
522 
523   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
524     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
525   }
526   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
527     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
528   }
529 
530   const int num_loops = 1000000000 / (w + h);
531   highbd_comp_mask_pred_func funcs[2] = { &aom_highbd_comp_mask_pred_c,
532                                           test_impl };
533   double elapsed_time[2] = { 0 };
534   for (int i = 0; i < 2; ++i) {
535     aom_usec_timer timer;
536     aom_usec_timer_start(&timer);
537     aom_highbd_comp_mask_pred = funcs[i];
538     int subpel_search = 2;  // set to 1 to test 4-tap filter.
539     for (int j = 0; j < num_loops; ++j) {
540       aom_highbd_comp_mask_upsampled_pred(
541           NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_),
542           CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby, CONVERT_TO_BYTEPTR(ref_),
543           MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
544     }
545     aom_usec_timer_mark(&timer);
546     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
547     elapsed_time[i] = 1000.0 * time / num_loops;
548   }
549   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
550          elapsed_time[1]);
551   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
552 }
553 
// Verifies the upsampled path with invert_mask = 0 and then invert_mask = 1.
TEST_P(AV1HighbdCompMaskUpVarianceTest, CheckOutput) {
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(GET_PARAM(0), GET_PARAM(1), inv);
  }
}
559 
// Benchmark of the upsampled path with a non-zero sub-pixel offset
// (havSub = 1); the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1HighbdCompMaskUpVarianceTest, DISABLED_Speed) {
  const highbd_comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize, 1);
}
563 
// Instantiate the upsampled high bit-depth suite once per SIMD flavor that is
// compiled in, over every entry of kValidBlockSize and the bit depths produced
// by Range(8, 13, 2), i.e. 8, 10 and 12.
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1HighbdCompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, AV1HighbdCompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_sse2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
579 
580 #endif  // ifndef aom_highbd_comp_mask_pred
581 }  // namespace AV1CompMaskVariance
582