1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <math.h>
14 #include <stdio.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 
24 #include "av1/common/common.h"
25 #include "av1/common/mvref_common.h"
26 #include "av1/common/onyxc_int.h"
27 #include "av1/common/reconinter.h"
28 
29 #include "av1/encoder/encoder.h"
30 #include "av1/encoder/encodemv.h"
31 #include "av1/encoder/mcomp.h"
32 #include "av1/encoder/partition_strategy.h"
33 #include "av1/encoder/rdopt.h"
34 #include "av1/encoder/reconinter_enc.h"
35 
36 // #define NEW_DIAMOND_SEARCH
37 
get_buf_from_mv(const struct buf_2d * buf,const MV * mv)38 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
39                                              const MV *mv) {
40   return &buf->buf[mv->row * buf->stride + mv->col];
41 }
42 
// Shrinks |mv_limits| in place to its intersection with the window of
// +/- MAX_FULL_PEL_VAL around |mv|, itself clamped to
// [(MV_LOW >> 3) + 1, (MV_UPP >> 3) - 1].
// |mv| is handled as a value with 3 fractional bits: the integer part is
// mv >> 3, and a non-zero fraction raises the lower bound by one.
void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  const int col_full = mv->col >> 3;
  const int row_full = mv->row >> 3;
  const int col_has_frac = (mv->col & 7) != 0;
  const int row_has_frac = (mv->row & 7) != 0;

  int col_lo = col_full - MAX_FULL_PEL_VAL + col_has_frac;
  int row_lo = row_full - MAX_FULL_PEL_VAL + row_has_frac;
  int col_hi = col_full + MAX_FULL_PEL_VAL;
  int row_hi = row_full + MAX_FULL_PEL_VAL;

  // Keep the window strictly inside the representable MV range.
  col_lo = AOMMAX(col_lo, (MV_LOW >> 3) + 1);
  row_lo = AOMMAX(row_lo, (MV_LOW >> 3) + 1);
  col_hi = AOMMIN(col_hi, (MV_UPP >> 3) - 1);
  row_hi = AOMMIN(row_hi, (MV_UPP >> 3) - 1);

  // Intersect the UMV window with the valid MV window so that the diamond
  // search has fewer range checks to perform.
  mv_limits->col_min = AOMMAX(mv_limits->col_min, col_lo);
  mv_limits->col_max = AOMMIN(mv_limits->col_max, col_hi);
  mv_limits->row_min = AOMMAX(mv_limits->row_min, row_lo);
  mv_limits->row_max = AOMMIN(mv_limits->row_max, row_hi);
}
61 
set_subpel_mv_search_range(const MvLimits * mv_limits,int * col_min,int * col_max,int * row_min,int * row_max,const MV * ref_mv)62 static void set_subpel_mv_search_range(const MvLimits *mv_limits, int *col_min,
63                                        int *col_max, int *row_min, int *row_max,
64                                        const MV *ref_mv) {
65   const int max_mv = MAX_FULL_PEL_VAL * 8;
66   const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
67   const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
68   const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
69   const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
70 
71   *col_min = AOMMAX(MV_LOW + 1, minc);
72   *col_max = AOMMIN(MV_UPP - 1, maxc);
73   *row_min = AOMMAX(MV_LOW + 1, minr);
74   *row_max = AOMMIN(MV_UPP - 1, maxr);
75 }
76 
av1_init_search_range(int size)77 int av1_init_search_range(int size) {
78   int sr = 0;
79   // Minimum search size no matter what the passed in value.
80   size = AOMMAX(16, size);
81 
82   while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
83 
84   sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
85   return sr;
86 }
87 
mv_cost(const MV * mv,const int * joint_cost,int * const comp_cost[2])88 static INLINE int mv_cost(const MV *mv, const int *joint_cost,
89                           int *const comp_cost[2]) {
90   return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
91          comp_cost[1][mv->col];
92 }
93 
av1_mv_bit_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int weight)94 int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
95                     int *mvcost[2], int weight) {
96   const MV diff = { mv->row - ref->row, mv->col - ref->col };
97   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
98 }
99 
100 #define PIXEL_TRANSFORM_ERROR_SCALE 4
mv_err_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int error_per_bit)101 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
102                        int *mvcost[2], int error_per_bit) {
103   if (mvcost) {
104     const MV diff = { mv->row - ref->row, mv->col - ref->col };
105     return (int)ROUND_POWER_OF_TWO_64(
106         (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
107         RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
108             PIXEL_TRANSFORM_ERROR_SCALE);
109   }
110   return 0;
111 }
112 
mvsad_err_cost(const MACROBLOCK * x,const MV * mv,const MV * ref,int sad_per_bit)113 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
114                           int sad_per_bit) {
115   const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
116   return ROUND_POWER_OF_TWO(
117       (unsigned)mv_cost(&diff, x->nmv_vec_cost, x->mv_cost_stack) * sad_per_bit,
118       AV1_PROB_COST_SHIFT);
119 }
120 
// Fills |cfg| with a 4-point (plus-shaped) search pattern.
// Site 0 is the zero displacement. Then, for each step length starting at
// MAX_FIRST_STEP and halving down to 1, four sites are appended in the order
// {-len, 0}, {len, 0}, {0, -len}, {0, len} as {row, col}. Each site's buffer
// offset is precomputed as row * stride + col.
void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  int num_sites = 0;
  int step;

  cfg->ss[num_sites].mv.row = 0;
  cfg->ss[num_sites].mv.col = 0;
  cfg->ss[num_sites].offset = 0;
  ++num_sites;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    const MV cross[4] = {
      { -step, 0 }, { step, 0 }, { 0, -step }, { 0, step }
    };
    int k;
    for (k = 0; k < 4; ++k) {
      search_site *const site = &cfg->ss[num_sites];
      site->mv = cross[k];
      site->offset = site->mv.row * stride + site->mv.col;
      ++num_sites;
    }
  }

  cfg->ss_count = num_sites;
  cfg->searches_per_step = 4;
}
141 
// Fills |cfg| with an 8-point search pattern.
// Site 0 is the zero displacement. Then, for each step length starting at
// MAX_FIRST_STEP and halving down to 1, eight sites are appended: the four
// axis-aligned neighbours followed by the four diagonal neighbours, all at
// distance len per component. Each site's buffer offset is precomputed as
// row * stride + col.
void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
  int num_sites = 0;
  int step;

  cfg->ss[num_sites].mv.row = 0;
  cfg->ss[num_sites].mv.col = 0;
  cfg->ss[num_sites].offset = 0;
  ++num_sites;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    const MV ring[8] = {
      // Axis-aligned neighbours.
      { -step, 0 },
      { step, 0 },
      { 0, -step },
      { 0, step },
      // Diagonal neighbours.
      { -step, -step },
      { -step, step },
      { step, -step },
      { step, step }
    };
    int k;
    for (k = 0; k < 8; ++k) {
      search_site *const site = &cfg->ss[num_sites];
      site->mv = ring[k];
      site->offset = site->mv.row * stride + site->mv.col;
      ++num_sites;
    }
  }

  cfg->ss_count = num_sites;
  cfg->searches_per_step = 8;
}
164 
/*
 * To avoid the penalty of reads that cross a cache line, preload the
 * reference area into a small buffer that is aligned so that no read from it
 * crosses a cache line. This reduces the CPU cycles spent reading reference
 * data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
174 
175 // convert motion vector component to offset for sv[a]f calc
sp(int x)176 static INLINE int sp(int x) { return x & 7; }
177 
pre(const uint8_t * buf,int stride,int r,int c)178 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
179   const int offset = (r >> 3) * stride + (c >> 3);
180   return buf + offset;
181 }
182 
/*
 * Scores the candidate position (r, c) and adopts it as the new best when its
 * score is lower than besterr.
 *
 * A candidate inside [minr, maxr] x [minc, maxc] gets
 *   v = mv_err_cost(candidate, ref_mv, ...) + thismse
 * where thismse is returned by vfp->svf (second_pred == NULL), vfp->msvf
 * (second_pred with a mask) or vfp->svaf (second_pred without a mask). If v
 * is lower than besterr, then besterr, br, bc, *distortion and *sse1 are
 * updated. A candidate outside the window only sets v to INT_MAX.
 *
 * The macro reads and writes local variables of the function it is expanded
 * in (see SETUP_SUBPEL_SEARCH) as well as that function's parameters.
 *
 * The parameters v, r and c are parenthesized wherever operator precedence
 * could otherwise change the meaning of an argument expression. r and c are
 * still evaluated several times, so they must be free of side effects. The
 * expansion is a bare if/else statement: use it as a complete statement
 * inside braces, never as the unbraced body of another if.
 */
#define CHECK_BETTER(v, r, c)                                              \
  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {          \
    MV this_mv = { (r), (c) };                                             \
    (v) = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);   \
    if (second_pred == NULL) {                                             \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),   \
                         src_address, src_stride, &sse);                   \
    } else if (mask) {                                                     \
      thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),  \
                          src_address, src_stride, second_pred, mask,      \
                          mask_stride, invert_mask, &sse);                 \
    } else {                                                               \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),  \
                          src_address, src_stride, &sse, second_pred);     \
    }                                                                      \
    (v) += thismse;                                                        \
    if ((v) < besterr) {                                                   \
      besterr = (v);                                                       \
      br = (r);                                                            \
      bc = (c);                                                            \
      *distortion = thismse;                                               \
      *sse1 = sse;                                                         \
    }                                                                      \
  } else {                                                                 \
    (v) = INT_MAX;                                                         \
  }

/* Alias so that CHECK_BETTER##k resolves for k == 0. */
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
212 
/*
 * Variant of CHECK_BETTER that obtains the prediction error from
 * upsampled_pref_error() instead of the vfp sub-pixel variance functions.
 *
 * A candidate (r, c) inside [minr, maxr] x [minc, maxc] gets
 *   v = mv_err_cost(candidate, ref_mv, ...) + thismse
 * and, if v is lower than besterr, then besterr, br, bc, *distortion and
 * *sse1 are updated. A candidate outside the window only sets v to INT_MAX.
 *
 * Besides the locals used by CHECK_BETTER, the expansion needs xd, cm,
 * mi_row, mi_col, w, h and use_accurate_subpel_search in scope.
 *
 * The parameters v, r and c are parenthesized wherever operator precedence
 * could otherwise change the meaning of an argument expression. r and c are
 * still evaluated several times, so they must be free of side effects. The
 * expansion is a bare if/else statement: use it as a complete statement
 * inside braces, never as the unbraced body of another if.
 */
#define CHECK_BETTER1(v, r, c)                                              \
  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {           \
    MV this_mv = { (r), (c) };                                              \
    thismse = upsampled_pref_error(                                         \
        xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,     \
        pre(y, y_stride, r, c), y_stride, sp(c), sp(r), second_pred, mask,  \
        mask_stride, invert_mask, w, h, &sse, use_accurate_subpel_search);  \
    (v) = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);    \
    (v) += thismse;                                                         \
    if ((v) < besterr) {                                                    \
      besterr = (v);                                                        \
      br = (r);                                                             \
      bc = (c);                                                             \
      *distortion = thismse;                                                \
      *sse1 = sse;                                                          \
    }                                                                       \
  } else {                                                                  \
    (v) = INT_MAX;                                                          \
  }
233 
/*
 * Checks the four neighbours of (tr, tc) at distance hstep (left, right, up,
 * down, in that order) and then one diagonal neighbour.
 * whichdir records the outcome: bit 0 is set when left is not lower than
 * right, bit 1 is set when up is not lower than down. The diagonal tested
 * lies on the side those bits select (bit 0 set: +hstep in the column,
 * bit 1 set: +hstep in the row; cleared bits select -hstep).
 */
#define FIRST_LEVEL_CHECKS                                   \
  {                                                          \
    unsigned int left, right, up, down, diag;                \
    int diag_dr, diag_dc;                                    \
    CHECK_BETTER(left, tr, tc - hstep);                      \
    CHECK_BETTER(right, tr, tc + hstep);                     \
    CHECK_BETTER(up, tr - hstep, tc);                        \
    CHECK_BETTER(down, tr + hstep, tc);                      \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
    diag_dc = (whichdir & 1) ? hstep : -hstep;               \
    diag_dr = (whichdir & 2) ? hstep : -hstep;               \
    CHECK_BETTER(diag, tr + diag_dr, tc + diag_dc);          \
  }
249 
/*
 * Extra candidates tested after FIRST_LEVEL_CHECKS, chosen from how the best
 * position (br, bc) moved away from the centre (tr, tc):
 *   - moved in both row and column: two points one further step along each
 *     axis;
 *   - moved in the column only: two points two column steps away at
 *     tr +/- hstep, plus one point one column step away on the row side
 *     selected by whichdir;
 *   - moved in the row only: the mirrored set of three points;
 *   - did not move: nothing is tested.
 * The displacement (kr, kc) is captured once, before any CHECK_BETTER call
 * can change br or bc.
 */
#define SECOND_LEVEL_CHECKS                                 \
  {                                                         \
    const int kr = br - tr;                                 \
    const int kc = bc - tc;                                 \
    unsigned int second;                                    \
    if (tc != bc) {                                         \
      if (tr != br) {                                       \
        CHECK_BETTER(second, tr + kr, tc + 2 * kc);         \
        CHECK_BETTER(second, tr + 2 * kr, tc + kc);         \
      } else {                                              \
        CHECK_BETTER(second, tr + hstep, tc + 2 * kc);      \
        CHECK_BETTER(second, tr - hstep, tc + 2 * kc);      \
        if (whichdir == 0 || whichdir == 1) {               \
          CHECK_BETTER(second, tr + hstep, tc + kc);        \
        } else if (whichdir == 2 || whichdir == 3) {        \
          CHECK_BETTER(second, tr - hstep, tc + kc);        \
        }                                                   \
      }                                                     \
    } else if (tr != br) {                                  \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);        \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);        \
      if (whichdir == 0 || whichdir == 2) {                 \
        CHECK_BETTER(second, tr + kr, tc + hstep);          \
      } else if (whichdir == 1 || whichdir == 3) {          \
        CHECK_BETTER(second, tr + kr, tc - hstep);          \
      }                                                     \
    }                                                       \
  }
281 
// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
//
// Tests up to three more candidates around the best position on entry,
// (br0, bc0), using CHECK_BETTER<k>: one offset by kr rows, one offset by kc
// columns and, only if either of those became the new best, the one offset
// by both.
// kr and kc are NOT declared here; they must already exist at the point of
// expansion. Only the component along which the best position moved away
// from (tr, tc) is refreshed; the other keeps the value it had before.
#define SECOND_LEVEL_CHECKS_BEST(k)                \
  {                                                \
    unsigned int second;                           \
    const int br0 = br;                            \
    const int bc0 = bc;                            \
    assert(tr == br || tc == bc);                  \
    if (tr == br) {                                \
      if (tc != bc) {                              \
        kc = bc - tc;                              \
      }                                            \
    } else if (tc == bc) {                         \
      kr = br - tr;                                \
    }                                              \
    CHECK_BETTER##k(second, br0 + kr, bc0);        \
    CHECK_BETTER##k(second, br0, bc0 + kc);        \
    if (!(br0 == br && bc0 == bc)) {               \
      CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
    }                                              \
  }
302 
/*
 * Declares and initialises the locals shared by the sub-pel search functions
 * and the CHECK_BETTER family of macros. It expects x, ref_mv and
 * iters_per_step to be in scope.
 *
 * On entry x->best_mv.as_mv is read as the starting position: offset is its
 * position in the reference buffer (row * y_stride + col), and br/bc (best)
 * and tr/tc (current centre) start at that position multiplied by 8. hstep
 * starts at 4. The search window minc/maxc/minr/maxr comes from
 * set_subpel_mv_search_range(). As the last step, best_mv itself is
 * multiplied by 8 in place.
 */
#define SETUP_SUBPEL_SEARCH                                             \
  const MACROBLOCKD *xd = &x->e_mbd;                                    \
  MV *bestmv = &x->best_mv.as_mv;                                       \
  const uint8_t *const src_address = x->plane[0].src.buf;               \
  const int src_stride = x->plane[0].src.stride;                        \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                     \
  const int y_stride = xd->plane[0].pre[0].stride;                      \
  const int offset = bestmv->row * y_stride + bestmv->col;              \
  const unsigned int halfiters = iters_per_step;                        \
  const unsigned int quarteriters = iters_per_step;                     \
  const unsigned int eighthiters = iters_per_step;                      \
  unsigned int besterr = INT_MAX;                                       \
  unsigned int sse;                                                     \
  unsigned int whichdir;                                                \
  int thismse;                                                          \
  int hstep = 4;                                                        \
  int minc, maxc, minr, maxr;                                           \
  int br = bestmv->row * 8;                                             \
  int bc = bestmv->col * 8;                                             \
  int tr = br;                                                          \
  int tc = bc;                                                          \
                                                                        \
  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \
                             ref_mv);                                   \
                                                                        \
  bestmv->row *= 8;                                                     \
  bestmv->col *= 8;
331 
setup_center_error(const MACROBLOCKD * xd,const MV * bestmv,const MV * ref_mv,int error_per_bit,const aom_variance_fn_ptr_t * vfp,const uint8_t * const src,const int src_stride,const uint8_t * const y,int y_stride,const uint8_t * second_pred,const uint8_t * mask,int mask_stride,int invert_mask,int w,int h,int offset,int * mvjcost,int * mvcost[2],unsigned int * sse1,int * distortion)332 static unsigned int setup_center_error(
333     const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
334     int error_per_bit, const aom_variance_fn_ptr_t *vfp,
335     const uint8_t *const src, const int src_stride, const uint8_t *const y,
336     int y_stride, const uint8_t *second_pred, const uint8_t *mask,
337     int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
338     int *mvcost[2], unsigned int *sse1, int *distortion) {
339   unsigned int besterr;
340   if (second_pred != NULL) {
341     if (is_cur_buf_hbd(xd)) {
342       DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
343       uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
344       if (mask) {
345         aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y + offset,
346                                   y_stride, mask, mask_stride, invert_mask);
347       } else {
348         aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
349                                  y_stride);
350       }
351       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
352     } else {
353       DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
354       if (mask) {
355         aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
356                            mask, mask_stride, invert_mask);
357       } else {
358         aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
359       }
360       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
361     }
362   } else {
363     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
364   }
365   *distortion = besterr;
366   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
367   return besterr;
368 }
369 
divide_and_round(int n,int d)370 static INLINE int divide_and_round(int n, int d) {
371   return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
372 }
373 
is_cost_list_wellbehaved(int * cost_list)374 static INLINE int is_cost_list_wellbehaved(int *cost_list) {
375   return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
376          cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
377 }
378 
// Estimates where the minimum of the cost surface lies, with a precision of
// 1/2^bits.
// The surface is modelled as S = A(x - x0)^2 + B(y - y0)^2 + C. Given the
// costs S0..S4 sampled at (y, x) = (0, 0), (0, -1), (1, 0), (0, 1), (-1, 0),
// the minimum (x0, y0) is
//   x0 = 1/2 (S1 - S3) / (S1 + S3 - 2*S0),
//   y0 = 1/2 (S4 - S2) / (S4 + S2 - 2*S0).
// This is the integer version: *ic receives x0 and *ir receives y0, each
// scaled by 2^bits and rounded by divide_and_round(). Both denominators must
// be non-zero.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  const int half_scale = 1 << (bits - 1);
  int num, den;

  // Column (x) estimate.
  num = (cost_list[1] - cost_list[3]) * half_scale;
  den = cost_list[1] - 2 * cost_list[0] + cost_list[3];
  *ic = divide_and_round(num, den);

  // Row (y) estimate.
  num = (cost_list[4] - cost_list[2]) * half_scale;
  den = cost_list[4] - 2 * cost_list[0] + cost_list[2];
  *ir = divide_and_round(num, den);
}
393 
// Sub-pel refinement of x->best_mv around its value on entry.
//
// When |cost_list| holds five valid entries with the centre strictly lowest,
// the minimum of the fitted cost surface is estimated and, unless the
// estimate is the centre itself, that single point is tested. Otherwise a
// FIRST_LEVEL_CHECKS / SECOND_LEVEL_CHECKS round is run at the initial hstep
// and, unless forced_stop == 2, a second round at half that step.
// A final round at a further halved step runs only when allow_hp is set and
// forced_stop == 0.
// (forced_stop: 0 - full, 1 - qtr only, 2 - half only.)
//
// x->best_mv.as_mv is overwritten with the best position found, and
// *distortion and *sse1 hold the values recorded for it. Returns the best
// score. cm, mi_row, mi_col, use_accurate_subpel_search and
// do_reset_fractional_mv are not used by this variant.
int av1_find_best_sub_pixel_tree_pruned_evenmore(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  // Parameters that this variant does not need.
  (void)cm;
  (void)mi_row;
  (void)mi_col;
  (void)use_accurate_subpel_search;
  (void)do_reset_fractional_mv;

  if (!cost_list || cost_list[0] == INT_MAX || cost_list[1] == INT_MAX ||
      cost_list[2] == INT_MAX || cost_list[3] == INT_MAX ||
      cost_list[4] == INT_MAX || !is_cost_list_wellbehaved(cost_list)) {
    // No usable cost surface: search iteratively.
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration could be 2 ( if diag selected) 1/4 pel
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  } else {
    // Test the estimated minimum of the cost surface, unless it coincides
    // with the centre.
    int est_r, est_c;
    unsigned int est_cost;
    get_cost_surf_min(cost_list, &est_r, &est_c, 2);
    if (!(est_r == 0 && est_c == 0)) {
      CHECK_BETTER(est_cost, tr + 2 * est_r, tc + 2 * est_c);
    }
  }

  tr = br;
  tc = bc;

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
466 
// Sub-pel refinement of x->best_mv around its value on entry.
//
// First stage: when |cost_list| holds five valid entries with the centre
// strictly lowest, the minimum of the fitted cost surface is estimated (in
// units of hstep) and, unless the estimate is the centre itself, that single
// point is tested. Otherwise a FIRST_LEVEL_CHECKS / SECOND_LEVEL_CHECKS round
// is run at the initial hstep.
// Second stage (skipped when forced_stop == 2): a round at half the step,
// centred on the best position so far.
// Third stage (only when allow_hp is set and forced_stop == 0): a round at a
// further halved step.
// (forced_stop: 0 - full, 1 - qtr only, 2 - half only.)
//
// x->best_mv.as_mv is overwritten with the best position found, and
// *distortion and *sse1 hold the values recorded for it. Returns the best
// score. cm, mi_row, mi_col, use_accurate_subpel_search and
// do_reset_fractional_mv are not used by this variant.
int av1_find_best_sub_pixel_tree_pruned_more(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  SETUP_SUBPEL_SEARCH;
  // Parameters that this variant does not need.
  (void)cm;
  (void)mi_row;
  (void)mi_col;
  (void)use_accurate_subpel_search;
  (void)do_reset_fractional_mv;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);

  if (!cost_list || cost_list[0] == INT_MAX || cost_list[1] == INT_MAX ||
      cost_list[2] == INT_MAX || cost_list[3] == INT_MAX ||
      cost_list[4] == INT_MAX || !is_cost_list_wellbehaved(cost_list)) {
    // No usable cost surface: search iteratively.
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  } else {
    // Test the estimated minimum of the cost surface, unless it coincides
    // with the centre.
    int est_r, est_c;
    unsigned int est_cost;
    get_cost_surf_min(cost_list, &est_r, &est_c, 1);
    if (!(est_r == 0 && est_c == 0)) {
      CHECK_BETTER(est_cost, tr + est_r * hstep, tc + est_c * hstep);
    }
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && forced_stop == 0) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
535 
// Sub-pel refinement of x->best_mv around its value on entry.
//
// First stage: when |cost_list| holds five valid entries, they pick one
// quadrant and only three points are tested at the initial hstep: the
// horizontal neighbour, the vertical neighbour and the diagonal between them.
// The column goes left when cost_list[1] < cost_list[3] (else right) and the
// row goes down when cost_list[2] < cost_list[4] (else up). Without a valid
// cost list a FIRST_LEVEL_CHECKS / SECOND_LEVEL_CHECKS round is run instead.
// Second stage (skipped when forced_stop == 2): a round at half the step,
// centred on the best position so far.
// Third stage (only when allow_hp is set and forced_stop == 0): a round at a
// further halved step.
// (forced_stop: 0 - full, 1 - qtr only, 2 - half only.)
//
// x->best_mv.as_mv is overwritten with the best position found, and
// *distortion and *sse1 hold the values recorded for it. Returns the best
// score. cm, mi_row, mi_col, use_accurate_subpel_search and
// do_reset_fractional_mv are not used by this variant.
int av1_find_best_sub_pixel_tree_pruned(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  SETUP_SUBPEL_SEARCH;
  // Parameters that this variant does not need.
  (void)cm;
  (void)mi_row;
  (void)mi_col;
  (void)use_accurate_subpel_search;
  (void)do_reset_fractional_mv;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);

  if (!cost_list || cost_list[0] == INT_MAX || cost_list[1] == INT_MAX ||
      cost_list[2] == INT_MAX || cost_list[3] == INT_MAX ||
      cost_list[4] == INT_MAX) {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  } else {
    unsigned int horiz, vert, diag;
    int step_r, step_c;
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    // Bit 0 of whichdir selects the column side (clear: left, set: right);
    // bit 1 selects the row side (clear: down, set: up).
    step_c = (whichdir & 1) ? hstep : -hstep;
    step_r = (whichdir & 2) ? -hstep : hstep;
    CHECK_BETTER(horiz, tr, tc + step_c);
    CHECK_BETTER(vert, tr + step_r, tc);
    CHECK_BETTER(diag, tr + step_r, tc + step_c);
  }

  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
626 
// Neighbour offsets as {row, col} pairs. Each line holds the left, right, up
// and down offsets for one step size: 4, then 2, then 1.
// NOTE(review): the step sizes match the hstep values used by the sub-pel
// searches above (4, 2, 1); presumably the same units - confirm at the use
// site.
/* clang-format off */
static const MV search_step_table[12] = {
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
};
/* clang-format on */
635 
// Measures the variance between the source block and a prediction built at
// a sub-pixel position.
//
// The prediction is generated into a local, 16-byte aligned buffer by one of
// the aom_*upsampled_pred() routines:
//   - masked compound when both |second_pred| and |mask| are set,
//   - averaging compound when only |second_pred| is set,
//   - single reference otherwise.
// The high-bitdepth variants are used when is_cur_buf_hbd(xd) is true.
// |subpel_search| is forwarded unchanged to the chosen routine.
//
// The buffer is then compared against |src| through vfp->vf() using |w| as
// the prediction stride. The SSE is written through |sse| and the variance is
// returned.
static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm,
                                int mi_row, int mi_col, const MV *const mv,
                                const aom_variance_fn_ptr_t *vfp,
                                const uint8_t *const src, const int src_stride,
                                const uint8_t *const y, int y_stride,
                                int subpel_x_q3, int subpel_y_q3,
                                const uint8_t *second_pred, const uint8_t *mask,
                                int mask_stride, int invert_mask, int w, int h,
                                unsigned int *sse, int subpel_search) {
  unsigned int variance;

  if (!is_cur_buf_hbd(xd)) {
    // 8-bit path.
    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);

    if (second_pred == NULL) {
      aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
                         subpel_y_q3, y, y_stride, subpel_search);
    } else if (mask) {
      aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
                                   second_pred, w, h, subpel_x_q3, subpel_y_q3,
                                   y, y_stride, mask, mask_stride, invert_mask,
                                   subpel_search);
    } else {
      aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
                                  second_pred, w, h, subpel_x_q3, subpel_y_q3,
                                  y, y_stride, subpel_search);
    }

    variance = vfp->vf(pred, w, src, src_stride, sse);
  } else {
    // High-bitdepth path: 16-bit samples behind a converted byte pointer.
    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
    uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);

    if (second_pred == NULL) {
      aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
                                subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
                                subpel_search);
    } else if (mask) {
      aom_highbd_comp_mask_upsampled_pred(
          xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
          subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd,
          subpel_search);
    } else {
      aom_highbd_comp_avg_upsampled_pred(
          xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
          subpel_y_q3, y, y_stride, xd->bd, subpel_search);
    }

    variance = vfp->vf(pred8, w, src, src_stride, sse);
  }

  return variance;
}
688 
// Evaluates the search centre for the accurate sub-pixel search.
//
// The prediction is built at |y| + |offset| with zero sub-pixel phase via
// upsampled_pref_error(). The resulting variance is stored in |*distortion|
// (SSE in |*sse1|), and the value returned is that variance plus the cost of
// coding |bestmv| relative to |ref_mv|.
static unsigned int upsampled_setup_center_error(
    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *bestmv, const MV *ref_mv, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
    const int src_stride, const uint8_t *const y, int y_stride,
    const uint8_t *second_pred, const uint8_t *mask, int mask_stride,
    int invert_mask, int w, int h, int offset, int *mvjcost, int *mvcost[2],
    unsigned int *sse1, int *distortion, int subpel_search) {
  const uint8_t *const center = y + offset;
  const unsigned int center_var = upsampled_pref_error(
      xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride, center, y_stride,
      0, 0, second_pred, mask, mask_stride, invert_mask, w, h, sse1,
      subpel_search);

  *distortion = center_var;
  return center_var +
         mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
}
705 
706 // when use_accurate_subpel_search == 0
estimate_upsampled_pref_error(const aom_variance_fn_ptr_t * vfp,const uint8_t * const src,const int src_stride,const uint8_t * const pre,int y_stride,int subpel_x_q3,int subpel_y_q3,const uint8_t * second_pred,const uint8_t * mask,int mask_stride,int invert_mask,unsigned int * sse)707 static INLINE unsigned int estimate_upsampled_pref_error(
708     const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
709     const int src_stride, const uint8_t *const pre, int y_stride,
710     int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred,
711     const uint8_t *mask, int mask_stride, int invert_mask, unsigned int *sse) {
712   if (second_pred == NULL) {
713     return vfp->svf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
714                     sse);
715   } else if (mask) {
716     return vfp->msvf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
717                      second_pred, mask, mask_stride, invert_mask, sse);
718   } else {
719     return vfp->svaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
720                      sse, second_pred);
721   }
722 }
723 
// Sub-pixel motion vector refinement around the vector held in x->best_mv.
//
// On entry x->best_mv.as_mv is treated as a full-pel vector; it is scaled to
// 1/8-pel units here and, on normal exit, overwritten with the refined
// 1/8-pel result.
//
// Each iteration probes the four axis-aligned neighbours at the current step
// (taken from search_step_table), then one diagonal neighbour chosen from the
// cheaper horizontal and the cheaper vertical side, and, when
// iters_per_step > 1 and something improved, runs SECOND_LEVEL_CHECKS_BEST.
// The step is halved after every iteration.
//
// The number of iterations is 3 - forced_stop, capped at 2 when allow_hp is
// zero.
//
// use_accurate_subpel_search selects how a candidate is scored: non-zero
// builds the prediction through upsampled_pref_error() (the value is also
// forwarded as its subpel_search argument); zero uses
// estimate_upsampled_pref_error().
//
// When do_reset_fractional_mv is non-zero, av1_set_fractional_mv() is called
// on x->fractional_best_mv before the search starts.
//
// Outputs: *distortion and *sse1 receive the distortion and SSE of the best
// position found. cost_list is not used.
//
// Returns the best error (distortion plus MV cost), or INT_MAX if the centre
// at the start of some iteration equals the position already recorded in
// x->fractional_best_mv for that iteration.
// NOTE(review): on that early return x->best_mv has been scaled to 1/8 pel
// but has not been updated with refinement from earlier iterations.
int av1_find_best_sub_pixel_tree(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  const uint8_t *const src_address = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int thismse;
  const int y_stride = xd->plane[0].pre[0].stride;
  MV *bestmv = &x->best_mv.as_mv;
  // Offset of the full-pel starting position inside the reference buffer;
  // computed before bestmv is rescaled below.
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  // Current best position in 1/8-pel units.
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  // Diagonal step size; starts at half pel (4/8) and is halved per iteration.
  int hstep = 4;
  int iter, round = 3 - forced_stop;
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  // Costs of the probes: [0..3] follow the order of search_step_table
  // (left, right, up, down), [4] is the diagonal.
  unsigned int cost_array[5];
  int kr, kc;
  // Bounds every probed position is checked against.
  int minc, maxc, minr, maxr;

  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);

  // Without high-precision MVs, never run a third iteration.
  if (!allow_hp)
    if (round == 3) round = 2;

  // Convert the stored best MV from full-pel to 1/8-pel units.
  bestmv->row *= 8;
  bestmv->col *= 8;

  // Score the starting position.
  if (use_accurate_subpel_search)
    besterr = upsampled_setup_center_error(
        xd, cm, mi_row, mi_col, bestmv, ref_mv, error_per_bit, vfp, src_address,
        src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w,
        h, offset, mvjcost, mvcost, sse1, distortion,
        use_accurate_subpel_search);
  else
    besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                 src_address, src_stride, y, y_stride,
                                 second_pred, mask, mask_stride, invert_mask, w,
                                 h, offset, mvjcost, mvcost, sse1, distortion);

  (void)cost_list;  // to silence compiler warning

  if (do_reset_fractional_mv) {
    av1_set_fractional_mv(x->fractional_best_mv);
  }

  for (iter = 0; iter < round; ++iter) {
    // Bail out if this iteration would start from the position already
    // recorded for it; otherwise record the current centre.
    if ((x->fractional_best_mv[iter].as_mv.row == br) &&
        (x->fractional_best_mv[iter].as_mv.col == bc))
      return INT_MAX;
    x->fractional_best_mv[iter].as_mv.row = br;
    x->fractional_best_mv[iter].as_mv.col = bc;
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv = { tr, tc };

        if (use_accurate_subpel_search) {
          thismse = upsampled_pref_error(
              xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
              pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
              mask, mask_stride, invert_mask, w, h, &sse,
              use_accurate_subpel_search);
        } else {
          thismse = estimate_upsampled_pref_error(
              vfp, src_address, src_stride, pre(y, y_stride, tr, tc), y_stride,
              sp(tc), sp(tr), second_pred, mask, mask_stride, invert_mask,
              &sse);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);

        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        // Out-of-range probes can never win.
        cost_array[idx] = INT_MAX;
      }
    }

    // Check diagonal sub-pixel position
    // The diagonal lies towards the cheaper horizontal and the cheaper
    // vertical probe; ties pick the negative direction.
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };

      if (use_accurate_subpel_search) {
        thismse = upsampled_pref_error(
            xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
            pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
            mask, mask_stride, invert_mask, w, h, &sse,
            use_accurate_subpel_search);
      } else {
        thismse = estimate_upsampled_pref_error(
            vfp, src_address, src_stride, pre(y, y_stride, tr, tc), y_stride,
            sp(tc), sp(tr), second_pred, mask, mask_stride, invert_mask, &sse);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      // idx is 4 here (left over from the loop above), so this marks the
      // diagonal slot cost_array[4] as unavailable.
      cost_array[idx] = INT_MAX;
    }

    // Move the centre to the winning probe, if any: indices 0..3 are the
    // axis-aligned steps, 4 is the diagonal just evaluated (still in tr/tc).
    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
    }

    // Optional extra checks around the new centre; the macro argument selects
    // the accurate (1) or estimated (0) error computation.
    if (iters_per_step > 1 && best_idx != -1) {
      if (use_accurate_subpel_search) {
        SECOND_LEVEL_CHECKS_BEST(1);
      } else {
        SECOND_LEVEL_CHECKS_BEST(0);
      }
    }

    // Next refinement level: next group of four offsets, half the step.
    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
886 
// Drop macro names left over from the code above. CHECK_BETTER is defined
// again further down with a body specific to the integer-pel pattern search.
// NOTE(review): no definition of an upper-case PRE macro is visible in this
// part of the file — confirm whether this #undef is still needed.
#undef PRE
#undef CHECK_BETTER
889 
// Builds the luma inter prediction for the current block into the dst
// buffer of plane 0, measures its variance against the source with the
// variance function for |bsize|, and returns that variance plus the cost of
// coding |this_mv| relative to the reference MV from av1_get_ref_mv(x, 0).
//
// Note that |this_mv| only feeds the MV cost term; the predictor itself is
// built by av1_enc_build_inter_predictor() from the state reachable through
// |xd|.
unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
                                     const MV *this_mv) {
  const AV1_COMMON *const cm = &cpi->common;
  const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize];
  MACROBLOCKD *xd = &x->e_mbd;

  // Source block and prediction destination for the luma plane.
  const uint8_t *const src_buf = x->plane[0].src.buf;
  const int src_pitch = x->plane[0].src.stride;
  uint8_t *const pred_buf = xd->plane[0].dst.buf;
  const int pred_pitch = xd->plane[0].dst.stride;

  const int_mv ref_mv = av1_get_ref_mv(x, 0);
  unsigned int sse;

  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);

  const unsigned int variance =
      vfp->vf(pred_buf, pred_pitch, src_buf, src_pitch, &sse);

  return variance + mv_err_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost,
                                x->mv_cost_stack, x->errorperbit);
}
911 
912 // Refine MV in a small range
av1_refine_warped_mv(const AV1_COMP * cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,int mi_row,int mi_col,int * pts0,int * pts_inref0,int total_samples)913 unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
914                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
915                                   int *pts0, int *pts_inref0,
916                                   int total_samples) {
917   const AV1_COMMON *const cm = &cpi->common;
918   MACROBLOCKD *xd = &x->e_mbd;
919   MB_MODE_INFO *mbmi = xd->mi[0];
920   const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
921                             { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
922   const int_mv ref_mv = av1_get_ref_mv(x, 0);
923   int16_t br = mbmi->mv[0].as_mv.row;
924   int16_t bc = mbmi->mv[0].as_mv.col;
925   int16_t *tr = &mbmi->mv[0].as_mv.row;
926   int16_t *tc = &mbmi->mv[0].as_mv.col;
927   WarpedMotionParams best_wm_params = mbmi->wm_params;
928   int best_num_proj_ref = mbmi->num_proj_ref;
929   unsigned int bestmse;
930   int minc, maxc, minr, maxr;
931   const int start = cm->allow_high_precision_mv ? 0 : 4;
932   int ite;
933 
934   set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
935                              &ref_mv.as_mv);
936 
937   // Calculate the center position's error
938   assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr);
939   bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
940                                     &mbmi->mv[0].as_mv);
941 
942   // MV search
943   for (ite = 0; ite < 2; ++ite) {
944     int best_idx = -1;
945     int idx;
946 
947     for (idx = start; idx < start + 4; ++idx) {
948       unsigned int thismse;
949 
950       *tr = br + neighbors[idx].row;
951       *tc = bc + neighbors[idx].col;
952 
953       if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
954         MV this_mv = { *tr, *tc };
955         int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
956 
957         memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
958         memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
959         if (total_samples > 1)
960           mbmi->num_proj_ref =
961               selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
962 
963         if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, *tr,
964                              *tc, &mbmi->wm_params, mi_row, mi_col)) {
965           thismse =
966               av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);
967 
968           if (thismse < bestmse) {
969             best_idx = idx;
970             best_wm_params = mbmi->wm_params;
971             best_num_proj_ref = mbmi->num_proj_ref;
972             bestmse = thismse;
973           }
974         }
975       }
976     }
977 
978     if (best_idx == -1) break;
979 
980     if (best_idx >= 0) {
981       br += neighbors[best_idx].row;
982       bc += neighbors[best_idx].col;
983     }
984   }
985 
986   *tr = br;
987   *tc = bc;
988   mbmi->wm_params = best_wm_params;
989   mbmi->num_proj_ref = best_num_proj_ref;
990   return bestmse;
991 }
992 
check_bounds(const MvLimits * mv_limits,int row,int col,int range)993 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
994                                int range) {
995   return ((row - range) >= mv_limits->row_min) &
996          ((row + range) <= mv_limits->row_max) &
997          ((col - range) >= mv_limits->col_min) &
998          ((col + range) <= mv_limits->col_max);
999 }
1000 
is_mv_in(const MvLimits * mv_limits,const MV * mv)1001 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
1002   return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
1003          (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
1004 }
1005 
// Candidate test for the integer-pel pattern search. The macro expands in
// place and relies on these names being in scope at the expansion site:
// thissad, bestsad, use_mvcost, x, this_mv, fcenter_mv, sad_per_bit,
// best_site and i.
//
// The candidate's raw SAD (thissad) is first compared with bestsad. Only if
// it is already lower is the MV cost added to thissad (when use_mvcost is
// set) and the comparison repeated. On success bestsad takes the new value
// and best_site records the loop index i.
#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }
1017 
// Dimensions of the pattern tables passed to pattern_search().
#define MAX_PATTERN_SCALES 11     // number of scales in a pattern
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
1021 
1022 // Calculate and return a sad+mvcost list around an integer best pel.
calc_int_cost_list(const MACROBLOCK * x,const MV * const ref_mv,int sadpb,const aom_variance_fn_ptr_t * fn_ptr,const MV * best_mv,int * cost_list)1023 static INLINE void calc_int_cost_list(const MACROBLOCK *x,
1024                                       const MV *const ref_mv, int sadpb,
1025                                       const aom_variance_fn_ptr_t *fn_ptr,
1026                                       const MV *best_mv, int *cost_list) {
1027   static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1028   const struct buf_2d *const what = &x->plane[0].src;
1029   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
1030   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1031   const int br = best_mv->row;
1032   const int bc = best_mv->col;
1033   int i;
1034   unsigned int sse;
1035   const MV this_mv = { br, bc };
1036 
1037   cost_list[0] =
1038       fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
1039                  in_what->stride, &sse) +
1040       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
1041   if (check_bounds(&x->mv_limits, br, bc, 1)) {
1042     for (i = 0; i < 4; i++) {
1043       const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1044       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
1045                                     get_buf_from_mv(in_what, &neighbor_mv),
1046                                     in_what->stride, &sse) +
1047                          mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmv_vec_cost,
1048                                      x->mv_cost_stack, x->errorperbit);
1049     }
1050   } else {
1051     for (i = 0; i < 4; i++) {
1052       const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1053       if (!is_mv_in(&x->mv_limits, &neighbor_mv))
1054         cost_list[i + 1] = INT_MAX;
1055       else
1056         cost_list[i + 1] =
1057             fn_ptr->vf(what->buf, what->stride,
1058                        get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
1059                        &sse) +
1060             mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmv_vec_cost,
1061                         x->mv_cost_stack, x->errorperbit);
1062     }
1063   }
1064 }
1065 
calc_int_sad_list(const MACROBLOCK * x,const MV * const ref_mv,int sadpb,const aom_variance_fn_ptr_t * fn_ptr,const MV * best_mv,int * cost_list,const int use_mvcost,const int bestsad)1066 static INLINE void calc_int_sad_list(const MACROBLOCK *x,
1067                                      const MV *const ref_mv, int sadpb,
1068                                      const aom_variance_fn_ptr_t *fn_ptr,
1069                                      const MV *best_mv, int *cost_list,
1070                                      const int use_mvcost, const int bestsad) {
1071   static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1072   const struct buf_2d *const what = &x->plane[0].src;
1073   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
1074   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1075   int i;
1076   const int br = best_mv->row;
1077   const int bc = best_mv->col;
1078 
1079   if (cost_list[0] == INT_MAX) {
1080     cost_list[0] = bestsad;
1081     if (check_bounds(&x->mv_limits, br, bc, 1)) {
1082       for (i = 0; i < 4; i++) {
1083         const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1084         cost_list[i + 1] =
1085             fn_ptr->sdf(what->buf, what->stride,
1086                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1087       }
1088     } else {
1089       for (i = 0; i < 4; i++) {
1090         const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1091         if (!is_mv_in(&x->mv_limits, &this_mv))
1092           cost_list[i + 1] = INT_MAX;
1093         else
1094           cost_list[i + 1] =
1095               fn_ptr->sdf(what->buf, what->stride,
1096                           get_buf_from_mv(in_what, &this_mv), in_what->stride);
1097       }
1098     }
1099   } else {
1100     if (use_mvcost) {
1101       for (i = 0; i < 4; i++) {
1102         const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1103         if (cost_list[i + 1] != INT_MAX) {
1104           cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
1105         }
1106       }
1107     }
1108   }
1109 }
1110 
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
//
// Parameters, as used by the code below:
//   x              - macroblock state; the result is written to x->best_mv.
//   start_mv       - integer-pel start position; clamped IN PLACE to
//                    x->mv_limits before use.
//   search_param   - index into search_param_to_steps[], giving the largest
//                    scale to use (10 - search_param).
//   sad_per_bit    - weight passed to mvsad_err_cost().
//   do_init_search - when non-zero, first try every scale up to the largest
//                    one around the start point and begin the main search at
//                    the scale that gave the last improvement.
//   cost_list      - optional (may be NULL); when given, receives five costs
//                    around the final position (see the comment near the end).
//   vfp            - SAD / variance function table.
//   use_mvcost     - when non-zero, MV cost is added to candidate SADs.
//   center_mv      - reference MV; shifted right by 3 before being used as
//                    the centre for MV cost.
//   num_candidates - number of candidates at each scale.
//   candidates     - candidate offsets at each scale.
//
// Returns the best SAD found (including the MV cost terms that were added
// along the way).
//
static int pattern_search(
    MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  // True when the smallest scale uses a 4-point pattern; in that case the
  // SADs computed at scale 0 can be reused to fill cost_list.
  const int last_is_4 = num_candidates[0] == 4;
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  // Index (within the current scale's pattern) of the last winning candidate.
  int k = -1;
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  assert(search_param < MAX_MVSEARCH_STEPS);
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  br = start_mv->row;
  bc = start_mv->col;
  if (cost_list != NULL) {
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
        INT_MAX;
  }

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, start_mv), in_what->stride) +
            mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales up to the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      // If the whole pattern at this scale fits inside the limits, skip the
      // per-candidate range check.
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      }
      // bestsad carries over between scales, so a scale is recorded only if
      // it beat everything seen so far.
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    // When scale 0 is a 4-point pattern and a cost list is wanted, the main
    // loop stops at scale 1 and scale 0 is handled separately below so that
    // the neighbour SADs can be captured.
    const int last_s = (last_is_4 && cost_list != NULL);
    int best_site = -1;
    s = best_init_s;

    for (; s >= last_s; s--) {
      // No need to search all points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        // Nothing better at this scale: go straight to the next smaller
        // scale without running the refinement loop below.
        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      // Keep stepping at this scale, each time testing only the previous
      // winning direction k and its two neighbours in the pattern (with
      // wrap-around), until no candidate improves.
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        // best_site indexes next_chkpts_indices here, not the pattern.
        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    }

    // Note: If we enter the if below, then cost_list must be non-NULL.
    // (s ends at 0 only when last_s is 1; with last_s == 0 the loop above
    // leaves s at -1.)
    if (s == 0) {
      cost_list[0] = bestsad;
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }
      // Each time the centre moves, the list is rebuilt around the new
      // centre: the old centre's cost is stored in slot ((k + 2) % 4) + 1 and
      // the three remaining neighbours are re-measured.
      // NOTE(review): this presumably assumes the scale-0 pattern is ordered
      // like the cost_list deltas documented below, so that (k + 2) % 4 is
      // the direction opposite to k — confirm against the pattern tables.
      while (best_site != -1) {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
        cost_list[0] = bestsad;

        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(&x->mv_limits, &this_mv)) {
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
              continue;
            }
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      }
    }
  }

  // Returns the one-away integer pel cost/sad around the best as follows:
  // cost_list[0]: cost/sad at the best integer pel
  // cost_list[1]: cost/sad at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: cost/sad at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: cost/sad at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
    const MV best_int_mv = { br, bc };
    // 4-point patterns reuse / complete the SAD list gathered above; other
    // patterns compute a fresh variance-based list.
    if (last_is_4) {
      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
                        use_mvcost, bestsad);
    } else {
      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
                         cost_list);
    }
  }
  x->best_mv.as_mv.row = br;
  x->best_mv.as_mv.col = bc;
  return bestsad;
}
1371 
av1_get_mvpred_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const aom_variance_fn_ptr_t * vfp,int use_mvcost)1372 int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
1373                        const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
1374                        int use_mvcost) {
1375   const MACROBLOCKD *const xd = &x->e_mbd;
1376   const struct buf_2d *const what = &x->plane[0].src;
1377   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1378   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1379   unsigned int unused;
1380 
1381   return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1382                  in_what->stride, &unused) +
1383          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
1384                                    x->mv_cost_stack, x->errorperbit)
1385                      : 0);
1386 }
1387 
av1_get_mvpred_av_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const uint8_t * second_pred,const aom_variance_fn_ptr_t * vfp,int use_mvcost)1388 int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
1389                           const MV *center_mv, const uint8_t *second_pred,
1390                           const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
1391   const MACROBLOCKD *const xd = &x->e_mbd;
1392   const struct buf_2d *const what = &x->plane[0].src;
1393   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1394   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1395   unsigned int unused;
1396 
1397   return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
1398                    what->buf, what->stride, &unused, second_pred) +
1399          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
1400                                    x->mv_cost_stack, x->errorperbit)
1401                      : 0);
1402 }
1403 
av1_get_mvpred_mask_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const uint8_t * second_pred,const uint8_t * mask,int mask_stride,int invert_mask,const aom_variance_fn_ptr_t * vfp,int use_mvcost)1404 int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
1405                             const MV *center_mv, const uint8_t *second_pred,
1406                             const uint8_t *mask, int mask_stride,
1407                             int invert_mask, const aom_variance_fn_ptr_t *vfp,
1408                             int use_mvcost) {
1409   const MACROBLOCKD *const xd = &x->e_mbd;
1410   const struct buf_2d *const what = &x->plane[0].src;
1411   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1412   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1413   unsigned int unused;
1414 
1415   return vfp->msvf(what->buf, what->stride, 0, 0,
1416                    get_buf_from_mv(in_what, best_mv), in_what->stride,
1417                    second_pred, mask, mask_stride, invert_mask, &unused) +
1418          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
1419                                    x->mv_cost_stack, x->errorperbit)
1420                      : 0);
1421 }
1422 
// Hexagon pattern search: supplies the hexagon candidate tables to
// pattern_search() and forwards every other argument unchanged.
int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
                   int sad_per_bit, int do_init_search, int *cost_list,
                   const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                   const MV *center_mv) {
  // Scale 0 uses the 8 closest points; every larger scale uses 6 points in a
  // hex shape.
  static const int points_per_scale[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  };
  // Note that the largest candidate step at each scale is 2^scale.
  /* clang-format off */
  static const MV offsets[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    /* scale 0  */
    { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 },
      { 1, 1 }, { 0, 1 }, { -1, 1 }, { -1, 0 } },
    /* scale 1  */
    { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
    /* scale 2  */
    { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
    /* scale 3  */
    { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
    /* scale 4  */
    { { -8, -16 }, { 8, -16 }, { 16, 0 },
      { 8, 16 }, { -8, 16 }, { -16, 0 } },
    /* scale 5  */
    { { -16, -32 }, { 16, -32 }, { 32, 0 },
      { 16, 32 }, { -16, 32 }, { -32, 0 } },
    /* scale 6  */
    { { -32, -64 }, { 32, -64 }, { 64, 0 },
      { 32, 64 }, { -32, 64 }, { -64, 0 } },
    /* scale 7  */
    { { -64, -128 }, { 64, -128 }, { 128, 0 },
      { 64, 128 }, { -64, 128 }, { -128, 0 } },
    /* scale 8  */
    { { -128, -256 }, { 128, -256 }, { 256, 0 },
      { 128, 256 }, { -128, 256 }, { -256, 0 } },
    /* scale 9  */
    { { -256, -512 }, { 256, -512 }, { 512, 0 },
      { 256, 512 }, { -256, 512 }, { -512, 0 } },
    /* scale 10 */
    { { -512, -1024 }, { 512, -1024 }, { 1024, 0 },
      { 512, 1024 }, { -512, 1024 }, { -1024, 0 } },
  };
  /* clang-format on */
  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv,
                        points_per_scale, offsets);
}
1458 
// Big-diamond pattern search: supplies the diamond candidate tables to
// pattern_search() and forwards every other argument unchanged.
static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
                         int sad_per_bit, int do_init_search, int *cost_list,
                         const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                         const MV *center_mv) {
  // Scale 0 uses the 4 closest points; every larger scale uses 8 points in a
  // diamond shape.
  static const int points_per_scale[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale.
  /* clang-format off */
  static const MV offsets[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    /* scale 0  */
    { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
    /* scale 1  */
    { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 },
      { 1, 1 }, { 0, 2 }, { -1, 1 }, { -2, 0 } },
    /* scale 2  */
    { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 },
      { 2, 2 }, { 0, 4 }, { -2, 2 }, { -4, 0 } },
    /* scale 3  */
    { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 },
      { 4, 4 }, { 0, 8 }, { -4, 4 }, { -8, 0 } },
    /* scale 4  */
    { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 },
      { 8, 8 }, { 0, 16 }, { -8, 8 }, { -16, 0 } },
    /* scale 5  */
    { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 },
      { 16, 16 }, { 0, 32 }, { -16, 16 }, { -32, 0 } },
    /* scale 6  */
    { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 },
      { 32, 32 }, { 0, 64 }, { -32, 32 }, { -64, 0 } },
    /* scale 7  */
    { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 },
      { 64, 64 }, { 0, 128 }, { -64, 64 }, { -128, 0 } },
    /* scale 8  */
    { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 },
      { 128, 128 }, { 0, 256 }, { -128, 128 }, { -256, 0 } },
    /* scale 9  */
    { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 },
      { 256, 256 }, { 0, 512 }, { -256, 256 }, { -512, 0 } },
    /* scale 10 */
    { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
      { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
  };
  /* clang-format on */
  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv,
                        points_per_scale, offsets);
}
1499 
// Square pattern search: supplies the square candidate tables to
// pattern_search() and forwards every other argument unchanged.
static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
                         int sad_per_bit, int do_init_search, int *cost_list,
                         const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                         const MV *center_mv) {
  // Every scale uses the 8 points of a square.
  static const int points_per_scale[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale.
  /* clang-format off */
  static const MV offsets[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    /* scale 0  */
    { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 },
      { 1, 1 }, { 0, 1 }, { -1, 1 }, { -1, 0 } },
    /* scale 1  */
    { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 },
      { 2, 2 }, { 0, 2 }, { -2, 2 }, { -2, 0 } },
    /* scale 2  */
    { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 },
      { 4, 4 }, { 0, 4 }, { -4, 4 }, { -4, 0 } },
    /* scale 3  */
    { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 },
      { 8, 8 }, { 0, 8 }, { -8, 8 }, { -8, 0 } },
    /* scale 4  */
    { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 },
      { 16, 16 }, { 0, 16 }, { -16, 16 }, { -16, 0 } },
    /* scale 5  */
    { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 },
      { 32, 32 }, { 0, 32 }, { -32, 32 }, { -32, 0 } },
    /* scale 6  */
    { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 },
      { 64, 64 }, { 0, 64 }, { -64, 64 }, { -64, 0 } },
    /* scale 7  */
    { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 },
      { 128, 128 }, { 0, 128 }, { -128, 128 }, { -128, 0 } },
    /* scale 8  */
    { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 },
      { 256, 256 }, { 0, 256 }, { -256, 256 }, { -256, 0 } },
    /* scale 9  */
    { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 },
      { 512, 512 }, { 0, 512 }, { -512, 512 }, { -512, 0 } },
    /* scale 10 */
    { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
      { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
  };
  /* clang-format on */
  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv,
                        points_per_scale, offsets);
}
1540 
// Hex search with search_param raised to at least MAX_MVSEARCH_STEPS - 2;
// all other arguments are passed through to av1_hex_search().
static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit,
                           int do_init_search,  // must be zero for fast_hex
                           int *cost_list, const aom_variance_fn_ptr_t *vfp,
                           int use_mvcost, const MV *center_mv) {
  const int floored_param = AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param);

  return av1_hex_search(x, ref_mv, floored_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv);
}
1550 
// Big-diamond search with search_param raised to at least
// MAX_MVSEARCH_STEPS - 2; all other arguments are passed through to
// bigdia_search().
static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit, int do_init_search, int *cost_list,
                           const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                           const MV *center_mv) {
  const int floored_param = AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param);

  return bigdia_search(x, ref_mv, floored_param, sad_per_bit, do_init_search,
                       cost_list, vfp, use_mvcost, center_mv);
}
1559 
1560 #undef CHECK_BETTER
1561 
1562 // Exhuastive motion search around a given centre position with a given
1563 // step size.
exhuastive_mesh_search(MACROBLOCK * x,MV * ref_mv,MV * best_mv,int range,int step,int sad_per_bit,const aom_variance_fn_ptr_t * fn_ptr,const MV * center_mv)1564 static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
1565                                   int range, int step, int sad_per_bit,
1566                                   const aom_variance_fn_ptr_t *fn_ptr,
1567                                   const MV *center_mv) {
1568   const MACROBLOCKD *const xd = &x->e_mbd;
1569   const struct buf_2d *const what = &x->plane[0].src;
1570   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1571   MV fcenter_mv = { center_mv->row, center_mv->col };
1572   unsigned int best_sad = INT_MAX;
1573   int r, c, i;
1574   int start_col, end_col, start_row, end_row;
1575   int col_step = (step > 1) ? step : 4;
1576 
1577   assert(step >= 1);
1578 
1579   clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1580            x->mv_limits.row_min, x->mv_limits.row_max);
1581   *best_mv = fcenter_mv;
1582   best_sad =
1583       fn_ptr->sdf(what->buf, what->stride,
1584                   get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
1585       mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
1586   start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
1587   start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
1588   end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
1589   end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
1590 
1591   for (r = start_row; r <= end_row; r += step) {
1592     for (c = start_col; c <= end_col; c += col_step) {
1593       // Step > 1 means we are not checking every location in this pass.
1594       if (step > 1) {
1595         const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
1596         unsigned int sad =
1597             fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1598                         in_what->stride);
1599         if (sad < best_sad) {
1600           sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1601           if (sad < best_sad) {
1602             best_sad = sad;
1603             x->second_best_mv.as_mv = *best_mv;
1604             *best_mv = mv;
1605           }
1606         }
1607       } else {
1608         // 4 sads in a single call if we are checking every location
1609         if (c + 3 <= end_col) {
1610           unsigned int sads[4];
1611           const uint8_t *addrs[4];
1612           for (i = 0; i < 4; ++i) {
1613             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1614             addrs[i] = get_buf_from_mv(in_what, &mv);
1615           }
1616           fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1617 
1618           for (i = 0; i < 4; ++i) {
1619             if (sads[i] < best_sad) {
1620               const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1621               const unsigned int sad =
1622                   sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1623               if (sad < best_sad) {
1624                 best_sad = sad;
1625                 x->second_best_mv.as_mv = *best_mv;
1626                 *best_mv = mv;
1627               }
1628             }
1629           }
1630         } else {
1631           for (i = 0; i < end_col - c; ++i) {
1632             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1633             unsigned int sad =
1634                 fn_ptr->sdf(what->buf, what->stride,
1635                             get_buf_from_mv(in_what, &mv), in_what->stride);
1636             if (sad < best_sad) {
1637               sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1638               if (sad < best_sad) {
1639                 best_sad = sad;
1640                 x->second_best_mv.as_mv = *best_mv;
1641                 *best_mv = mv;
1642               }
1643             }
1644           }
1645         }
1646       }
1647     }
1648   }
1649 
1650   return best_sad;
1651 }
1652 
// Multi-step search over the site table in cfg, starting from ref_mv.
//
//   x            encoder block state; x->mv_limits bounds the search and
//                x->second_best_mv is overwritten each time the best vector
//                moves.
//   cfg          search-site table (ss, ss_count, searches_per_step).
//   ref_mv       starting vector. NOTE: it is clamped in place to
//                x->mv_limits, so the caller's value may change.
//   best_mv      output: best vector found (row/col are used directly as
//                pixel offsets into the reference buffer).
//   search_param number of leading steps of the site table to skip.
//   sad_per_bit  weight passed to mvsad_err_cost().
//   num00        output: number of steps in which no site improved on the
//                current best while the best position was still the start.
//   fn_ptr       SAD function table (sdf and sdx4df are used).
//   center_mv    reference for the rate term; shifted right by 3 before use.
//
// Returns the lowest (SAD + mvsad_err_cost) value found.
int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
                             MV *ref_mv, MV *best_mv, int search_param,
                             int sad_per_bit, int *num00,
                             const aom_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
  int i, j, step;

  const MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *what = x->plane[0].src.buf;
  const int what_stride = x->plane[0].src.stride;
  const uint8_t *in_what;
  const int in_what_stride = xd->plane[0].pre[0].stride;
  const uint8_t *best_address;

  unsigned int bestsad = INT_MAX;
  // best_site keeps its value across steps; comparing it with last_site after
  // each step tells whether that step found an improvement.
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;

  // search_param determines the length of the initial step and hence the number
  // of iterations.
  // 0 = initial step (MAX_FIRST_STEP) pel
  // 1 = (MAX_FIRST_STEP/2) pel,
  // 2 = (MAX_FIRST_STEP/4) pel...
  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;

  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  // Clamps the caller's ref_mv in place.
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  ref_row = ref_mv->row;
  ref_col = ref_mv->col;
  *num00 = 0;
  best_mv->row = ref_row;
  best_mv->col = ref_col;

  // Work out the start point for the search
  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
  best_address = in_what;

  // Check the starting position
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);

  // i is the running index into ss[] and is never reset: each step consumes
  // the next searches_per_step entries. Entry 0 is skipped.
  // NOTE(review): presumably ss[0] is the zero-offset centre site -- confirm
  // against the code that builds the site table.
  i = 1;

  for (step = 0; step < tot_steps; step++) {
    int all_in = 1, t;

    // all_in is true if every one of the points we are checking is within
    // the MV limits. Only the first four sites of the step are tested.
    // NOTE(review): this assumes those four sites carry the step's extreme
    // up / down / left / right offsets, in that order -- confirm.
    all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
    all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
    all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
    all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);

    // If all the pixels are within the bounds we don't check whether the
    // search point is valid in this loop,  otherwise we check each point
    // for validity..
    if (all_in) {
      unsigned int sad_array[4];

      // Sites are evaluated four at a time.
      // NOTE(review): assumes searches_per_step is a multiple of 4 -- confirm.
      for (j = 0; j < cfg->searches_per_step; j += 4) {
        unsigned char const *block_offset[4];

        for (t = 0; t < 4; t++)
          block_offset[t] = ss[i + t].offset + best_address;

        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        // i advances together with t here, so ss[i] is the site whose SAD is
        // sad_array[t].
        for (t = 0; t < 4; t++, i++) {
          // The rate term is only added when the raw SAD could still win.
          if (sad_array[t] < bestsad) {
            const MV this_mv = { best_mv->row + ss[i].mv.row,
                                 best_mv->col + ss[i].mv.col };
            sad_array[t] +=
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      for (j = 0; j < cfg->searches_per_step; j++) {
        // Trap illegal vectors
        const MV this_mv = { best_mv->row + ss[i].mv.row,
                             best_mv->col + ss[i].mv.col };

        if (is_mv_in(&x->mv_limits, &this_mv)) {
          const uint8_t *const check_here = ss[i].offset + best_address;
          unsigned int thissad =
              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        // Advance even when the site was out of bounds so that i stays aligned
        // with the start of the next step's sites.
        i++;
      }
    }
    if (best_site != last_site) {
      // A site in this step beat the current best: move there, remembering
      // the vector we are leaving.
      x->second_best_mv.as_mv = *best_mv;
      best_mv->row += ss[best_site].mv.row;
      best_mv->col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
      // Optional extension: keep stepping in the same winning direction for
      // as long as the cost keeps improving.
      while (1) {
        const MV this_mv = { best_mv->row + ss[best_site].mv.row,
                             best_mv->col + ss[best_site].mv.col };
        if (is_mv_in(&x->mv_limits, &this_mv)) {
          const uint8_t *const check_here = ss[best_site].offset + best_address;
          unsigned int thissad =
              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
          if (thissad < bestsad) {
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_mv->row += ss[best_site].mv.row;
              best_mv->col += ss[best_site].mv.col;
              best_address += ss[best_site].offset;
              continue;
            }
          }
        }
        break;
      }
#endif
    } else if (best_address == in_what) {
      // No improvement in this step and the best position is still the
      // starting position.
      (*num00)++;
    }
  }
  return bestsad;
}
1795 
1796 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
1797               point as the best match, we will do a final 1-away diamond
1798               refining search  */
full_pixel_diamond(const AV1_COMP * const cpi,MACROBLOCK * x,MV * mvp_full,int step_param,int sadpb,int further_steps,int do_refine,int * cost_list,const aom_variance_fn_ptr_t * fn_ptr,const MV * ref_mv,const search_site_config * cfg)1799 static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
1800                               MV *mvp_full, int step_param, int sadpb,
1801                               int further_steps, int do_refine, int *cost_list,
1802                               const aom_variance_fn_ptr_t *fn_ptr,
1803                               const MV *ref_mv, const search_site_config *cfg) {
1804   MV temp_mv;
1805   int thissme, n, num00 = 0;
1806   int bestsme = cpi->diamond_search_sad(x, cfg, mvp_full, &temp_mv, step_param,
1807                                         sadpb, &n, fn_ptr, ref_mv);
1808   if (bestsme < INT_MAX)
1809     bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1810   x->best_mv.as_mv = temp_mv;
1811 
1812   // If there won't be more n-step search, check to see if refining search is
1813   // needed.
1814   if (n > further_steps) do_refine = 0;
1815 
1816   while (n < further_steps) {
1817     ++n;
1818 
1819     if (num00) {
1820       num00--;
1821     } else {
1822       thissme =
1823           cpi->diamond_search_sad(x, cfg, mvp_full, &temp_mv, step_param + n,
1824                                   sadpb, &num00, fn_ptr, ref_mv);
1825       if (thissme < INT_MAX)
1826         thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1827 
1828       // check to see if refining search is needed.
1829       if (num00 > further_steps - n) do_refine = 0;
1830 
1831       if (thissme < bestsme) {
1832         bestsme = thissme;
1833         x->best_mv.as_mv = temp_mv;
1834       }
1835     }
1836   }
1837 
1838   // final 1-away diamond refining search
1839   if (do_refine) {
1840     const int search_range = 8;
1841     MV best_mv = x->best_mv.as_mv;
1842     thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
1843                                       ref_mv);
1844     if (thissme < INT_MAX)
1845       thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
1846     if (thissme < bestsme) {
1847       bestsme = thissme;
1848       x->best_mv.as_mv = best_mv;
1849     }
1850   }
1851 
1852   // Return cost list.
1853   if (cost_list) {
1854     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
1855   }
1856   return bestsme;
1857 }
1858 
1859 #define MIN_RANGE 7
1860 #define MAX_RANGE 256
1861 #define MIN_INTERVAL 1
1862 // Runs an limited range exhaustive mesh search using a pattern set
1863 // according to the encode speed profile.
full_pixel_exhaustive(const AV1_COMP * const cpi,MACROBLOCK * x,const MV * centre_mv_full,int sadpb,int * cost_list,const aom_variance_fn_ptr_t * fn_ptr,const MV * ref_mv,MV * dst_mv)1864 static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
1865                                  const MV *centre_mv_full, int sadpb,
1866                                  int *cost_list,
1867                                  const aom_variance_fn_ptr_t *fn_ptr,
1868                                  const MV *ref_mv, MV *dst_mv) {
1869   const SPEED_FEATURES *const sf = &cpi->sf;
1870   MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
1871   MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1872   int bestsme;
1873   int i;
1874   int interval = sf->mesh_patterns[0].interval;
1875   int range = sf->mesh_patterns[0].range;
1876   int baseline_interval_divisor;
1877 
1878   // Keep track of number of exhaustive calls (this frame in this thread).
1879   if (x->ex_search_count_ptr != NULL) ++(*x->ex_search_count_ptr);
1880 
1881   // Trap illegal values for interval and range for this function.
1882   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
1883       (interval > range))
1884     return INT_MAX;
1885 
1886   baseline_interval_divisor = range / interval;
1887 
1888   // Check size of proposed first range against magnitude of the centre
1889   // value used as a starting point.
1890   range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
1891   range = AOMMIN(range, MAX_RANGE);
1892   interval = AOMMAX(interval, range / baseline_interval_divisor);
1893 
1894   // initial search
1895   bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
1896                                    sadpb, fn_ptr, &temp_mv);
1897 
1898   if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
1899     // Progressive searches with range and step size decreasing each time
1900     // till we reach a step size of 1. Then break out.
1901     for (i = 1; i < MAX_MESH_STEP; ++i) {
1902       // First pass with coarser step and longer range
1903       bestsme = exhuastive_mesh_search(
1904           x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
1905           sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
1906 
1907       if (sf->mesh_patterns[i].interval == 1) break;
1908     }
1909   }
1910 
1911   if (bestsme < INT_MAX)
1912     bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1913   *dst_mv = temp_mv;
1914 
1915   // Return cost list.
1916   if (cost_list) {
1917     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
1918   }
1919   return bestsme;
1920 }
1921 
// Iterative 1-away refinement of ref_mv using the four neighbours
// up / left / right / down.
//
// Each iteration evaluates SAD + mvsad_err_cost() (against center_mv >> 3) at
// the four neighbours of the current vector and moves to the best one if it
// improves on the current cost; the vector being left is stored in
// x->second_best_mv. The loop stops after search_range iterations or as soon
// as no neighbour improves. ref_mv is updated in place; the best cost is
// returned.
int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
                            int search_range,
                            const aom_variance_fn_ptr_t *fn_ptr,
                            const MV *center_mv) {
  const MV deltas[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  const uint8_t *best_address = get_buf_from_mv(ref, ref_mv);
  unsigned int best_sad =
      fn_ptr->sdf(src->buf, src->stride, best_address, ref->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  int iter;

  for (iter = 0; iter < search_range; ++iter) {
    int winner = -1;
    int k;
    // True when all four neighbours are strictly inside the MV limits, in
    // which case they can be evaluated with a single 4-way SAD call.
    const int interior = (ref_mv->row - 1) > x->mv_limits.row_min &&
                         (ref_mv->row + 1) < x->mv_limits.row_max &&
                         (ref_mv->col - 1) > x->mv_limits.col_min &&
                         (ref_mv->col + 1) < x->mv_limits.col_max;

    if (interior) {
      unsigned int sads[4];
      // Same order as deltas[]: up, left, right, down.
      const uint8_t *const positions[4] = { best_address - ref->stride,
                                            best_address - 1, best_address + 1,
                                            best_address + ref->stride };

      fn_ptr->sdx4df(src->buf, src->stride, positions, ref->stride, sads);

      for (k = 0; k < 4; ++k) {
        // Skip the rate term when the raw SAD already loses.
        if (sads[k] >= best_sad) continue;
        {
          const MV mv = { ref_mv->row + deltas[k].row,
                          ref_mv->col + deltas[k].col };
          sads[k] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
        }
        if (sads[k] < best_sad) {
          best_sad = sads[k];
          winner = k;
        }
      }
    } else {
      for (k = 0; k < 4; ++k) {
        const MV mv = { ref_mv->row + deltas[k].row,
                        ref_mv->col + deltas[k].col };
        unsigned int sad;

        if (!is_mv_in(&x->mv_limits, &mv)) continue;

        sad = fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, &mv),
                          ref->stride);
        if (sad >= best_sad) continue;

        sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
        if (sad < best_sad) {
          best_sad = sad;
          winner = k;
        }
      }
    }

    // No neighbour improved: the current vector is a local minimum.
    if (winner == -1) break;

    x->second_best_mv.as_mv = *ref_mv;
    ref_mv->row += deltas[winner].row;
    ref_mv->col += deltas[winner].col;
    best_address = get_buf_from_mv(ref, ref_mv);
  }

  return best_sad;
}
1995 
1996 // This function is called when we do joint motion search in comp_inter_inter
1997 // mode, or when searching for one component of an ext-inter compound mode.
av1_refining_search_8p_c(MACROBLOCK * x,int error_per_bit,int search_range,const aom_variance_fn_ptr_t * fn_ptr,const uint8_t * mask,int mask_stride,int invert_mask,const MV * center_mv,const uint8_t * second_pred)1998 int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
1999                              const aom_variance_fn_ptr_t *fn_ptr,
2000                              const uint8_t *mask, int mask_stride,
2001                              int invert_mask, const MV *center_mv,
2002                              const uint8_t *second_pred) {
2003   static const search_neighbors neighbors[8] = {
2004     { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 },
2005     { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 },
2006     { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 },
2007     { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 },
2008     { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 },
2009     { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 },
2010     { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 },
2011     { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 }
2012   };
2013   const MACROBLOCKD *const xd = &x->e_mbd;
2014   const struct buf_2d *const what = &x->plane[0].src;
2015   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2016   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2017   MV *best_mv = &x->best_mv.as_mv;
2018   unsigned int best_sad = INT_MAX;
2019   int i, j;
2020   uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * SEARCH_GRID_STRIDE_8P] =
2021       { 0 };
2022   int grid_center = SEARCH_GRID_CENTER_8P;
2023   int grid_coord = grid_center;
2024 
2025   clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2026            x->mv_limits.row_min, x->mv_limits.row_max);
2027   if (mask) {
2028     best_sad = fn_ptr->msdf(what->buf, what->stride,
2029                             get_buf_from_mv(in_what, best_mv), in_what->stride,
2030                             second_pred, mask, mask_stride, invert_mask) +
2031                mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
2032   } else {
2033     best_sad =
2034         fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
2035                      in_what->stride, second_pred) +
2036         mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
2037   }
2038 
2039   do_refine_search_grid[grid_coord] = 1;
2040 
2041   for (i = 0; i < search_range; ++i) {
2042     int best_site = -1;
2043 
2044     for (j = 0; j < 8; ++j) {
2045       grid_coord = grid_center + neighbors[j].coord_offset;
2046       if (do_refine_search_grid[grid_coord] == 1) {
2047         continue;
2048       }
2049       const MV mv = { best_mv->row + neighbors[j].coord.row,
2050                       best_mv->col + neighbors[j].coord.col };
2051 
2052       do_refine_search_grid[grid_coord] = 1;
2053       if (is_mv_in(&x->mv_limits, &mv)) {
2054         unsigned int sad;
2055         if (mask) {
2056           sad = fn_ptr->msdf(what->buf, what->stride,
2057                              get_buf_from_mv(in_what, &mv), in_what->stride,
2058                              second_pred, mask, mask_stride, invert_mask);
2059         } else {
2060           sad = fn_ptr->sdaf(what->buf, what->stride,
2061                              get_buf_from_mv(in_what, &mv), in_what->stride,
2062                              second_pred);
2063         }
2064         if (sad < best_sad) {
2065           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2066           if (sad < best_sad) {
2067             best_sad = sad;
2068             best_site = j;
2069           }
2070         }
2071       }
2072     }
2073 
2074     if (best_site == -1) {
2075       break;
2076     } else {
2077       best_mv->row += neighbors[best_site].coord.row;
2078       best_mv->col += neighbors[best_site].coord.col;
2079       grid_center += neighbors[best_site].coord_offset;
2080     }
2081   }
2082   return best_sad;
2083 }
2084 
2085 #define MIN_EX_SEARCH_LIMIT 128
is_exhaustive_allowed(const AV1_COMP * const cpi,MACROBLOCK * x)2086 static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
2087   const SPEED_FEATURES *const sf = &cpi->sf;
2088   int is_allowed = sf->allow_exhaustive_searches &&
2089                    (sf->exhaustive_searches_thresh < INT_MAX) &&
2090                    !cpi->rc.is_src_frame_alt_ref;
2091   if (x->m_search_count_ptr != NULL && x->ex_search_count_ptr != NULL) {
2092     const int max_ex =
2093         AOMMAX(MIN_EX_SEARCH_LIMIT,
2094                (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
2095     is_allowed = *x->ex_search_count_ptr <= max_ex && is_allowed;
2096   }
2097   return is_allowed;
2098 }
2099 
// One refinement stage of vector_match(): evaluates the two candidates
// `base - step` and `base + step` (skipping any outside [0, bw]) and returns
// whichever of {base, candidates} has the lowest aom_vector_var() score.
// `*best_sad` carries the best score in and out.
static int vector_match_refine(int16_t *ref, int16_t *src, int bwl, int bw,
                               int base, int step, int *best_sad) {
  int center = base;
  for (int sign = -1; sign <= 1; sign += 2) {
    const int pos = base + sign * step;
    // check limit
    if (pos < 0 || pos > bw) continue;
    const int sad = aom_vector_var(&ref[pos], src, bwl);
    if (sad < *best_sad) {
      *best_sad = sad;
      center = pos;
    }
  }
  return center;
}

// Finds the offset into `ref` at which `src` matches best according to
// aom_vector_var(). A coarse scan over offsets 0, 16, ... up to bw (where
// bw = 4 << bwl) is followed by refinement stages at +/-8, +/-4, +/-2 and
// +/-1 around the running best. The result is returned relative to the
// middle of the search window, i.e. as (best offset - bw / 2).
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  const int bw = 4 << bwl;
  int best_sad = INT_MAX;
  int center = 0;

  // Coarse pass.
  for (int d = 0; d <= bw; d += 16) {
    const int sad = aom_vector_var(&ref[d], src, bwl);
    if (sad < best_sad) {
      best_sad = sad;
      center = d;
    }
  }

  // Successively halve the step around the current best position.
  for (int step = 8; step >= 1; step >>= 1) {
    center = vector_match_refine(ref, src, bwl, bw, center, step, &best_sad);
  }

  return (center - (bw >> 1));
}
2164 
// { row, col } offsets of the four axis-aligned full-pel neighbours, in the
// order up, left, right, down. av1_int_pro_motion_estimation() indexes this
// table with the same index as the SAD results of its 4-position probe, so
// the order must match the position array built there.
static const MV search_pos[4] = {
  { -1, 0 },
  { 0, -1 },
  { 0, 1 },
  { 1, 0 },
};
2171 
// Fast motion estimation based on 1-D integer projections.
//
// Row and column projections of the source block are matched against
// projections of a (2 * bw) x (2 * bh) window of the reference centred on the
// block (see vector_match()), giving a full-pel candidate. That candidate is
// then polished by probing its four axis-aligned neighbours plus one diagonal
// chosen from the neighbour SADs.
//
// The resulting vector is written to xd->mi[0]->mv[0] multiplied by 8 and
// clamped to the sub-pel range derived from x->mv_limits and `ref_mv`.
// Returns the SAD of the selected full-pel position.
//
// When xd->bd != 8 no search is performed: the MV is set to zero and the SAD
// at the zero position is returned.
//
// If av1_get_scaled_ref_frame() returns a scaled reference, it temporarily
// replaces xd->plane[*].pre[0] for the duration of the call and the original
// planes are restored before returning.
unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize, int mi_row,
                                           int mi_col, const MV *ref_mv) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MV *const out_mv = &mbmi->mv[0].as_mv;
  struct buf_2d saved_pre[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
  DECLARE_ALIGNED(16, int16_t, hbuf[256]);
  DECLARE_ALIGNED(16, int16_t, vbuf[256]);
  DECLARE_ALIGNED(16, int16_t, src_hbuf[128]);
  DECLARE_ALIGNED(16, int16_t, src_vbuf[128]);
  const int bwl = mi_size_wide_log2[bsize];
  const int bhl = mi_size_high_log2[bsize];
  const int bw = 4 << bwl;
  const int bh = 4 << bhl;
  const int search_width = bw << 1;
  const int search_height = bh << 1;
  const int norm_factor = 3 + (bw >> 5);
  const int src_stride = x->plane[0].src.stride;
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
  MvLimits subpel_mv_limits;

  if (scaled_ref_frame) {
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (int p = 0; p < MAX_MB_PLANE; p++) saved_pre[p] = xd->plane[p].pre[0];
    av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
                         MAX_MB_PLANE);
  }

  // Read the reference stride only now: it has to describe the buffer that is
  // actually accessed below, which is the scaled reference when one was
  // swapped in above.
  const int ref_stride = xd->plane[0].pre[0].stride;

  if (xd->bd != 8) {
    out_mv->row = 0;
    out_mv->col = 0;
    const unsigned int sad = cpi->fn_ptr[bsize].sdf(
        x->plane[0].src.buf, src_stride, xd->plane[0].pre[0].buf, ref_stride);

    if (scaled_ref_frame) {
      for (int p = 0; p < MAX_MB_PLANE; p++)
        xd->plane[p].pre[0] = saved_pre[p];
    }
    return sad;
  }

  const uint8_t *const src0 = x->plane[0].src.buf;
  const uint8_t *const ref0 = xd->plane[0].pre[0].buf;

  // Set up prediction 1-D reference set
  {
    const uint8_t *ref_row = ref0 - (bw >> 1);
    for (int c = 0; c < search_width; c += 16) {
      aom_int_pro_row(&hbuf[c], ref_row, ref_stride, bh);
      ref_row += 16;
    }
  }
  {
    const uint8_t *ref_col = ref0 - (bh >> 1) * ref_stride;
    for (int r = 0; r < search_height; ++r) {
      vbuf[r] = aom_int_pro_col(ref_col, bw) >> norm_factor;
      ref_col += ref_stride;
    }
  }

  // Set up src 1-D reference set
  for (int c = 0; c < bw; c += 16) {
    aom_int_pro_row(&src_hbuf[c], src0 + c, src_stride, bh);
  }
  {
    const uint8_t *src_row = src0;
    for (int r = 0; r < bh; ++r) {
      src_vbuf[r] = aom_int_pro_col(src_row, bw) >> norm_factor;
      src_row += src_stride;
    }
  }

  // Find the best match per 1-D search
  out_mv->col = vector_match(hbuf, src_hbuf, bwl);
  out_mv->row = vector_match(vbuf, src_vbuf, bhl);

  // `probe` stays at the projection result while out_mv tracks the best
  // position found so far.
  MV probe = *out_mv;
  const uint8_t *ref_buf = ref0 + probe.row * ref_stride + probe.col;
  unsigned int best_sad =
      cpi->fn_ptr[bsize].sdf(src0, src_stride, ref_buf, ref_stride);

  // SAD of the four axis-aligned neighbours, in search_pos[] order.
  unsigned int this_sad[4];
  {
    const uint8_t *const pos[4] = {
      ref_buf - ref_stride,
      ref_buf - 1,
      ref_buf + 1,
      ref_buf + ref_stride,
    };

    cpi->fn_ptr[bsize].sdx4df(src0, src_stride, pos, ref_stride, this_sad);
  }

  for (int k = 0; k < 4; ++k) {
    if (this_sad[k] < best_sad) {
      best_sad = this_sad[k];
      out_mv->row = search_pos[k].row + probe.row;
      out_mv->col = search_pos[k].col + probe.col;
    }
  }

  // Pick the diagonal lying between the better vertical and the better
  // horizontal neighbour.
  if (this_sad[0] < this_sad[3])
    probe.row -= 1;
  else
    probe.row += 1;

  if (this_sad[1] < this_sad[2])
    probe.col -= 1;
  else
    probe.col += 1;

  ref_buf = ref0 + probe.row * ref_stride + probe.col;

  const unsigned int diag_sad =
      cpi->fn_ptr[bsize].sdf(src0, src_stride, ref_buf, ref_stride);
  if (best_sad > diag_sad) {
    *out_mv = probe;
    best_sad = diag_sad;
  }

  // Scale the full-pel result by 8 before clamping it to the sub-pel limits.
  out_mv->row *= 8;
  out_mv->col *= 8;

  set_subpel_mv_search_range(
      &x->mv_limits, &subpel_mv_limits.col_min, &subpel_mv_limits.col_max,
      &subpel_mv_limits.row_min, &subpel_mv_limits.row_max, ref_mv);
  clamp_mv(out_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
           subpel_mv_limits.row_min, subpel_mv_limits.row_max);

  if (scaled_ref_frame) {
    for (int p = 0; p < MAX_MB_PLANE; p++) xd->plane[p].pre[0] = saved_pre[p];
  }

  return best_sad;
}
2309 
// Runs the full-pixel motion search selected by `method` and returns the
// resulting error value.
//
//   mvp_full, step_param, error_per_bit, ref_mv, cfg
//                    forwarded to the selected search routine.
//   method           one of FAST_DIAMOND, FAST_HEX, HEX, SQUARE, BIGDIA,
//                    NSTEP; anything else trips the assert.
//   run_mesh_search  non-zero forces a follow-on exhaustive (mesh) search.
//                    For NSTEP it is also enabled here when
//                    is_exhaustive_allowed() holds and the search error
//                    exceeds the block-size-scaled
//                    sf->exhaustive_searches_thresh.
//   cost_list        optional; when non-NULL its five entries are reset to
//                    INT_MAX before being handed to the search.
//   var_max, rd      for methods other than NSTEP, when `rd` is set and the
//                    error is below `var_max`, the error is recomputed with
//                    av1_get_mvpred_var().
//   x_pos, y_pos, intra
//                    when `intra` is set and av1_use_hash_me() is true, a
//                    hash-based lookup is additionally tried for square
//                    power-of-two blocks at non-negative positions.
//
// The selected vector is read from / written to x->best_mv; when the hash
// lookup wins, the previous best is saved in x->second_best_mv.
int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                          MV *mvp_full, int step_param, int method,
                          int run_mesh_search, int error_per_bit,
                          int *cost_list, const MV *ref_mv, int var_max, int rd,
                          int x_pos, int y_pos, int intra,
                          const search_site_config *cfg) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  int var = 0;

  if (cost_list) {
    cost_list[0] = INT_MAX;
    cost_list[1] = INT_MAX;
    cost_list[2] = INT_MAX;
    cost_list[3] = INT_MAX;
    cost_list[4] = INT_MAX;
  }

  // Keep track of number of searches (this frame in this thread).
  if (x->m_search_count_ptr != NULL) ++(*x->m_search_count_ptr);

  switch (method) {
    case FAST_DIAMOND:
      var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
                            cost_list, fn_ptr, 1, ref_mv);
      break;
    case FAST_HEX:
      var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
                            cost_list, fn_ptr, 1, ref_mv);
      break;
    case HEX:
      var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
                           fn_ptr, 1, ref_mv);
      break;
    case SQUARE:
      var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
                          fn_ptr, 1, ref_mv);
      break;
    case BIGDIA:
      var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
                          fn_ptr, 1, ref_mv);
      break;
    case NSTEP:
      // The follow-on exhaustive search for NSTEP is decided once, after the
      // switch, so that the mesh search runs at most once per call.
      var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
                               MAX_MVSEARCH_STEPS - 1 - step_param, 1,
                               cost_list, fn_ptr, ref_mv, cfg);
      break;
    default: assert(0 && "Invalid search method.");
  }

  // Should we allow a follow on exhaustive search?
  if (!run_mesh_search && method == NSTEP && is_exhaustive_allowed(cpi, x)) {
    int exhuastive_thr = sf->exhaustive_searches_thresh;
    // Scale the threshold with block area (no shift when the two log2
    // sizes sum to 10).
    exhuastive_thr >>=
        10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
    // Threshold variance for an exhaustive full search.
    if (var > exhuastive_thr) run_mesh_search = 1;
  }

  if (run_mesh_search) {
    int var_ex;
    MV tmp_mv_ex;
    var_ex = full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
                                   cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
    // Only adopt the exhaustive result when it is strictly better.
    if (var_ex < var) {
      var = var_ex;
      x->best_mv.as_mv = tmp_mv_ex;
    }
  }

  if (method != NSTEP && rd && var < var_max)
    var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);

  // Hash-based search. The do/while(0) only exists so that `break` can be
  // used to bail out early.
  do {
    if (!intra || !av1_use_hash_me(&cpi->common)) break;

    // already single ME
    // get block size and original buffer of current block
    const int block_height = block_size_high[bsize];
    const int block_width = block_size_wide[bsize];
    if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
      if (block_width == 4 || block_width == 8 || block_width == 16 ||
          block_width == 32 || block_width == 64 || block_width == 128) {
        uint8_t *what = x->plane[0].src.buf;
        const int what_stride = x->plane[0].src.stride;
        uint32_t hash_value1, hash_value2;
        // Only read when best_hash_cost was lowered, i.e. after being
        // assigned in the loop below; initialised to keep that explicit.
        MV best_hash_mv = { 0, 0 };
        int best_hash_cost = INT_MAX;

        // for the hashMap
        // (`intra` is known to be non-zero here because of the guard at the
        // top of this block.)
        hash_table *ref_frame_hash =
            intra ? &cpi->common.cur_frame->hash_table
                  : av1_get_ref_frame_hash_map(&cpi->common,
                                               x->e_mbd.mi[0]->ref_frame[0]);

        av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
                                 &hash_value2, is_cur_buf_hbd(&x->e_mbd), x);

        const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
        // for intra, at least one match can always be found: the block itself.
        if (count <= (intra ? 1 : 0)) {
          break;
        }

        Iterator iterator =
            av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
        for (int i = 0; i < count; i++, iterator_increment(&iterator)) {
          block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
          if (hash_value2 == ref_block_hash.hash_value2) {
            // For intra, make sure the prediction is from valid area.
            if (intra) {
              const int mi_col = x_pos / MI_SIZE;
              const int mi_row = y_pos / MI_SIZE;
              const MV dv = { 8 * (ref_block_hash.y - y_pos),
                              8 * (ref_block_hash.x - x_pos) };
              if (!av1_is_dv_valid(dv, &cpi->common, &x->e_mbd, mi_row, mi_col,
                                   bsize, cpi->common.seq_params.mib_size_log2))
                continue;
            }
            MV hash_mv;
            hash_mv.col = ref_block_hash.x - x_pos;
            hash_mv.row = ref_block_hash.y - y_pos;
            if (!is_mv_in(&x->mv_limits, &hash_mv)) continue;
            const int refCost =
                av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
            if (refCost < best_hash_cost) {
              best_hash_cost = refCost;
              best_hash_mv = hash_mv;
            }
          }
        }
        if (best_hash_cost < var) {
          x->second_best_mv = x->best_mv;
          x->best_mv.as_mv = best_hash_mv;
          var = best_hash_cost;
        }
      }
    }
  } while (0);

  return var;
}
2477 
/* The macros below are not self-contained: they expand to code that uses
 * names from the scope they are expanded in (vfp, y, y_stride, z, mask, sse,
 * thismse, besterr, br, bc, rr, rc, minc, maxc, minr, maxr, mvjcost, mvcost,
 * error_per_bit, distortion, sse1, ...). */

/* Evaluates vfp->osvf() (sub-pixel variance) for the prediction at position
 * (r, c); the sum of squared errors is written to `sse`. */
#define DIST(r, c) \
  vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)

/* Motion vector cost of (r, c) relative to (rr, rc), weighted by
 * error_per_bit and rounded (+4096, >> 13). Evaluates to 0 when mvcost is
 * NULL. */
#define MVC(r, c)                                                              \
  (unsigned int)(mvcost                                                        \
                     ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +              \
                         mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
                            error_per_bit +                                    \
                        4096) >>                                               \
                           13                                                  \
                     : 0)

/* Checks if (r, c) has a better score than the previous best. When (r, c)
 * lies inside [minr, maxr] x [minc, maxc], `v` receives MVC + DIST and, if
 * that beats besterr, the best position (br, bc), besterr, *distortion and
 * *sse1 are updated. Outside the range `v` is set to INT_MAX. */
#define CHECK_BETTER(v, r, c)                             \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
    thismse = (DIST(r, c));                               \
    if ((v = MVC(r, c) + thismse) < besterr) {            \
      besterr = v;                                        \
      br = r;                                             \
      bc = c;                                             \
      *distortion = thismse;                              \
      *sse1 = sse;                                        \
    }                                                     \
  } else {                                                \
    v = INT_MAX;                                          \
  }

/* Variant 0: plain alias of CHECK_BETTER (uses vfp->osvf via DIST).
 * NOTE(review): presumably selected by SECOND_LEVEL_CHECKS_BEST(0), which is
 * defined outside this section -- confirm. */
#undef CHECK_BETTER0
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)

/* Variant 1: same range check and bookkeeping as CHECK_BETTER, but the error
 * comes from upsampled_obmc_pref_error() and the MV cost from mv_err_cost().
 * Unlike CHECK_BETTER, `v` holds only the MV cost after the expansion; the
 * comparison against besterr uses v + thismse. Additionally requires xd, cm,
 * mi_row, mi_col, w, h, ref_mv and use_accurate_subpel_search in scope.
 * NOTE(review): presumably selected by SECOND_LEVEL_CHECKS_BEST(1) --
 * confirm. */
#undef CHECK_BETTER1
#define CHECK_BETTER1(v, r, c)                                                \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                     \
    MV this_mv = { r, c };                                                    \
    thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv,     \
                                        mask, vfp, z, pre(y, y_stride, r, c), \
                                        y_stride, sp(c), sp(r), w, h, &sse,   \
                                        use_accurate_subpel_search);          \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);        \
    if ((v + thismse) < besterr) {                                            \
      besterr = v + thismse;                                                  \
      br = r;                                                                 \
      bc = c;                                                                 \
      *distortion = thismse;                                                  \
      *sse1 = sse;                                                            \
    }                                                                         \
  } else {                                                                    \
    v = INT_MAX;                                                              \
  }
2528 
setup_obmc_center_error(const int32_t * mask,const MV * bestmv,const MV * ref_mv,int error_per_bit,const aom_variance_fn_ptr_t * vfp,const int32_t * const wsrc,const uint8_t * const y,int y_stride,int offset,int * mvjcost,int * mvcost[2],unsigned int * sse1,int * distortion)2529 static unsigned int setup_obmc_center_error(
2530     const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
2531     const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
2532     const uint8_t *const y, int y_stride, int offset, int *mvjcost,
2533     int *mvcost[2], unsigned int *sse1, int *distortion) {
2534   unsigned int besterr;
2535   besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
2536   *distortion = besterr;
2537   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2538   return besterr;
2539 }
2540 
// Builds the upsampled prediction for `mv` into a local buffer (using the
// high bit-depth routine when is_cur_buf_hbd(xd) is true, the regular one
// otherwise) and returns vfp->ovf() of that w-wide prediction against
// `wsrc`/`mask`. The SSE is written to *sse.
static int upsampled_obmc_pref_error(
    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
    const int32_t *const wsrc, const uint8_t *const y, int y_stride,
    int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
    int subpel_search) {
  DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);

  if (!is_cur_buf_hbd(xd)) {
    aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
                       subpel_y_q3, y, y_stride, subpel_search);
    return vfp->ovf(pred, w, wsrc, mask, sse);
  }

  // High bit-depth path: same storage, accessed through the byte-pointer
  // conversion macro.
  uint8_t *const pred8 = CONVERT_TO_BYTEPTR(pred);
  aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
                            subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
                            subpel_search);
  return vfp->ovf(pred8, w, wsrc, mask, sse);
}
2564 
// Upsampled counterpart of setup_obmc_center_error(): the starting error is
// obtained from upsampled_obmc_pref_error() at `y + offset` with zero
// sub-pixel offsets. That error is stored in *distortion (SSE in *sse1) and
// returned with the rate cost of `bestmv` relative to `ref_mv` added.
static unsigned int upsampled_setup_obmc_center_error(
    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
    const uint8_t *const y, int y_stride, int w, int h, int offset,
    int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
    int subpel_search) {
  const uint8_t *const center = y + offset;
  unsigned int center_err =
      upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, bestmv, mask, vfp,
                                wsrc, center, y_stride, 0, 0, w, h, sse1,
                                subpel_search);
  *distortion = center_err;
  center_err += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  return center_err;
}
2579 
// Sub-pixel motion vector refinement for OBMC, using x->wsrc_buf as the
// weighted source and x->mask_buf as the mask.
//
// On entry `bestmv` holds the starting position in units that index the
// reference buffer directly (row * stride + col); it is multiplied by 8
// before the search. On return it holds the refined position on that 8x
// scale. The search runs `3 - forced_stop` rounds (capped at 2 when allow_hp
// is 0). Each round evaluates the four positions given by the current
// search_step entries plus one diagonal, optionally runs
// SECOND_LEVEL_CHECKS_BEST when iters_per_step > 1, and then halves the step.
//
// use_accurate_subpel_search selects upsampled_obmc_pref_error() (non-zero)
// or vfp->osvf() (zero) as the error measure. `is_second` selects which
// pd->pre[] buffer is searched.
//
// Returns the best cost found (error plus MV cost). The error alone is
// written to *distortion and the SSE to *sse1.
//
// NOTE(review): SECOND_LEVEL_CHECKS_BEST, search_step_table, pre() and sp()
// are defined outside this section; the locals below (br, bc, tr, tc, kr, kc,
// hstep, besterr, thismse, sse, minc, ...) appear to be referenced by name
// from those macros, so they must not be renamed without checking them.
int av1_find_best_obmc_sub_pixel_tree_up(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
    int is_second, int use_accurate_subpel_search) {
  const int32_t *wsrc = x->wsrc_buf;
  const int32_t *mask = x->mask_buf;
  const int *const z = wsrc;
  const int *const src_address = z;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  MB_MODE_INFO *mbmi = xd->mi[0];
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int thismse;

  int rr = ref_mv->row;
  int rc = ref_mv->col;
  // Best position so far, on the 8x scale.
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter;
  int round = 3 - forced_stop;
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  // [0..3]: costs of the four search_step candidates, [4]: the diagonal.
  unsigned int cost_array[5];
  int kr, kc;
  const int w = block_size_wide[mbmi->sb_type];
  const int h = block_size_high[mbmi->sb_type];
  int offset;
  int y_stride;
  const uint8_t *y;

  int minc, maxc, minr, maxr;

  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);

  y = pd->pre[is_second].buf;
  y_stride = pd->pre[is_second].stride;
  // Computed before bestmv is rescaled below.
  offset = bestmv->row * y_stride + bestmv->col;

  // Without allow_hp the search is limited to at most two rounds.
  if (!allow_hp)
    if (round == 3) round = 2;

  bestmv->row *= 8;
  bestmv->col *= 8;
  // use_accurate_subpel_search can be 0 or 1 or 2
  if (use_accurate_subpel_search)
    besterr = upsampled_setup_obmc_center_error(
        xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
        y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
        use_accurate_subpel_search);
  else
    besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
                                      z, y, y_stride, offset, mvjcost, mvcost,
                                      sse1, distortion);

  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv = { tr, tc };
        if (use_accurate_subpel_search) {
          thismse = upsampled_obmc_pref_error(
              xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
              pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
              use_accurate_subpel_search);
        } else {
          thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
                              sp(tr), src_address, mask, &sse);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);
        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        // Out-of-range candidates can never win the comparisons below.
        cost_array[idx] = INT_MAX;
      }
    }

    // Check diagonal sub-pixel position
    // NOTE(review): this assumes entries 0/1 of the current search_step group
    // are the two column offsets and entries 2/3 the two row offsets --
    // confirm against search_step_table.
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };

      if (use_accurate_subpel_search) {
        thismse = upsampled_obmc_pref_error(
            xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
            pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
            use_accurate_subpel_search);
      } else {
        thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
                            src_address, mask, &sse);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      // idx == 4 here (left over from the loop above), so this marks the
      // diagonal slot cost_array[4] as unusable.
      cost_array[idx] = INT_MAX;
    }

    // Move the centre to whichever candidate won this round, if any.
    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
    }

    // Optional extra checks around the new best; the macro argument selects
    // the upsampled (1) or plain (0) error measure.
    if (iters_per_step > 1 && best_idx != -1) {
      if (use_accurate_subpel_search) {
        SECOND_LEVEL_CHECKS_BEST(1);
      } else {
        SECOND_LEVEL_CHECKS_BEST(0);
      }
    }

    tr = br;
    tc = bc;

    // Advance to the next group of four offsets and halve the step.
    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
2736 
2737 #undef DIST
2738 #undef MVC
2739 #undef CHECK_BETTER
2740 
get_obmc_mvpred_var(const MACROBLOCK * x,const int32_t * wsrc,const int32_t * mask,const MV * best_mv,const MV * center_mv,const aom_variance_fn_ptr_t * vfp,int use_mvcost,int is_second)2741 static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
2742                                const int32_t *mask, const MV *best_mv,
2743                                const MV *center_mv,
2744                                const aom_variance_fn_ptr_t *vfp, int use_mvcost,
2745                                int is_second) {
2746   const MACROBLOCKD *const xd = &x->e_mbd;
2747   const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2748   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
2749   unsigned int unused;
2750 
2751   return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
2752                   mask, &unused) +
2753          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
2754                                    x->mv_cost_stack, x->errorperbit)
2755                      : 0);
2756 }
2757 
// Iterative one-sample refinement of ref_mv using the OBMC SAD function
// fn_ptr->osdf.
//
// Each of up to search_range passes probes the four neighbours of the current
// ref_mv (up, left, right, down) that lie inside x->mv_limits. ref_mv is moved
// to the cheapest neighbour whose SAD plus mvsad_err_cost() beats the current
// best; the search stops early when no neighbour improves.
//
// ref_mv is updated in place. center_mv is shifted right by 3 before it is
// used as the reference point for mvsad_err_cost().
//
// Returns the best SAD-plus-MV-cost found.
static int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
                                    const int32_t *mask, MV *ref_mv,
                                    int error_per_bit, int search_range,
                                    const aom_variance_fn_ptr_t *fn_ptr,
                                    const MV *center_mv, int is_second) {
  const MV cross[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const struct buf_2d *const ref_buf = &x->e_mbd.plane[0].pre[is_second];
  const MV center_full = { center_mv->row >> 3, center_mv->col >> 3 };
  unsigned int best_sad =
      fn_ptr->osdf(get_buf_from_mv(ref_buf, ref_mv), ref_buf->stride, wsrc,
                   mask) +
      mvsad_err_cost(x, ref_mv, &center_full, error_per_bit);

  for (int pass = 0; pass < search_range; ++pass) {
    int winner = -1;

    for (int k = 0; k < 4; ++k) {
      const MV cand = { ref_mv->row + cross[k].row,
                        ref_mv->col + cross[k].col };
      if (!is_mv_in(&x->mv_limits, &cand)) continue;

      unsigned int cand_sad = fn_ptr->osdf(get_buf_from_mv(ref_buf, &cand),
                                           ref_buf->stride, wsrc, mask);
      // Skip the MV cost lookup when the raw SAD already loses.
      if (cand_sad >= best_sad) continue;

      cand_sad += mvsad_err_cost(x, &cand, &center_full, error_per_bit);
      if (cand_sad < best_sad) {
        best_sad = cand_sad;
        winner = k;
      }
    }

    // No neighbour improved on the current position: converged.
    if (winner < 0) break;

    ref_mv->row += cross[winner].row;
    ref_mv->col += cross[winner].col;
  }
  return best_sad;
}
2800 
// Step search over the site table in cfg, scored with the OBMC SAD function
// fn_ptr->osdf plus mvsad_err_cost().
//
// search_param determines the length of the initial step and hence the number
// of iterations: 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2)
// pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
//
// ref_mv is clamped to x->mv_limits in place and used as the starting point.
// best_mv receives the best position found. *num00 is reset to 0 and then
// incremented once for every step that neither moved the best position nor
// had previously left the starting address.
//
// Returns the best SAD-plus-MV-cost found.
static int obmc_diamond_search_sad(const MACROBLOCK *x,
                                   const search_site_config *cfg,
                                   const int32_t *wsrc, const int32_t *mask,
                                   MV *ref_mv, MV *best_mv, int search_param,
                                   int sad_per_bit, int *num00,
                                   const aom_variance_fn_ptr_t *fn_ptr,
                                   const MV *center_mv, int is_second) {
  const struct buf_2d *const ref_buf = &x->e_mbd.plane[0].pre[is_second];
  const int sites_per_step = cfg->searches_per_step;
  // Skip the first search_param steps of the site table.
  const search_site *const sites = &cfg->ss[search_param * sites_per_step];
  const int num_steps = (cfg->ss_count / sites_per_step) - search_param;
  const MV center_full = { center_mv->row >> 3, center_mv->col >> 3 };

  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  const uint8_t *const start_addr =
      ref_buf->buf + ref_mv->row * ref_buf->stride + ref_mv->col;
  const uint8_t *cur_addr = start_addr;
  *num00 = 0;
  *best_mv = *ref_mv;

  // Score the starting position.
  int best_sad = fn_ptr->osdf(cur_addr, ref_buf->stride, wsrc, mask) +
                 mvsad_err_cost(x, best_mv, &center_full, sad_per_bit);

  int chosen = 0;
  int previous = 0;
  // Site 0 is never probed; the running index carries on across steps.
  int site = 1;

  for (int step = 0; step < num_steps; ++step) {
    for (int k = 0; k < sites_per_step; ++k, ++site) {
      const MV cand = { best_mv->row + sites[site].mv.row,
                        best_mv->col + sites[site].mv.col };
      if (!is_mv_in(&x->mv_limits, &cand)) continue;

      int cand_sad = fn_ptr->osdf(cur_addr + sites[site].offset,
                                  ref_buf->stride, wsrc, mask);
      // Skip the MV cost lookup when the raw SAD already loses.
      if (cand_sad >= best_sad) continue;

      cand_sad += mvsad_err_cost(x, &cand, &center_full, sad_per_bit);
      if (cand_sad < best_sad) {
        best_sad = cand_sad;
        chosen = site;
      }
    }

    if (chosen != previous) {
      // A site in this step won: move the search centre there.
      best_mv->row += sites[chosen].mv.row;
      best_mv->col += sites[chosen].mv.col;
      cur_addr += sites[chosen].offset;
      previous = chosen;
#if defined(NEW_DIAMOND_SEARCH)
      // Keep stepping in the winning direction for as long as it improves.
      for (;;) {
        const MV next_mv = { best_mv->row + sites[chosen].mv.row,
                             best_mv->col + sites[chosen].mv.col };
        if (!is_mv_in(&x->mv_limits, &next_mv)) break;

        int next_sad = fn_ptr->osdf(cur_addr + sites[chosen].offset,
                                    ref_buf->stride, wsrc, mask);
        if (next_sad >= best_sad) break;

        next_sad += mvsad_err_cost(x, &next_mv, &center_full, sad_per_bit);
        if (next_sad >= best_sad) break;

        best_sad = next_sad;
        best_mv->row += sites[chosen].mv.row;
        best_mv->col += sites[chosen].mv.col;
        cur_addr += sites[chosen].offset;
      }
#endif
    } else if (cur_addr == start_addr) {
      ++(*num00);
    }
  }
  return best_sad;
}
2887 
// Runs obmc_diamond_search_sad() at step_param and then at progressively
// larger step parameters (up to further_steps extra rounds), optionally
// followed by a one-sample refinement, and keeps the candidate with the
// lowest get_obmc_mvpred_var() score.
//
// The weighted source and mask come from x->wsrc_buf and x->mask_buf. The
// winning motion vector is written to dst_mv. mvp_full is the starting point
// and is clamped by the diamond search.
//
// Returns the score of the winning vector.
static int obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
                                   MV *mvp_full, int step_param, int sadpb,
                                   int further_steps, int do_refine,
                                   const aom_variance_fn_ptr_t *fn_ptr,
                                   const MV *ref_mv, MV *dst_mv, int is_second,
                                   const search_site_config *cfg) {
  (void)cpi;  // to silence compiler warning
  const int32_t *const wsrc = x->wsrc_buf;
  const int32_t *const mask = x->mask_buf;
  MV cand_mv;
  // Seeded with the first search's num00 output, then used as the count of
  // extra rounds already accounted for.
  int rounds;
  // Rounds that can still be skipped, as reported by the latest search.
  int skippable = 0;

  int best_err =
      obmc_diamond_search_sad(x, cfg, wsrc, mask, mvp_full, &cand_mv,
                              step_param, sadpb, &rounds, fn_ptr, ref_mv,
                              is_second);
  if (best_err < INT_MAX) {
    best_err = get_obmc_mvpred_var(x, wsrc, mask, &cand_mv, ref_mv, fn_ptr, 1,
                                   is_second);
  }
  *dst_mv = cand_mv;

  // If there won't be more n-step search, check to see if refining search is
  // needed.
  if (rounds > further_steps) do_refine = 0;

  while (rounds < further_steps) {
    ++rounds;

    if (skippable) {
      --skippable;
      continue;
    }

    int err = obmc_diamond_search_sad(x, cfg, wsrc, mask, mvp_full, &cand_mv,
                                      step_param + rounds, sadpb, &skippable,
                                      fn_ptr, ref_mv, is_second);
    if (err < INT_MAX) {
      err = get_obmc_mvpred_var(x, wsrc, mask, &cand_mv, ref_mv, fn_ptr, 1,
                                is_second);
    }

    // check to see if refining search is needed.
    if (skippable > further_steps - rounds) do_refine = 0;

    if (err < best_err) {
      best_err = err;
      *dst_mv = cand_mv;
    }
  }

  // final 1-away diamond refining search
  if (do_refine) {
    const int search_range = 8;
    MV refined_mv = *dst_mv;
    int err = obmc_refining_search_sad(x, wsrc, mask, &refined_mv, sadpb,
                                       search_range, fn_ptr, ref_mv, is_second);
    if (err < INT_MAX) {
      err = get_obmc_mvpred_var(x, wsrc, mask, &refined_mv, ref_mv, fn_ptr, 1,
                                is_second);
    }
    if (err < best_err) {
      best_err = err;
      *dst_mv = refined_mv;
    }
  }
  return best_err;
}
2950 
// Full-pixel OBMC motion search entry point.
//
// When cpi->sf.obmc_full_pixel_search_level is 0 the work is delegated to
// obmc_full_pixel_diamond(). Otherwise mvp_full is copied to dst_mv, clamped
// to x->mv_limits, and refined with obmc_refining_search_sad() over a range
// of 8.
//
// The resulting motion vector is written to dst_mv. Returns the
// get_obmc_mvpred_var() score of that vector (or the raw search result when
// it is INT_MAX).
int av1_obmc_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
                               int step_param, int sadpb, int further_steps,
                               int do_refine,
                               const aom_variance_fn_ptr_t *fn_ptr,
                               const MV *ref_mv, MV *dst_mv, int is_second,
                               const search_site_config *cfg) {
  if (cpi->sf.obmc_full_pixel_search_level == 0) {
    return obmc_full_pixel_diamond(cpi, x, mvp_full, step_param, sadpb,
                                   further_steps, do_refine, fn_ptr, ref_mv,
                                   dst_mv, is_second, cfg);
  }

  // Refinement-only path: start from the clamped input vector.
  const int search_range = 8;
  *dst_mv = *mvp_full;
  clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);

  int err = obmc_refining_search_sad(x, x->wsrc_buf, x->mask_buf, dst_mv, sadpb,
                                     search_range, fn_ptr, ref_mv, is_second);
  if (err < INT_MAX) {
    err = get_obmc_mvpred_var(x, x->wsrc_buf, x->mask_buf, dst_mv, ref_mv,
                              fn_ptr, 1, is_second);
  }
  return err;
}
2976 
// Note(yunqingwang): The two functions below are only used by the motion
// vector unit test; they return the extreme motion vectors allowed by the MV
// limits.
//
// COMMON_MV_TEST expands SETUP_SUBPEL_SEARCH and then casts to void every
// parameter and local that those two functions leave untouched (presumably to
// silence unused-variable warnings).
#define COMMON_MV_TEST                                        \
  SETUP_SUBPEL_SEARCH;                                        \
                                                              \
  /* Function parameters. */                                  \
  (void)error_per_bit;                                        \
  (void)vfp;                                                  \
  (void)forced_stop;                                          \
  (void)mvjcost;                                              \
  (void)mvcost;                                               \
  (void)distortion;                                           \
  (void)sse1;                                                 \
  (void)second_pred;                                          \
  (void)w;                                                    \
  (void)h;                                                    \
  (void)use_accurate_subpel_search;                           \
                                                              \
  /* Names not declared by the callers themselves, so      */ \
  /* presumably locals introduced by SETUP_SUBPEL_SEARCH.  */ \
  (void)src_address;                                          \
  (void)src_stride;                                           \
  (void)y;                                                    \
  (void)y_stride;                                             \
  (void)offset;                                               \
  (void)halfiters;                                            \
  (void)quarteriters;                                         \
  (void)eighthiters;                                          \
  (void)whichdir;                                             \
  (void)hstep;                                                \
  (void)tr;                                                   \
  (void)tc;                                                   \
  (void)sse;                                                  \
  (void)thismse;                                              \
                                                              \
  (void)cost_list;
3011 // Return the maximum MV.
av1_return_max_sub_pixel_mv(MACROBLOCK * x,const AV1_COMMON * const cm,int mi_row,int mi_col,const MV * ref_mv,int allow_hp,int error_per_bit,const aom_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,const uint8_t * mask,int mask_stride,int invert_mask,int w,int h,int use_accurate_subpel_search,const int do_reset_fractional_mv)3012 int av1_return_max_sub_pixel_mv(
3013     MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
3014     const MV *ref_mv, int allow_hp, int error_per_bit,
3015     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
3016     int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
3017     unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
3018     int mask_stride, int invert_mask, int w, int h,
3019     int use_accurate_subpel_search, const int do_reset_fractional_mv) {
3020   COMMON_MV_TEST;
3021   (void)mask;
3022   (void)mask_stride;
3023   (void)invert_mask;
3024   (void)minr;
3025   (void)minc;
3026 
3027   (void)cm;
3028   (void)mi_row;
3029   (void)mi_col;
3030   (void)do_reset_fractional_mv;
3031 
3032   bestmv->row = maxr;
3033   bestmv->col = maxc;
3034   besterr = 0;
3035   // In the sub-pel motion search, if hp is not used, then the last bit of mv
3036   // has to be 0.
3037   lower_mv_precision(bestmv, allow_hp, 0);
3038   return besterr;
3039 }
3040 // Return the minimum MV.
av1_return_min_sub_pixel_mv(MACROBLOCK * x,const AV1_COMMON * const cm,int mi_row,int mi_col,const MV * ref_mv,int allow_hp,int error_per_bit,const aom_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,const uint8_t * mask,int mask_stride,int invert_mask,int w,int h,int use_accurate_subpel_search,const int do_reset_fractional_mv)3041 int av1_return_min_sub_pixel_mv(
3042     MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
3043     const MV *ref_mv, int allow_hp, int error_per_bit,
3044     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
3045     int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
3046     unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
3047     int mask_stride, int invert_mask, int w, int h,
3048     int use_accurate_subpel_search, const int do_reset_fractional_mv) {
3049   COMMON_MV_TEST;
3050   (void)maxr;
3051   (void)maxc;
3052   (void)mask;
3053   (void)mask_stride;
3054   (void)invert_mask;
3055 
3056   (void)cm;
3057   (void)mi_row;
3058   (void)mi_col;
3059   (void)do_reset_fractional_mv;
3060 
3061   bestmv->row = minr;
3062   bestmv->col = minc;
3063   besterr = 0;
3064   // In the sub-pel motion search, if hp is not used, then the last bit of mv
3065   // has to be 0.
3066   lower_mv_precision(bestmv, allow_hp, 0);
3067   return besterr;
3068 }
3069 
// Runs a luma-only motion search for the block at (mi_row, mi_col) of size
// bsize against reference frame ref, then builds the luma inter predictor for
// the resulting motion vector.
//
// ref_mv_full is the starting point of the full-pixel search, in units of
// pel. When use_subpixel is set, the full-pixel search succeeded (result below
// INT_MAX) and the frame does not force integer MVs, a fractional search
// follows; otherwise the full-pixel result is scaled to 1/8-pel units.
//
// Side effects visible in this function: xd->mi[0] is overwritten with a
// single-reference SIMPLE_TRANSLATION description whose mv[0] is the search
// result, x->best_mv holds that result, and x->mv_limits is restored to its
// value on entry.
void av1_simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, int ref,
                              MV ref_mv_full, int num_planes,
                              int use_subpixel) {
  assert(num_planes == 1 &&
         "Currently simple_motion_search only supports luma plane");
  assert(!frame_is_intra_only(&cpi->common) &&
         "Simple motion search only enabled for non-key frames");
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int ref_idx = 0;

  set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize);

  // Describe the block as single-reference, translation-only.
  MB_MODE_INFO *const mbmi = xd->mi[0];
  mbmi->sb_type = bsize;
  mbmi->ref_frame[0] = ref;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->motion_mode = SIMPLE_TRANSLATION;

  const YV12_BUFFER_CONFIG *const ref_buf = get_ref_frame_yv12_buf(cm, ref);
  const YV12_BUFFER_CONFIG *const scaled_ref =
      av1_get_scaled_ref_frame(cpi, ref);
  // Only written and read when scaled_ref is non-NULL.
  struct buf_2d saved_pre;
  // ref_mv is used to code the motion vector. ref_mv_full is the initial point.
  // ref_mv is in units of 1/8 pel whereas ref_mv_full is in units of pel.
  MV ref_mv = { 0, 0 };
  const int step_param = cpi->mv_step_param;
  const MvLimits saved_limits = x->mv_limits;
  const int sadpb = x->sadperbit16;
  int cost_list[5];

  av1_setup_pre_planes(xd, ref_idx, ref_buf, mi_row, mi_col,
                       get_ref_scale_factors(cm, ref), num_planes);
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  if (scaled_ref) {
    saved_pre = xd->plane[AOM_PLANE_Y].pre[ref_idx];
    av1_setup_pre_planes(xd, ref_idx, scaled_ref, mi_row, mi_col, NULL,
                         num_planes);
  }

  // This overwrites the mv_limits so we will need to restore it later.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv);
  // NSTEP search with mesh search disabled.
  const int full_pel_err = av1_full_pixel_search(
      cpi, x, bsize, &ref_mv_full, step_param, NSTEP, 0, sadpb,
      cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1, mi_col * MI_SIZE,
      mi_row * MI_SIZE, 0, &cpi->ss_cfg[SS_CFG_SRC]);
  // Restore
  x->mv_limits = saved_limits;

  const int do_subpel = full_pel_err < INT_MAX &&
                        !cpi->common.cur_frame_force_integer_mv &&
                        use_subpixel;
  if (scaled_ref) {
    xd->plane[AOM_PLANE_Y].pre[ref_idx] = saved_pre;
  }

  if (!do_subpel) {
    // Manually convert from units of pixel to 1/8-pixels if we are not doing
    // subpel search
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  } else {
    int distortion_unused = 0;
    // The block dimensions are only passed on when the accurate sub-pel
    // search is enabled; otherwise width, height and the flag are all 0.
    const int accurate = cpi->sf.use_accurate_subpel_search;
    const int pw = accurate ? block_size_wide[bsize] : 0;
    const int ph = accurate ? block_size_high[bsize] : 0;
    cpi->find_fractional_mv_step(
        x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
        x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
        cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
        x->nmv_vec_cost, x->mv_cost_stack, &distortion_unused,
        &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, accurate, 1);
  }

  mbmi->mv[0].as_mv = x->best_mv.as_mv;

  // Get a copy of the prediction output
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);

  aom_clear_system_state();

  // NOTE(review): pre[ref_idx] was already restored right after the full-pel
  // search; this second restore looks redundant unless one of the calls in
  // between modifies it -- confirm before removing.
  if (scaled_ref) {
    xd->plane[AOM_PLANE_Y].pre[ref_idx] = saved_pre;
  }
}
3166 
// Runs av1_simple_motion_search() for the block and then measures how well
// the resulting prediction matches the source.
//
// The reference is ALTREF_FRAME when cpi->rc.is_src_frame_alt_ref is set and
// LAST_FRAME otherwise. After the search, the variance function for bsize is
// applied to the luma source buffer and the luma dst buffer of the block:
// its return value is stored in *var and it writes *sse.
void av1_simple_motion_sse_var(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
                               int mi_col, BLOCK_SIZE bsize,
                               const MV ref_mv_full, int use_subpixel,
                               unsigned int *sse, unsigned int *var) {
  MV_REFERENCE_FRAME ref = LAST_FRAME;
  if (cpi->rc.is_src_frame_alt_ref) ref = ALTREF_FRAME;

  // Luma only (num_planes == 1).
  av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref, ref_mv_full, 1,
                           use_subpixel);

  MACROBLOCKD *xd = &x->e_mbd;
  *var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
                               xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                               sse);
}
3185