1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <limits.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vpx_config.h"
16 #include "./vpx_dsp_rtcd.h"
17 
18 #include "vpx_dsp/vpx_dsp_common.h"
19 #include "vpx_mem/vpx_mem.h"
20 #include "vpx_ports/mem.h"
21 
22 #include "vp9/common/vp9_common.h"
23 #include "vp9/common/vp9_reconinter.h"
24 
25 #include "vp9/encoder/vp9_encoder.h"
26 #include "vp9/encoder/vp9_mcomp.h"
27 
28 // #define NEW_DIAMOND_SEARCH
29 
get_buf_from_mv(const struct buf_2d * buf,const MV * mv)30 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
31                                              const MV *mv) {
32   return &buf->buf[mv->row * buf->stride + mv->col];
33 }
34 
vp9_set_mv_search_range(MACROBLOCK * x,const MV * mv)35 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
36   int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
37   int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
38   int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
39   int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
40 
41   col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
42   row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
43   col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
44   row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
45 
46   // Get intersection of UMV window and valid MV window to reduce # of checks
47   // in diamond search.
48   if (x->mv_col_min < col_min)
49     x->mv_col_min = col_min;
50   if (x->mv_col_max > col_max)
51     x->mv_col_max = col_max;
52   if (x->mv_row_min < row_min)
53     x->mv_row_min = row_min;
54   if (x->mv_row_max > row_max)
55     x->mv_row_max = row_max;
56 }
57 
vp9_init_search_range(int size)58 int vp9_init_search_range(int size) {
59   int sr = 0;
60   // Minimum search size no matter what the passed in value.
61   size = VPXMAX(16, size);
62 
63   while ((size << sr) < MAX_FULL_PEL_VAL)
64     sr++;
65 
66   sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
67   return sr;
68 }
69 
mv_cost(const MV * mv,const int * joint_cost,int * const comp_cost[2])70 static INLINE int mv_cost(const MV *mv,
71                           const int *joint_cost, int *const comp_cost[2]) {
72   return joint_cost[vp9_get_mv_joint(mv)] +
73              comp_cost[0][mv->row] + comp_cost[1][mv->col];
74 }
75 
vp9_mv_bit_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int weight)76 int vp9_mv_bit_cost(const MV *mv, const MV *ref,
77                     const int *mvjcost, int *mvcost[2], int weight) {
78   const MV diff = { mv->row - ref->row,
79                     mv->col - ref->col };
80   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
81 }
82 
mv_err_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int error_per_bit)83 static int mv_err_cost(const MV *mv, const MV *ref,
84                        const int *mvjcost, int *mvcost[2],
85                        int error_per_bit) {
86   if (mvcost) {
87     const MV diff = { mv->row - ref->row,
88                       mv->col - ref->col };
89     return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
90                                   error_per_bit, 13);
91   }
92   return 0;
93 }
94 
mvsad_err_cost(const MACROBLOCK * x,const MV * mv,const MV * ref,int error_per_bit)95 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
96                           int error_per_bit) {
97   const MV diff = { mv->row - ref->row,
98                     mv->col - ref->col };
99   return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
100                                     x->nmvsadcost) * error_per_bit, 8);
101 }
102 
vp9_init_dsmotion_compensation(search_site_config * cfg,int stride)103 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
104   int len, ss_count = 1;
105 
106   cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
107   cfg->ss[0].offset = 0;
108 
109   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
110     // Generate offsets for 4 search sites per step.
111     const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
112     int i;
113     for (i = 0; i < 4; ++i) {
114       search_site *const ss = &cfg->ss[ss_count++];
115       ss->mv = ss_mvs[i];
116       ss->offset = ss->mv.row * stride + ss->mv.col;
117     }
118   }
119 
120   cfg->ss_count = ss_count;
121   cfg->searches_per_step = 4;
122 }
123 
vp9_init3smotion_compensation(search_site_config * cfg,int stride)124 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
125   int len, ss_count = 1;
126 
127   cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
128   cfg->ss[0].offset = 0;
129 
130   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
131     // Generate offsets for 8 search sites per step.
132     const MV ss_mvs[8] = {
133       {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
134       {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
135     };
136     int i;
137     for (i = 0; i < 8; ++i) {
138       search_site *const ss = &cfg->ss[ss_count++];
139       ss->mv = ss_mvs[i];
140       ss->offset = ss->mv.row * stride + ss->mv.col;
141     }
142   }
143 
144   cfg->ss_count = ss_count;
145   cfg->searches_per_step = 8;
146 }
147 
148 /*
149  * To avoid the penalty for crossing cache-line read, preload the reference
150  * area in a small buffer, which is aligned to make sure there won't be crossing
151  * cache-line read while reading from this buffer. This reduced the cpu
152  * cycles spent on reading ref data in sub-pixel filter functions.
153  * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
154  * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
155  * could reduce the area.
156  */
157 
158 /* estimated cost of a motion vector (r,c) */
159 #define MVC(r, c)                                       \
160     (mvcost ?                                           \
161      ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
162        mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
163       error_per_bit + 4096) >> 13 : 0)
164 
165 
166 // convert motion vector component to offset for sv[a]f calc
sp(int x)167 static INLINE int sp(int x) {
168   return x & 7;
169 }
170 
pre(const uint8_t * buf,int stride,int r,int c)171 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
172   return &buf[(r >> 3) * stride + (c >> 3)];
173 }
174 
175 /* checks if (r, c) has better score than previous best */
176 #define CHECK_BETTER(v, r, c) \
177   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
178     if (second_pred == NULL)                                           \
179       thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
180                              src_stride, &sse);                        \
181     else                                                               \
182       thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
183                               z, src_stride, &sse, second_pred);       \
184     if ((v = MVC(r, c) + thismse) < besterr) {                         \
185       besterr = v;                                                     \
186       br = r;                                                          \
187       bc = c;                                                          \
188       *distortion = thismse;                                           \
189       *sse1 = sse;                                                     \
190     }                                                                  \
191   } else {                                                             \
192     v = INT_MAX;                                                       \
193   }
194 
195 #define FIRST_LEVEL_CHECKS                              \
196   {                                                     \
197     unsigned int left, right, up, down, diag;           \
198     CHECK_BETTER(left, tr, tc - hstep);                 \
199     CHECK_BETTER(right, tr, tc + hstep);                \
200     CHECK_BETTER(up, tr - hstep, tc);                   \
201     CHECK_BETTER(down, tr + hstep, tc);                 \
202     whichdir = (left < right ? 0 : 1) +                 \
203                (up < down ? 0 : 2);                     \
204     switch (whichdir) {                                 \
205       case 0:                                           \
206         CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
207         break;                                          \
208       case 1:                                           \
209         CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
210         break;                                          \
211       case 2:                                           \
212         CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
213         break;                                          \
214       case 3:                                           \
215         CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
216         break;                                          \
217     }                                                   \
218   }
219 
220 #define SECOND_LEVEL_CHECKS                             \
221   {                                                     \
222     int kr, kc;                                         \
223     unsigned int second;                                \
224     if (tr != br && tc != bc) {                         \
225       kr = br - tr;                                     \
226       kc = bc - tc;                                     \
227       CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
228       CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
229     } else if (tr == br && tc != bc) {                  \
230       kc = bc - tc;                                     \
231       CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
232       CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
233       switch (whichdir) {                               \
234         case 0:                                         \
235         case 1:                                         \
236           CHECK_BETTER(second, tr + hstep, tc + kc);    \
237           break;                                        \
238         case 2:                                         \
239         case 3:                                         \
240           CHECK_BETTER(second, tr - hstep, tc + kc);    \
241           break;                                        \
242       }                                                 \
243     } else if (tr != br && tc == bc) {                  \
244       kr = br - tr;                                     \
245       CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
246       CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
247       switch (whichdir) {                               \
248         case 0:                                         \
249         case 2:                                         \
250           CHECK_BETTER(second, tr + kr, tc + hstep);    \
251           break;                                        \
252         case 1:                                         \
253         case 3:                                         \
254           CHECK_BETTER(second, tr + kr, tc - hstep);    \
255           break;                                        \
256       }                                                 \
257     }                                                   \
258   }
259 
260 // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
261 // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
262 // later in the same way.
263 #define SECOND_LEVEL_CHECKS_BEST                        \
264   {                                                     \
265     unsigned int second;                                \
266     int br0 = br;                                       \
267     int bc0 = bc;                                       \
268     assert(tr == br || tc == bc);                       \
269     if (tr == br && tc != bc) {                         \
270       kc = bc - tc;                                     \
271     } else if (tr != br && tc == bc) {                  \
272       kr = br - tr;                                     \
273     }                                                   \
274     CHECK_BETTER(second, br0 + kr, bc0);                \
275     CHECK_BETTER(second, br0, bc0 + kc);                \
276     if (br0 != br || bc0 != bc) {                       \
277       CHECK_BETTER(second, br0 + kr, bc0 + kc);         \
278     }                                                   \
279   }
280 
281 #define SETUP_SUBPEL_SEARCH                                                \
282   const uint8_t *const z = x->plane[0].src.buf;                            \
283   const int src_stride = x->plane[0].src.stride;                           \
284   const MACROBLOCKD *xd = &x->e_mbd;                                       \
285   unsigned int besterr = INT_MAX;                                          \
286   unsigned int sse;                                                        \
287   unsigned int whichdir;                                                   \
288   int thismse;                                                             \
289   const unsigned int halfiters = iters_per_step;                           \
290   const unsigned int quarteriters = iters_per_step;                        \
291   const unsigned int eighthiters = iters_per_step;                         \
292   const int y_stride = xd->plane[0].pre[0].stride;                         \
293   const int offset = bestmv->row * y_stride + bestmv->col;                 \
294   const uint8_t *const y = xd->plane[0].pre[0].buf;                        \
295                                                                            \
296   int rr = ref_mv->row;                                                    \
297   int rc = ref_mv->col;                                                    \
298   int br = bestmv->row * 8;                                                \
299   int bc = bestmv->col * 8;                                                \
300   int hstep = 4;                                                           \
301   const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);        \
302   const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);        \
303   const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);        \
304   const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);        \
305   int tr = br;                                                             \
306   int tc = bc;                                                             \
307                                                                            \
308   bestmv->row *= 8;                                                        \
309   bestmv->col *= 8;
310 
setup_center_error(const MACROBLOCKD * xd,const MV * bestmv,const MV * ref_mv,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,const uint8_t * const src,const int src_stride,const uint8_t * const y,int y_stride,const uint8_t * second_pred,int w,int h,int offset,int * mvjcost,int * mvcost[2],unsigned int * sse1,int * distortion)311 static unsigned int setup_center_error(const MACROBLOCKD *xd,
312                                        const MV *bestmv,
313                                        const MV *ref_mv,
314                                        int error_per_bit,
315                                        const vp9_variance_fn_ptr_t *vfp,
316                                        const uint8_t *const src,
317                                        const int src_stride,
318                                        const uint8_t *const y,
319                                        int y_stride,
320                                        const uint8_t *second_pred,
321                                        int w, int h, int offset,
322                                        int *mvjcost, int *mvcost[2],
323                                        unsigned int *sse1,
324                                        int *distortion) {
325   unsigned int besterr;
326 #if CONFIG_VP9_HIGHBITDEPTH
327   if (second_pred != NULL) {
328     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
329       DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
330       vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
331                                y_stride);
332       besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
333                         sse1);
334     } else {
335       DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
336       vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
337       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
338     }
339   } else {
340     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
341   }
342   *distortion = besterr;
343   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
344 #else
345   (void) xd;
346   if (second_pred != NULL) {
347     DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
348     vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
349     besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
350   } else {
351     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
352   }
353   *distortion = besterr;
354   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
355 #endif  // CONFIG_VP9_HIGHBITDEPTH
356   return besterr;
357 }
358 
divide_and_round(const int n,const int d)359 static INLINE int divide_and_round(const int n, const int d) {
360   return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
361 }
362 
is_cost_list_wellbehaved(int * cost_list)363 static INLINE int is_cost_list_wellbehaved(int *cost_list) {
364   return cost_list[0] < cost_list[1] &&
365          cost_list[0] < cost_list[2] &&
366          cost_list[0] < cost_list[3] &&
367          cost_list[0] < cost_list[4];
368 }
369 
370 // Returns surface minima estimate at given precision in 1/2^n bits.
371 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
372 // For a given set of costs S0, S1, S2, S3, S4 at points
373 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
374 // the solution for the location of the minima (x0, y0) is given by:
375 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
376 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
377 // The code below is an integerized version of that.
get_cost_surf_min(int * cost_list,int * ir,int * ic,int bits)378 static void get_cost_surf_min(int *cost_list, int *ir, int *ic,
379                               int bits) {
380   *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
381                          (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
382   *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
383                          (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
384 }
385 
vp9_find_best_sub_pixel_tree_pruned_evenmore(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,int w,int h)386 int vp9_find_best_sub_pixel_tree_pruned_evenmore(
387     const MACROBLOCK *x,
388     MV *bestmv, const MV *ref_mv,
389     int allow_hp,
390     int error_per_bit,
391     const vp9_variance_fn_ptr_t *vfp,
392     int forced_stop,
393     int iters_per_step,
394     int *cost_list,
395     int *mvjcost, int *mvcost[2],
396     int *distortion,
397     unsigned int *sse1,
398     const uint8_t *second_pred,
399     int w, int h) {
400   SETUP_SUBPEL_SEARCH;
401   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
402                                z, src_stride, y, y_stride, second_pred,
403                                w, h, offset, mvjcost, mvcost,
404                                sse1, distortion);
405   (void) halfiters;
406   (void) quarteriters;
407   (void) eighthiters;
408   (void) whichdir;
409   (void) allow_hp;
410   (void) forced_stop;
411   (void) hstep;
412 
413   if (cost_list &&
414       cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
415       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
416       cost_list[4] != INT_MAX &&
417       is_cost_list_wellbehaved(cost_list)) {
418     int ir, ic;
419     unsigned int minpt;
420     get_cost_surf_min(cost_list, &ir, &ic, 2);
421     if (ir != 0 || ic != 0) {
422       CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
423     }
424   } else {
425     FIRST_LEVEL_CHECKS;
426     if (halfiters > 1) {
427       SECOND_LEVEL_CHECKS;
428     }
429 
430     tr = br;
431     tc = bc;
432 
433     // Each subsequent iteration checks at least one point in common with
434     // the last iteration could be 2 ( if diag selected) 1/4 pel
435     // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
436     if (forced_stop != 2) {
437       hstep >>= 1;
438       FIRST_LEVEL_CHECKS;
439       if (quarteriters > 1) {
440         SECOND_LEVEL_CHECKS;
441       }
442     }
443   }
444 
445   tr = br;
446   tc = bc;
447 
448   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
449     hstep >>= 1;
450     FIRST_LEVEL_CHECKS;
451     if (eighthiters > 1) {
452       SECOND_LEVEL_CHECKS;
453     }
454   }
455 
456   bestmv->row = br;
457   bestmv->col = bc;
458 
459   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
460       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
461     return INT_MAX;
462 
463   return besterr;
464 }
465 
vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,int w,int h)466 int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
467                                              MV *bestmv, const MV *ref_mv,
468                                              int allow_hp,
469                                              int error_per_bit,
470                                              const vp9_variance_fn_ptr_t *vfp,
471                                              int forced_stop,
472                                              int iters_per_step,
473                                              int *cost_list,
474                                              int *mvjcost, int *mvcost[2],
475                                              int *distortion,
476                                              unsigned int *sse1,
477                                              const uint8_t *second_pred,
478                                              int w, int h) {
479   SETUP_SUBPEL_SEARCH;
480   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
481                                z, src_stride, y, y_stride, second_pred,
482                                w, h, offset, mvjcost, mvcost,
483                                sse1, distortion);
484   if (cost_list &&
485       cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
486       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
487       cost_list[4] != INT_MAX &&
488       is_cost_list_wellbehaved(cost_list)) {
489     unsigned int minpt;
490     int ir, ic;
491     get_cost_surf_min(cost_list, &ir, &ic, 1);
492     if (ir != 0 || ic != 0) {
493       CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
494     }
495   } else {
496     FIRST_LEVEL_CHECKS;
497     if (halfiters > 1) {
498       SECOND_LEVEL_CHECKS;
499     }
500   }
501 
502   // Each subsequent iteration checks at least one point in common with
503   // the last iteration could be 2 ( if diag selected) 1/4 pel
504 
505   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
506   if (forced_stop != 2) {
507     tr = br;
508     tc = bc;
509     hstep >>= 1;
510     FIRST_LEVEL_CHECKS;
511     if (quarteriters > 1) {
512       SECOND_LEVEL_CHECKS;
513     }
514   }
515 
516   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
517     tr = br;
518     tc = bc;
519     hstep >>= 1;
520     FIRST_LEVEL_CHECKS;
521     if (eighthiters > 1) {
522       SECOND_LEVEL_CHECKS;
523     }
524   }
525   // These lines insure static analysis doesn't warn that
526   // tr and tc aren't used after the above point.
527   (void) tr;
528   (void) tc;
529 
530   bestmv->row = br;
531   bestmv->col = bc;
532 
533   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
534       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
535     return INT_MAX;
536 
537   return besterr;
538 }
539 
vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,int w,int h)540 int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
541                                         MV *bestmv, const MV *ref_mv,
542                                         int allow_hp,
543                                         int error_per_bit,
544                                         const vp9_variance_fn_ptr_t *vfp,
545                                         int forced_stop,
546                                         int iters_per_step,
547                                         int *cost_list,
548                                         int *mvjcost, int *mvcost[2],
549                                         int *distortion,
550                                         unsigned int *sse1,
551                                         const uint8_t *second_pred,
552                                         int w, int h) {
553   SETUP_SUBPEL_SEARCH;
554   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
555                                z, src_stride, y, y_stride, second_pred,
556                                w, h, offset, mvjcost, mvcost,
557                                sse1, distortion);
558   if (cost_list &&
559       cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
560       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
561       cost_list[4] != INT_MAX) {
562     unsigned int left, right, up, down, diag;
563     whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
564                (cost_list[2] < cost_list[4] ? 0 : 2);
565     switch (whichdir) {
566       case 0:
567         CHECK_BETTER(left, tr, tc - hstep);
568         CHECK_BETTER(down, tr + hstep, tc);
569         CHECK_BETTER(diag, tr + hstep, tc - hstep);
570         break;
571       case 1:
572         CHECK_BETTER(right, tr, tc + hstep);
573         CHECK_BETTER(down, tr + hstep, tc);
574         CHECK_BETTER(diag, tr + hstep, tc + hstep);
575         break;
576       case 2:
577         CHECK_BETTER(left, tr, tc - hstep);
578         CHECK_BETTER(up, tr - hstep, tc);
579         CHECK_BETTER(diag, tr - hstep, tc - hstep);
580         break;
581       case 3:
582         CHECK_BETTER(right, tr, tc + hstep);
583         CHECK_BETTER(up, tr - hstep, tc);
584         CHECK_BETTER(diag, tr - hstep, tc + hstep);
585         break;
586     }
587   } else {
588     FIRST_LEVEL_CHECKS;
589     if (halfiters > 1) {
590       SECOND_LEVEL_CHECKS;
591     }
592   }
593 
594   tr = br;
595   tc = bc;
596 
597   // Each subsequent iteration checks at least one point in common with
598   // the last iteration could be 2 ( if diag selected) 1/4 pel
599 
600   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
601   if (forced_stop != 2) {
602     hstep >>= 1;
603     FIRST_LEVEL_CHECKS;
604     if (quarteriters > 1) {
605       SECOND_LEVEL_CHECKS;
606     }
607     tr = br;
608     tc = bc;
609   }
610 
611   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
612     hstep >>= 1;
613     FIRST_LEVEL_CHECKS;
614     if (eighthiters > 1) {
615       SECOND_LEVEL_CHECKS;
616     }
617     tr = br;
618     tc = bc;
619   }
620   // These lines insure static analysis doesn't warn that
621   // tr and tc aren't used after the above point.
622   (void) tr;
623   (void) tc;
624 
625   bestmv->row = br;
626   bestmv->col = bc;
627 
628   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
629       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
630     return INT_MAX;
631 
632   return besterr;
633 }
634 
635 static const MV search_step_table[12] = {
636     // left, right, up, down
637     {0, -4}, {0, 4}, {-4, 0}, {4, 0},
638     {0, -2}, {0, 2}, {-2, 0}, {2, 0},
639     {0, -1}, {0, 1}, {-1, 0}, {1, 0}
640 };
641 
vp9_find_best_sub_pixel_tree(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,int w,int h)642 int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
643                                  MV *bestmv, const MV *ref_mv,
644                                  int allow_hp,
645                                  int error_per_bit,
646                                  const vp9_variance_fn_ptr_t *vfp,
647                                  int forced_stop,
648                                  int iters_per_step,
649                                  int *cost_list,
650                                  int *mvjcost, int *mvcost[2],
651                                  int *distortion,
652                                  unsigned int *sse1,
653                                  const uint8_t *second_pred,
654                                  int w, int h) {
655   const uint8_t *const z = x->plane[0].src.buf;
656   const uint8_t *const src_address = z;
657   const int src_stride = x->plane[0].src.stride;
658   const MACROBLOCKD *xd = &x->e_mbd;
659   unsigned int besterr = INT_MAX;
660   unsigned int sse;
661   int thismse;
662   const int y_stride = xd->plane[0].pre[0].stride;
663   const int offset = bestmv->row * y_stride + bestmv->col;
664   const uint8_t *const y = xd->plane[0].pre[0].buf;
665 
666   int rr = ref_mv->row;
667   int rc = ref_mv->col;
668   int br = bestmv->row * 8;
669   int bc = bestmv->col * 8;
670   int hstep = 4;
671   int iter, round = 3 - forced_stop;
672   const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
673   const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
674   const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
675   const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
676   int tr = br;
677   int tc = bc;
678   const MV *search_step = search_step_table;
679   int idx, best_idx = -1;
680   unsigned int cost_array[5];
681   int kr, kc;
682 
683   if (!(allow_hp && vp9_use_mv_hp(ref_mv)))
684     if (round == 3)
685       round = 2;
686 
687   bestmv->row *= 8;
688   bestmv->col *= 8;
689 
690   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
691                                z, src_stride, y, y_stride, second_pred,
692                                w, h, offset, mvjcost, mvcost,
693                                sse1, distortion);
694 
695   (void) cost_list;  // to silence compiler warning
696 
697   for (iter = 0; iter < round; ++iter) {
698     // Check vertical and horizontal sub-pixel positions.
699     for (idx = 0; idx < 4; ++idx) {
700       tr = br + search_step[idx].row;
701       tc = bc + search_step[idx].col;
702       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
703         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
704         MV this_mv;
705         this_mv.row = tr;
706         this_mv.col = tc;
707         if (second_pred == NULL)
708           thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
709                              src_address, src_stride, &sse);
710         else
711           thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
712                               src_address, src_stride, &sse, second_pred);
713         cost_array[idx] = thismse +
714             mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
715 
716         if (cost_array[idx] < besterr) {
717           best_idx = idx;
718           besterr = cost_array[idx];
719           *distortion = thismse;
720           *sse1 = sse;
721         }
722       } else {
723         cost_array[idx] = INT_MAX;
724       }
725     }
726 
727     // Check diagonal sub-pixel position
728     kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
729     kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
730 
731     tc = bc + kc;
732     tr = br + kr;
733     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
734       const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
735       MV this_mv = {tr, tc};
736       if (second_pred == NULL)
737         thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
738                            src_address, src_stride, &sse);
739       else
740         thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
741                             src_address, src_stride, &sse, second_pred);
742       cost_array[4] = thismse +
743           mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
744 
745       if (cost_array[4] < besterr) {
746         best_idx = 4;
747         besterr = cost_array[4];
748         *distortion = thismse;
749         *sse1 = sse;
750       }
751     } else {
752       cost_array[idx] = INT_MAX;
753     }
754 
755     if (best_idx < 4 && best_idx >= 0) {
756       br += search_step[best_idx].row;
757       bc += search_step[best_idx].col;
758     } else if (best_idx == 4) {
759       br = tr;
760       bc = tc;
761     }
762 
763     if (iters_per_step > 1 && best_idx != -1)
764       SECOND_LEVEL_CHECKS_BEST;
765 
766     tr = br;
767     tc = bc;
768 
769     search_step += 4;
770     hstep >>= 1;
771     best_idx = -1;
772   }
773 
774   // Each subsequent iteration checks at least one point in common with
775   // the last iteration could be 2 ( if diag selected) 1/4 pel
776 
777   // These lines insure static analysis doesn't warn that
778   // tr and tc aren't used after the above point.
779   (void) tr;
780   (void) tc;
781 
782   bestmv->row = br;
783   bestmv->col = bc;
784 
785   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
786       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
787     return INT_MAX;
788 
789   return besterr;
790 }
791 
792 #undef MVC
793 #undef PRE
794 #undef CHECK_BETTER
795 
check_bounds(const MACROBLOCK * x,int row,int col,int range)796 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
797                                int range) {
798   return ((row - range) >= x->mv_row_min) &
799          ((row + range) <= x->mv_row_max) &
800          ((col - range) >= x->mv_col_min) &
801          ((col + range) <= x->mv_col_max);
802 }
803 
is_mv_in(const MACROBLOCK * x,const MV * mv)804 static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
805   return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
806          (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
807 }
808 
809 #define CHECK_BETTER \
810   {\
811     if (thissad < bestsad) {\
812       if (use_mvcost) \
813         thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
814       if (thissad < bestsad) {\
815         bestsad = thissad;\
816         best_site = i;\
817       }\
818     }\
819   }
820 
821 #define MAX_PATTERN_SCALES         11
822 #define MAX_PATTERN_CANDIDATES      8  // max number of canddiates per scale
823 #define PATTERN_CANDIDATES_REF      3  // number of refinement candidates
824 
825 // Calculate and return a sad+mvcost list around an integer best pel.
calc_int_cost_list(const MACROBLOCK * x,const MV * ref_mv,int sadpb,const vp9_variance_fn_ptr_t * fn_ptr,const MV * best_mv,int * cost_list)826 static INLINE void calc_int_cost_list(const MACROBLOCK *x,
827                                       const MV *ref_mv,
828                                       int sadpb,
829                                       const vp9_variance_fn_ptr_t *fn_ptr,
830                                       const MV *best_mv,
831                                       int *cost_list) {
832   static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
833   const struct buf_2d *const what = &x->plane[0].src;
834   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
835   const MV fcenter_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
836   int br = best_mv->row;
837   int bc = best_mv->col;
838   MV this_mv;
839   int i;
840   unsigned int sse;
841 
842   this_mv.row = br;
843   this_mv.col = bc;
844   cost_list[0] = fn_ptr->vf(what->buf, what->stride,
845                             get_buf_from_mv(in_what, &this_mv),
846                             in_what->stride, &sse) +
847       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
848   if (check_bounds(x, br, bc, 1)) {
849     for (i = 0; i < 4; i++) {
850       const MV this_mv = {br + neighbors[i].row,
851         bc + neighbors[i].col};
852       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
853                                     get_buf_from_mv(in_what, &this_mv),
854                                     in_what->stride, &sse) +
855           // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
856           mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
857                       x->errorperbit);
858     }
859   } else {
860     for (i = 0; i < 4; i++) {
861       const MV this_mv = {br + neighbors[i].row,
862         bc + neighbors[i].col};
863       if (!is_mv_in(x, &this_mv))
864         cost_list[i + 1] = INT_MAX;
865       else
866         cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
867                                       get_buf_from_mv(in_what, &this_mv),
868                                       in_what->stride, &sse) +
869             // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
870             mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
871                         x->errorperbit);
872     }
873   }
874 }
875 
876 // Generic pattern search function that searches over multiple scales.
877 // Each scale can have a different number of candidates and shape of
878 // candidates as indicated in the num_candidates and candidates arrays
879 // passed into this function
880 //
vp9_pattern_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv,const int num_candidates[MAX_PATTERN_SCALES],const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES])881 static int vp9_pattern_search(const MACROBLOCK *x,
882                               MV *ref_mv,
883                               int search_param,
884                               int sad_per_bit,
885                               int do_init_search,
886                               int *cost_list,
887                               const vp9_variance_fn_ptr_t *vfp,
888                               int use_mvcost,
889                               const MV *center_mv,
890                               MV *best_mv,
891                               const int num_candidates[MAX_PATTERN_SCALES],
892                               const MV candidates[MAX_PATTERN_SCALES]
893                                                  [MAX_PATTERN_CANDIDATES]) {
894   const MACROBLOCKD *const xd = &x->e_mbd;
895   static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
896     10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
897   };
898   int i, s, t;
899   const struct buf_2d *const what = &x->plane[0].src;
900   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
901   int br, bc;
902   int bestsad = INT_MAX;
903   int thissad;
904   int k = -1;
905   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
906   int best_init_s = search_param_to_steps[search_param];
907   // adjust ref_mv to make sure it is within MV range
908   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
909   br = ref_mv->row;
910   bc = ref_mv->col;
911 
912   // Work out the start point for the search
913   bestsad = vfp->sdf(what->buf, what->stride,
914                      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
915       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
916 
917   // Search all possible scales upto the search param around the center point
918   // pick the scale of the point that is best as the starting scale of
919   // further steps around it.
920   if (do_init_search) {
921     s = best_init_s;
922     best_init_s = -1;
923     for (t = 0; t <= s; ++t) {
924       int best_site = -1;
925       if (check_bounds(x, br, bc, 1 << t)) {
926         for (i = 0; i < num_candidates[t]; i++) {
927           const MV this_mv = {br + candidates[t][i].row,
928                               bc + candidates[t][i].col};
929           thissad = vfp->sdf(what->buf, what->stride,
930                              get_buf_from_mv(in_what, &this_mv),
931                              in_what->stride);
932           CHECK_BETTER
933         }
934       } else {
935         for (i = 0; i < num_candidates[t]; i++) {
936           const MV this_mv = {br + candidates[t][i].row,
937                               bc + candidates[t][i].col};
938           if (!is_mv_in(x, &this_mv))
939             continue;
940           thissad = vfp->sdf(what->buf, what->stride,
941                              get_buf_from_mv(in_what, &this_mv),
942                              in_what->stride);
943           CHECK_BETTER
944         }
945       }
946       if (best_site == -1) {
947         continue;
948       } else {
949         best_init_s = t;
950         k = best_site;
951       }
952     }
953     if (best_init_s != -1) {
954       br += candidates[best_init_s][k].row;
955       bc += candidates[best_init_s][k].col;
956     }
957   }
958 
959   // If the center point is still the best, just skip this and move to
960   // the refinement step.
961   if (best_init_s != -1) {
962     int best_site = -1;
963     s = best_init_s;
964 
965     do {
966       // No need to search all 6 points the 1st time if initial search was used
967       if (!do_init_search || s != best_init_s) {
968         if (check_bounds(x, br, bc, 1 << s)) {
969           for (i = 0; i < num_candidates[s]; i++) {
970             const MV this_mv = {br + candidates[s][i].row,
971                                 bc + candidates[s][i].col};
972             thissad = vfp->sdf(what->buf, what->stride,
973                                get_buf_from_mv(in_what, &this_mv),
974                                in_what->stride);
975             CHECK_BETTER
976           }
977         } else {
978           for (i = 0; i < num_candidates[s]; i++) {
979             const MV this_mv = {br + candidates[s][i].row,
980                                 bc + candidates[s][i].col};
981             if (!is_mv_in(x, &this_mv))
982               continue;
983             thissad = vfp->sdf(what->buf, what->stride,
984                                get_buf_from_mv(in_what, &this_mv),
985                                in_what->stride);
986             CHECK_BETTER
987           }
988         }
989 
990         if (best_site == -1) {
991           continue;
992         } else {
993           br += candidates[s][best_site].row;
994           bc += candidates[s][best_site].col;
995           k = best_site;
996         }
997       }
998 
999       do {
1000         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1001         best_site = -1;
1002         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1003         next_chkpts_indices[1] = k;
1004         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1005 
1006         if (check_bounds(x, br, bc, 1 << s)) {
1007           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1008             const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
1009                                 bc + candidates[s][next_chkpts_indices[i]].col};
1010             thissad = vfp->sdf(what->buf, what->stride,
1011                                get_buf_from_mv(in_what, &this_mv),
1012                                in_what->stride);
1013             CHECK_BETTER
1014           }
1015         } else {
1016           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1017             const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
1018                                 bc + candidates[s][next_chkpts_indices[i]].col};
1019             if (!is_mv_in(x, &this_mv))
1020               continue;
1021             thissad = vfp->sdf(what->buf, what->stride,
1022                                get_buf_from_mv(in_what, &this_mv),
1023                                in_what->stride);
1024             CHECK_BETTER
1025           }
1026         }
1027 
1028         if (best_site != -1) {
1029           k = next_chkpts_indices[best_site];
1030           br += candidates[s][k].row;
1031           bc += candidates[s][k].col;
1032         }
1033       } while (best_site != -1);
1034     } while (s--);
1035   }
1036 
1037   // Returns the one-away integer pel sad values around the best as follows:
1038   // cost_list[0]: cost at the best integer pel
1039   // cost_list[1]: cost at delta {0, -1} (left)   from the best integer pel
1040   // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
1041   // cost_list[3]: cost at delta { 0, 1} (right)  from the best integer pel
1042   // cost_list[4]: cost at delta {-1, 0} (top)    from the best integer pel
1043   if (cost_list) {
1044     const MV best_mv = { br, bc };
1045     calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
1046   }
1047   best_mv->row = br;
1048   best_mv->col = bc;
1049   return bestsad;
1050 }
1051 
1052 // A specialized function where the smallest scale search candidates
1053 // are 4 1-away neighbors, and cost_list is non-null
1054 // TODO(debargha): Merge this function with the one above. Also remove
1055 // use_mvcost option since it is always 1, to save unnecessary branches.
vp9_pattern_search_sad(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv,const int num_candidates[MAX_PATTERN_SCALES],const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES])1056 static int vp9_pattern_search_sad(const MACROBLOCK *x,
1057                                   MV *ref_mv,
1058                                   int search_param,
1059                                   int sad_per_bit,
1060                                   int do_init_search,
1061                                   int *cost_list,
1062                                   const vp9_variance_fn_ptr_t *vfp,
1063                                   int use_mvcost,
1064                                   const MV *center_mv,
1065                                   MV *best_mv,
1066                                   const int num_candidates[MAX_PATTERN_SCALES],
1067                                   const MV candidates[MAX_PATTERN_SCALES]
1068                                                      [MAX_PATTERN_CANDIDATES]) {
1069   const MACROBLOCKD *const xd = &x->e_mbd;
1070   static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
1071     10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
1072   };
1073   int i, s, t;
1074   const struct buf_2d *const what = &x->plane[0].src;
1075   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1076   int br, bc;
1077   int bestsad = INT_MAX;
1078   int thissad;
1079   int k = -1;
1080   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1081   int best_init_s = search_param_to_steps[search_param];
1082   // adjust ref_mv to make sure it is within MV range
1083   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1084   br = ref_mv->row;
1085   bc = ref_mv->col;
1086   if (cost_list != NULL) {
1087     cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
1088         INT_MAX;
1089   }
1090 
1091   // Work out the start point for the search
1092   bestsad = vfp->sdf(what->buf, what->stride,
1093                      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
1094       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1095 
1096   // Search all possible scales upto the search param around the center point
1097   // pick the scale of the point that is best as the starting scale of
1098   // further steps around it.
1099   if (do_init_search) {
1100     s = best_init_s;
1101     best_init_s = -1;
1102     for (t = 0; t <= s; ++t) {
1103       int best_site = -1;
1104       if (check_bounds(x, br, bc, 1 << t)) {
1105         for (i = 0; i < num_candidates[t]; i++) {
1106           const MV this_mv = {br + candidates[t][i].row,
1107                               bc + candidates[t][i].col};
1108           thissad = vfp->sdf(what->buf, what->stride,
1109                              get_buf_from_mv(in_what, &this_mv),
1110                              in_what->stride);
1111           CHECK_BETTER
1112         }
1113       } else {
1114         for (i = 0; i < num_candidates[t]; i++) {
1115           const MV this_mv = {br + candidates[t][i].row,
1116                               bc + candidates[t][i].col};
1117           if (!is_mv_in(x, &this_mv))
1118             continue;
1119           thissad = vfp->sdf(what->buf, what->stride,
1120                              get_buf_from_mv(in_what, &this_mv),
1121                              in_what->stride);
1122           CHECK_BETTER
1123         }
1124       }
1125       if (best_site == -1) {
1126         continue;
1127       } else {
1128         best_init_s = t;
1129         k = best_site;
1130       }
1131     }
1132     if (best_init_s != -1) {
1133       br += candidates[best_init_s][k].row;
1134       bc += candidates[best_init_s][k].col;
1135     }
1136   }
1137 
1138   // If the center point is still the best, just skip this and move to
1139   // the refinement step.
1140   if (best_init_s != -1) {
1141     int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
1142     int best_site = -1;
1143     s = best_init_s;
1144 
1145     for (; s >= do_sad; s--) {
1146       if (!do_init_search || s != best_init_s) {
1147         if (check_bounds(x, br, bc, 1 << s)) {
1148           for (i = 0; i < num_candidates[s]; i++) {
1149             const MV this_mv = {br + candidates[s][i].row,
1150                                 bc + candidates[s][i].col};
1151             thissad = vfp->sdf(what->buf, what->stride,
1152                                get_buf_from_mv(in_what, &this_mv),
1153                                in_what->stride);
1154             CHECK_BETTER
1155           }
1156         } else {
1157           for (i = 0; i < num_candidates[s]; i++) {
1158             const MV this_mv = {br + candidates[s][i].row,
1159                                 bc + candidates[s][i].col};
1160             if (!is_mv_in(x, &this_mv))
1161               continue;
1162             thissad = vfp->sdf(what->buf, what->stride,
1163                                get_buf_from_mv(in_what, &this_mv),
1164                                in_what->stride);
1165             CHECK_BETTER
1166           }
1167         }
1168 
1169         if (best_site == -1) {
1170           continue;
1171         } else {
1172           br += candidates[s][best_site].row;
1173           bc += candidates[s][best_site].col;
1174           k = best_site;
1175         }
1176       }
1177 
1178       do {
1179         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1180         best_site = -1;
1181         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1182         next_chkpts_indices[1] = k;
1183         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1184 
1185         if (check_bounds(x, br, bc, 1 << s)) {
1186           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1187             const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
1188                                 bc + candidates[s][next_chkpts_indices[i]].col};
1189             thissad = vfp->sdf(what->buf, what->stride,
1190                                get_buf_from_mv(in_what, &this_mv),
1191                                in_what->stride);
1192             CHECK_BETTER
1193           }
1194         } else {
1195           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1196             const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
1197                                 bc + candidates[s][next_chkpts_indices[i]].col};
1198             if (!is_mv_in(x, &this_mv))
1199               continue;
1200             thissad = vfp->sdf(what->buf, what->stride,
1201                                get_buf_from_mv(in_what, &this_mv),
1202                                in_what->stride);
1203             CHECK_BETTER
1204           }
1205         }
1206 
1207         if (best_site != -1) {
1208           k = next_chkpts_indices[best_site];
1209           br += candidates[s][k].row;
1210           bc += candidates[s][k].col;
1211         }
1212       } while (best_site != -1);
1213     }
1214 
1215     // Note: If we enter the if below, then cost_list must be non-NULL.
1216     if (s == 0) {
1217       cost_list[0] = bestsad;
1218       if (!do_init_search || s != best_init_s) {
1219         if (check_bounds(x, br, bc, 1 << s)) {
1220           for (i = 0; i < num_candidates[s]; i++) {
1221             const MV this_mv = {br + candidates[s][i].row,
1222                                 bc + candidates[s][i].col};
1223             cost_list[i + 1] =
1224             thissad = vfp->sdf(what->buf, what->stride,
1225                                get_buf_from_mv(in_what, &this_mv),
1226                                in_what->stride);
1227             CHECK_BETTER
1228           }
1229         } else {
1230           for (i = 0; i < num_candidates[s]; i++) {
1231             const MV this_mv = {br + candidates[s][i].row,
1232                                 bc + candidates[s][i].col};
1233             if (!is_mv_in(x, &this_mv))
1234               continue;
1235             cost_list[i + 1] =
1236             thissad = vfp->sdf(what->buf, what->stride,
1237                                get_buf_from_mv(in_what, &this_mv),
1238                                in_what->stride);
1239             CHECK_BETTER
1240           }
1241         }
1242 
1243         if (best_site != -1) {
1244           br += candidates[s][best_site].row;
1245           bc += candidates[s][best_site].col;
1246           k = best_site;
1247         }
1248       }
1249       while (best_site != -1) {
1250         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1251         best_site = -1;
1252         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1253         next_chkpts_indices[1] = k;
1254         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1255         cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
1256         cost_list[((k + 2) % 4) + 1] = cost_list[0];
1257         cost_list[0] = bestsad;
1258 
1259         if (check_bounds(x, br, bc, 1 << s)) {
1260           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1261             const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
1262                                 bc + candidates[s][next_chkpts_indices[i]].col};
1263             cost_list[next_chkpts_indices[i] + 1] =
1264             thissad = vfp->sdf(what->buf, what->stride,
1265                                get_buf_from_mv(in_what, &this_mv),
1266                                in_what->stride);
1267             CHECK_BETTER
1268           }
1269         } else {
1270           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1271             const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
1272                                 bc + candidates[s][next_chkpts_indices[i]].col};
1273             if (!is_mv_in(x, &this_mv)) {
1274               cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1275               continue;
1276             }
1277             cost_list[next_chkpts_indices[i] + 1] =
1278             thissad = vfp->sdf(what->buf, what->stride,
1279                                get_buf_from_mv(in_what, &this_mv),
1280                                in_what->stride);
1281             CHECK_BETTER
1282           }
1283         }
1284 
1285         if (best_site != -1) {
1286           k = next_chkpts_indices[best_site];
1287           br += candidates[s][k].row;
1288           bc += candidates[s][k].col;
1289         }
1290       }
1291     }
1292   }
1293 
1294   // Returns the one-away integer pel sad values around the best as follows:
1295   // cost_list[0]: sad at the best integer pel
1296   // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
1297   // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
1298   // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
1299   // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
1300   if (cost_list) {
1301     static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
1302     if (cost_list[0] == INT_MAX) {
1303       cost_list[0] = bestsad;
1304       if (check_bounds(x, br, bc, 1)) {
1305         for (i = 0; i < 4; i++) {
1306           const MV this_mv = { br + neighbors[i].row,
1307                                bc + neighbors[i].col };
1308           cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
1309                                      get_buf_from_mv(in_what, &this_mv),
1310                                      in_what->stride);
1311         }
1312       } else {
1313         for (i = 0; i < 4; i++) {
1314           const MV this_mv = {br + neighbors[i].row,
1315             bc + neighbors[i].col};
1316           if (!is_mv_in(x, &this_mv))
1317             cost_list[i + 1] = INT_MAX;
1318           else
1319             cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
1320                                        get_buf_from_mv(in_what, &this_mv),
1321                                        in_what->stride);
1322         }
1323       }
1324     } else {
1325       if (use_mvcost) {
1326         for (i = 0; i < 4; i++) {
1327           const MV this_mv = {br + neighbors[i].row,
1328             bc + neighbors[i].col};
1329           if (cost_list[i + 1] != INT_MAX) {
1330             cost_list[i + 1] +=
1331                 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1332           }
1333         }
1334       }
1335     }
1336   }
1337   best_mv->row = br;
1338   best_mv->col = bc;
1339   return bestsad;
1340 }
1341 
vp9_get_mvpred_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)1342 int vp9_get_mvpred_var(const MACROBLOCK *x,
1343                        const MV *best_mv, const MV *center_mv,
1344                        const vp9_variance_fn_ptr_t *vfp,
1345                        int use_mvcost) {
1346   const MACROBLOCKD *const xd = &x->e_mbd;
1347   const struct buf_2d *const what = &x->plane[0].src;
1348   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1349   const MV mv = {best_mv->row * 8, best_mv->col * 8};
1350   unsigned int unused;
1351 
1352   return vfp->vf(what->buf, what->stride,
1353                  get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
1354       (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
1355                                  x->mvcost, x->errorperbit) : 0);
1356 }
1357 
vp9_get_mvpred_av_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const uint8_t * second_pred,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)1358 int vp9_get_mvpred_av_var(const MACROBLOCK *x,
1359                           const MV *best_mv, const MV *center_mv,
1360                           const uint8_t *second_pred,
1361                           const vp9_variance_fn_ptr_t *vfp,
1362                           int use_mvcost) {
1363   const MACROBLOCKD *const xd = &x->e_mbd;
1364   const struct buf_2d *const what = &x->plane[0].src;
1365   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1366   const MV mv = {best_mv->row * 8, best_mv->col * 8};
1367   unsigned int unused;
1368 
1369   return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
1370                    what->buf, what->stride, &unused, second_pred) +
1371       (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
1372                                  x->mvcost, x->errorperbit) : 0);
1373 }
1374 
hex_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)1375 static int hex_search(const MACROBLOCK *x,
1376                       MV *ref_mv,
1377                       int search_param,
1378                       int sad_per_bit,
1379                       int do_init_search,
1380                       int *cost_list,
1381                       const vp9_variance_fn_ptr_t *vfp,
1382                       int use_mvcost,
1383                       const MV *center_mv, MV *best_mv) {
1384   // First scale has 8-closest points, the rest have 6 points in hex shape
1385   // at increasing scales
1386   static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
1387     8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
1388   };
1389   // Note that the largest candidate step at each scale is 2^scale
1390   static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1391     {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
1392     {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
1393     {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
1394     {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
1395     {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
1396     {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
1397     {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
1398     {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
1399     {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
1400     {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
1401     {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
1402       { -1024, 0}},
1403   };
1404   return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
1405                             do_init_search, cost_list, vfp, use_mvcost,
1406                             center_mv, best_mv,
1407                             hex_num_candidates, hex_candidates);
1408 }
1409 
bigdia_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)1410 static int bigdia_search(const MACROBLOCK *x,
1411                          MV *ref_mv,
1412                          int search_param,
1413                          int sad_per_bit,
1414                          int do_init_search,
1415                          int *cost_list,
1416                          const vp9_variance_fn_ptr_t *vfp,
1417                          int use_mvcost,
1418                          const MV *center_mv,
1419                          MV *best_mv) {
1420   // First scale has 4-closest points, the rest have 8 points in diamond
1421   // shape at increasing scales
1422   static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
1423     4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1424   };
1425   // Note that the largest candidate step at each scale is 2^scale
1426   static const MV bigdia_candidates[MAX_PATTERN_SCALES]
1427                                    [MAX_PATTERN_CANDIDATES] = {
1428     {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
1429     {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
1430     {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
1431     {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
1432     {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
1433     {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
1434       {-16, 16}, {-32, 0}},
1435     {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
1436       {-32, 32}, {-64, 0}},
1437     {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
1438       {-64, 64}, {-128, 0}},
1439     {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
1440       {-128, 128}, {-256, 0}},
1441     {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
1442       {-256, 256}, {-512, 0}},
1443     {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
1444       {-512, 512}, {-1024, 0}},
1445   };
1446   return vp9_pattern_search_sad(x, ref_mv, search_param, sad_per_bit,
1447                                 do_init_search, cost_list, vfp, use_mvcost,
1448                                 center_mv, best_mv,
1449                                 bigdia_num_candidates, bigdia_candidates);
1450 }
1451 
square_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)1452 static int square_search(const MACROBLOCK *x,
1453                          MV *ref_mv,
1454                          int search_param,
1455                          int sad_per_bit,
1456                          int do_init_search,
1457                          int *cost_list,
1458                          const vp9_variance_fn_ptr_t *vfp,
1459                          int use_mvcost,
1460                          const MV *center_mv,
1461                          MV *best_mv) {
1462   // All scales have 8 closest points in square shape
1463   static const int square_num_candidates[MAX_PATTERN_SCALES] = {
1464     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1465   };
1466   // Note that the largest candidate step at each scale is 2^scale
1467   static const MV square_candidates[MAX_PATTERN_SCALES]
1468                                    [MAX_PATTERN_CANDIDATES] = {
1469     {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
1470     {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
1471     {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
1472     {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
1473     {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
1474       {-16, 16}, {-16, 0}},
1475     {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
1476       {-32, 32}, {-32, 0}},
1477     {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
1478       {-64, 64}, {-64, 0}},
1479     {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
1480       {-128, 128}, {-128, 0}},
1481     {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
1482       {-256, 256}, {-256, 0}},
1483     {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
1484       {-512, 512}, {-512, 0}},
1485     {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
1486       {0, 1024}, {-1024, 1024}, {-1024, 0}},
1487   };
1488   return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
1489                             do_init_search, cost_list, vfp, use_mvcost,
1490                             center_mv, best_mv,
1491                             square_num_candidates, square_candidates);
1492 }
1493 
fast_hex_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)1494 static int fast_hex_search(const MACROBLOCK *x,
1495                            MV *ref_mv,
1496                            int search_param,
1497                            int sad_per_bit,
1498                            int do_init_search,  // must be zero for fast_hex
1499                            int *cost_list,
1500                            const vp9_variance_fn_ptr_t *vfp,
1501                            int use_mvcost,
1502                            const MV *center_mv,
1503                            MV *best_mv) {
1504   return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
1505                     sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1506                     center_mv, best_mv);
1507 }
1508 
fast_dia_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)1509 static int fast_dia_search(const MACROBLOCK *x,
1510                            MV *ref_mv,
1511                            int search_param,
1512                            int sad_per_bit,
1513                            int do_init_search,
1514                            int *cost_list,
1515                            const vp9_variance_fn_ptr_t *vfp,
1516                            int use_mvcost,
1517                            const MV *center_mv,
1518                            MV *best_mv) {
1519   return bigdia_search(
1520       x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
1521       do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv);
1522 }
1523 
1524 #undef CHECK_BETTER
1525 
vp9_full_range_search_c(const MACROBLOCK * x,const search_site_config * cfg,MV * ref_mv,MV * best_mv,int search_param,int sad_per_bit,int * num00,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv)1526 int vp9_full_range_search_c(const MACROBLOCK *x,
1527                             const search_site_config *cfg,
1528                             MV *ref_mv, MV *best_mv,
1529                             int search_param, int sad_per_bit, int *num00,
1530                             const vp9_variance_fn_ptr_t *fn_ptr,
1531                             const MV *center_mv) {
1532   const MACROBLOCKD *const xd = &x->e_mbd;
1533   const struct buf_2d *const what = &x->plane[0].src;
1534   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1535   const int range = 64;
1536   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1537   unsigned int best_sad = INT_MAX;
1538   int r, c, i;
1539   int start_col, end_col, start_row, end_row;
1540 
1541   // The cfg and search_param parameters are not used in this search variant
1542   (void)cfg;
1543   (void)search_param;
1544 
1545   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1546   *best_mv = *ref_mv;
1547   *num00 = 11;
1548   best_sad = fn_ptr->sdf(what->buf, what->stride,
1549                          get_buf_from_mv(in_what, ref_mv), in_what->stride) +
1550                  mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1551   start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
1552   start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
1553   end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
1554   end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
1555 
1556   for (r = start_row; r <= end_row; ++r) {
1557     for (c = start_col; c <= end_col; c += 4) {
1558       if (c + 3 <= end_col) {
1559         unsigned int sads[4];
1560         const uint8_t *addrs[4];
1561         for (i = 0; i < 4; ++i) {
1562           const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
1563           addrs[i] = get_buf_from_mv(in_what, &mv);
1564         }
1565 
1566         fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1567 
1568         for (i = 0; i < 4; ++i) {
1569           if (sads[i] < best_sad) {
1570             const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
1571             const unsigned int sad = sads[i] +
1572                 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1573             if (sad < best_sad) {
1574               best_sad = sad;
1575               *best_mv = mv;
1576             }
1577           }
1578         }
1579       } else {
1580         for (i = 0; i < end_col - c; ++i) {
1581           const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
1582           unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1583               get_buf_from_mv(in_what, &mv), in_what->stride);
1584           if (sad < best_sad) {
1585             sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1586             if (sad < best_sad) {
1587               best_sad = sad;
1588               *best_mv = mv;
1589             }
1590           }
1591         }
1592       }
1593     }
1594   }
1595 
1596   return best_sad;
1597 }
1598 
vp9_diamond_search_sad_c(const MACROBLOCK * x,const search_site_config * cfg,MV * ref_mv,MV * best_mv,int search_param,int sad_per_bit,int * num00,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv)1599 int vp9_diamond_search_sad_c(const MACROBLOCK *x,
1600                              const search_site_config *cfg,
1601                              MV *ref_mv, MV *best_mv, int search_param,
1602                              int sad_per_bit, int *num00,
1603                              const vp9_variance_fn_ptr_t *fn_ptr,
1604                              const MV *center_mv) {
1605   int i, j, step;
1606 
1607   const MACROBLOCKD *const xd = &x->e_mbd;
1608   uint8_t *what = x->plane[0].src.buf;
1609   const int what_stride = x->plane[0].src.stride;
1610   const uint8_t *in_what;
1611   const int in_what_stride = xd->plane[0].pre[0].stride;
1612   const uint8_t *best_address;
1613 
1614   unsigned int bestsad = INT_MAX;
1615   int best_site = 0;
1616   int last_site = 0;
1617 
1618   int ref_row;
1619   int ref_col;
1620 
1621   // search_param determines the length of the initial step and hence the number
1622   // of iterations.
1623   // 0 = initial step (MAX_FIRST_STEP) pel
1624   // 1 = (MAX_FIRST_STEP/2) pel,
1625   // 2 = (MAX_FIRST_STEP/4) pel...
1626   const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
1627   const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
1628 
1629   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1630   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1631   ref_row = ref_mv->row;
1632   ref_col = ref_mv->col;
1633   *num00 = 0;
1634   best_mv->row = ref_row;
1635   best_mv->col = ref_col;
1636 
1637   // Work out the start point for the search
1638   in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1639   best_address = in_what;
1640 
1641   // Check the starting position
1642   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
1643                 + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
1644 
1645   i = 1;
1646 
1647   for (step = 0; step < tot_steps; step++) {
1648     int all_in = 1, t;
1649 
1650     // All_in is true if every one of the points we are checking are within
1651     // the bounds of the image.
1652     all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
1653     all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
1654     all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
1655     all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
1656 
1657     // If all the pixels are within the bounds we don't check whether the
1658     // search point is valid in this loop,  otherwise we check each point
1659     // for validity..
1660     if (all_in) {
1661       unsigned int sad_array[4];
1662 
1663       for (j = 0; j < cfg->searches_per_step; j += 4) {
1664         unsigned char const *block_offset[4];
1665 
1666         for (t = 0; t < 4; t++)
1667           block_offset[t] = ss[i + t].offset + best_address;
1668 
1669         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1670                        sad_array);
1671 
1672         for (t = 0; t < 4; t++, i++) {
1673           if (sad_array[t] < bestsad) {
1674             const MV this_mv = {best_mv->row + ss[i].mv.row,
1675                                 best_mv->col + ss[i].mv.col};
1676             sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
1677                                            sad_per_bit);
1678             if (sad_array[t] < bestsad) {
1679               bestsad = sad_array[t];
1680               best_site = i;
1681             }
1682           }
1683         }
1684       }
1685     } else {
1686       for (j = 0; j < cfg->searches_per_step; j++) {
1687         // Trap illegal vectors
1688         const MV this_mv = {best_mv->row + ss[i].mv.row,
1689                             best_mv->col + ss[i].mv.col};
1690 
1691         if (is_mv_in(x, &this_mv)) {
1692           const uint8_t *const check_here = ss[i].offset + best_address;
1693           unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1694                                              in_what_stride);
1695 
1696           if (thissad < bestsad) {
1697             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1698             if (thissad < bestsad) {
1699               bestsad = thissad;
1700               best_site = i;
1701             }
1702           }
1703         }
1704         i++;
1705       }
1706     }
1707     if (best_site != last_site) {
1708       best_mv->row += ss[best_site].mv.row;
1709       best_mv->col += ss[best_site].mv.col;
1710       best_address += ss[best_site].offset;
1711       last_site = best_site;
1712 #if defined(NEW_DIAMOND_SEARCH)
1713       while (1) {
1714         const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1715                             best_mv->col + ss[best_site].mv.col};
1716         if (is_mv_in(x, &this_mv)) {
1717           const uint8_t *const check_here = ss[best_site].offset + best_address;
1718           unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1719                                              in_what_stride);
1720           if (thissad < bestsad) {
1721             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1722             if (thissad < bestsad) {
1723               bestsad = thissad;
1724               best_mv->row += ss[best_site].mv.row;
1725               best_mv->col += ss[best_site].mv.col;
1726               best_address += ss[best_site].offset;
1727               continue;
1728             }
1729           }
1730         }
1731         break;
1732       }
1733 #endif
1734     } else if (best_address == in_what) {
1735       (*num00)++;
1736     }
1737   }
1738   return bestsad;
1739 }
1740 
vector_match(int16_t * ref,int16_t * src,int bwl)1741 static int vector_match(int16_t *ref, int16_t *src, int bwl) {
1742   int best_sad = INT_MAX;
1743   int this_sad;
1744   int d;
1745   int center, offset = 0;
1746   int bw = 4 << bwl;  // redundant variable, to be changed in the experiments.
1747   for (d = 0; d <= bw; d += 16) {
1748     this_sad = vp9_vector_var(&ref[d], src, bwl);
1749     if (this_sad < best_sad) {
1750       best_sad = this_sad;
1751       offset = d;
1752     }
1753   }
1754   center = offset;
1755 
1756   for (d = -8; d <= 8; d += 16) {
1757     int this_pos = offset + d;
1758     // check limit
1759     if (this_pos < 0 || this_pos > bw)
1760       continue;
1761     this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
1762     if (this_sad < best_sad) {
1763       best_sad = this_sad;
1764       center = this_pos;
1765     }
1766   }
1767   offset = center;
1768 
1769   for (d = -4; d <= 4; d += 8) {
1770     int this_pos = offset + d;
1771     // check limit
1772     if (this_pos < 0 || this_pos > bw)
1773       continue;
1774     this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
1775     if (this_sad < best_sad) {
1776       best_sad = this_sad;
1777       center = this_pos;
1778     }
1779   }
1780   offset = center;
1781 
1782   for (d = -2; d <= 2; d += 4) {
1783     int this_pos = offset + d;
1784     // check limit
1785     if (this_pos < 0 || this_pos > bw)
1786       continue;
1787     this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
1788     if (this_sad < best_sad) {
1789       best_sad = this_sad;
1790       center = this_pos;
1791     }
1792   }
1793   offset = center;
1794 
1795   for (d = -1; d <= 1; d += 2) {
1796     int this_pos = offset + d;
1797     // check limit
1798     if (this_pos < 0 || this_pos > bw)
1799       continue;
1800     this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
1801     if (this_sad < best_sad) {
1802       best_sad = this_sad;
1803       center = this_pos;
1804     }
1805   }
1806 
1807   return (center - (bw >> 1));
1808 }
1809 
1810 static const MV search_pos[4] = {
1811     {-1, 0}, {0, -1}, {0, 1}, {1, 0},
1812 };
1813 
vp9_int_pro_motion_estimation(const VP9_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col)1814 unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
1815                                            BLOCK_SIZE bsize,
1816                                            int mi_row, int mi_col) {
1817   MACROBLOCKD *xd = &x->e_mbd;
1818   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1819   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
1820   DECLARE_ALIGNED(16, int16_t, hbuf[128]);
1821   DECLARE_ALIGNED(16, int16_t, vbuf[128]);
1822   DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
1823   DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
1824   int idx;
1825   const int bw = 4 << b_width_log2_lookup[bsize];
1826   const int bh = 4 << b_height_log2_lookup[bsize];
1827   const int search_width = bw << 1;
1828   const int search_height = bh << 1;
1829   const int src_stride = x->plane[0].src.stride;
1830   const int ref_stride = xd->plane[0].pre[0].stride;
1831   uint8_t const *ref_buf, *src_buf;
1832   MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
1833   unsigned int best_sad, tmp_sad, this_sad[4];
1834   MV this_mv;
1835   const int norm_factor = 3 + (bw >> 5);
1836   const YV12_BUFFER_CONFIG *scaled_ref_frame =
1837       vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
1838 
1839   if (scaled_ref_frame) {
1840     int i;
1841     // Swap out the reference frame for a version that's been scaled to
1842     // match the resolution of the current frame, allowing the existing
1843     // motion search code to be used without additional modifications.
1844     for (i = 0; i < MAX_MB_PLANE; i++)
1845       backup_yv12[i] = xd->plane[i].pre[0];
1846     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
1847   }
1848 
1849 #if CONFIG_VP9_HIGHBITDEPTH
1850   {
1851     unsigned int this_sad;
1852     tmp_mv->row = 0;
1853     tmp_mv->col = 0;
1854     this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
1855                                       xd->plane[0].pre[0].buf, ref_stride);
1856 
1857     if (scaled_ref_frame) {
1858       int i;
1859       for (i = 0; i < MAX_MB_PLANE; i++)
1860         xd->plane[i].pre[0] = backup_yv12[i];
1861     }
1862     return this_sad;
1863   }
1864 #endif
1865 
1866   // Set up prediction 1-D reference set
1867   ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
1868   for (idx = 0; idx < search_width; idx += 16) {
1869     vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
1870     ref_buf += 16;
1871   }
1872 
1873   ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
1874   for (idx = 0; idx < search_height; ++idx) {
1875     vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
1876     ref_buf += ref_stride;
1877   }
1878 
1879   // Set up src 1-D reference set
1880   for (idx = 0; idx < bw; idx += 16) {
1881     src_buf = x->plane[0].src.buf + idx;
1882     vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
1883   }
1884 
1885   src_buf = x->plane[0].src.buf;
1886   for (idx = 0; idx < bh; ++idx) {
1887     src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
1888     src_buf += src_stride;
1889   }
1890 
1891   // Find the best match per 1-D search
1892   tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
1893   tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
1894 
1895   this_mv = *tmp_mv;
1896   src_buf = x->plane[0].src.buf;
1897   ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1898   best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1899 
1900   {
1901     const uint8_t * const pos[4] = {
1902         ref_buf - ref_stride,
1903         ref_buf - 1,
1904         ref_buf + 1,
1905         ref_buf + ref_stride,
1906     };
1907 
1908     cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
1909   }
1910 
1911   for (idx = 0; idx < 4; ++idx) {
1912     if (this_sad[idx] < best_sad) {
1913       best_sad = this_sad[idx];
1914       tmp_mv->row = search_pos[idx].row + this_mv.row;
1915       tmp_mv->col = search_pos[idx].col + this_mv.col;
1916     }
1917   }
1918 
1919   if (this_sad[0] < this_sad[3])
1920     this_mv.row -= 1;
1921   else
1922     this_mv.row += 1;
1923 
1924   if (this_sad[1] < this_sad[2])
1925     this_mv.col -= 1;
1926   else
1927     this_mv.col += 1;
1928 
1929   ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1930 
1931   tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
1932                                    ref_buf, ref_stride);
1933   if (best_sad > tmp_sad) {
1934     *tmp_mv = this_mv;
1935     best_sad = tmp_sad;
1936   }
1937 
1938   tmp_mv->row *= 8;
1939   tmp_mv->col *= 8;
1940 
1941   if (scaled_ref_frame) {
1942     int i;
1943     for (i = 0; i < MAX_MB_PLANE; i++)
1944       xd->plane[i].pre[0] = backup_yv12[i];
1945   }
1946 
1947   return best_sad;
1948 }
1949 
1950 // Runs sequence of diamond searches in smaller steps for RD.
1951 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
1952               point as the best match, we will do a final 1-away diamond
1953               refining search  */
full_pixel_diamond(const VP9_COMP * cpi,MACROBLOCK * x,MV * mvp_full,int step_param,int sadpb,int further_steps,int do_refine,int * cost_list,const vp9_variance_fn_ptr_t * fn_ptr,const MV * ref_mv,MV * dst_mv)1954 static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
1955                               MV *mvp_full, int step_param,
1956                               int sadpb, int further_steps, int do_refine,
1957                               int *cost_list,
1958                               const vp9_variance_fn_ptr_t *fn_ptr,
1959                               const MV *ref_mv, MV *dst_mv) {
1960   MV temp_mv;
1961   int thissme, n, num00 = 0;
1962   int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1963                                         step_param, sadpb, &n,
1964                                         fn_ptr, ref_mv);
1965   if (bestsme < INT_MAX)
1966     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1967   *dst_mv = temp_mv;
1968 
1969   // If there won't be more n-step search, check to see if refining search is
1970   // needed.
1971   if (n > further_steps)
1972     do_refine = 0;
1973 
1974   while (n < further_steps) {
1975     ++n;
1976 
1977     if (num00) {
1978       num00--;
1979     } else {
1980       thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1981                                         step_param + n, sadpb, &num00,
1982                                         fn_ptr, ref_mv);
1983       if (thissme < INT_MAX)
1984         thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1985 
1986       // check to see if refining search is needed.
1987       if (num00 > further_steps - n)
1988         do_refine = 0;
1989 
1990       if (thissme < bestsme) {
1991         bestsme = thissme;
1992         *dst_mv = temp_mv;
1993       }
1994     }
1995   }
1996 
1997   // final 1-away diamond refining search
1998   if (do_refine) {
1999     const int search_range = 8;
2000     MV best_mv = *dst_mv;
2001     thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range,
2002                                        fn_ptr, ref_mv);
2003     if (thissme < INT_MAX)
2004       thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
2005     if (thissme < bestsme) {
2006       bestsme = thissme;
2007       *dst_mv = best_mv;
2008     }
2009   }
2010 
2011   // Return cost list.
2012   if (cost_list) {
2013     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2014   }
2015   return bestsme;
2016 }
2017 
vp9_full_search_sad_c(const MACROBLOCK * x,const MV * ref_mv,int sad_per_bit,int distance,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv,MV * best_mv)2018 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
2019                           int sad_per_bit, int distance,
2020                           const vp9_variance_fn_ptr_t *fn_ptr,
2021                           const MV *center_mv, MV *best_mv) {
2022   int r, c;
2023   const MACROBLOCKD *const xd = &x->e_mbd;
2024   const struct buf_2d *const what = &x->plane[0].src;
2025   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2026   const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
2027   const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
2028   const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
2029   const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
2030   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
2031   int best_sad = fn_ptr->sdf(what->buf, what->stride,
2032       get_buf_from_mv(in_what, ref_mv), in_what->stride) +
2033       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2034   *best_mv = *ref_mv;
2035 
2036   for (r = row_min; r < row_max; ++r) {
2037     for (c = col_min; c < col_max; ++c) {
2038       const MV mv = {r, c};
2039       const int sad = fn_ptr->sdf(what->buf, what->stride,
2040           get_buf_from_mv(in_what, &mv), in_what->stride) +
2041               mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2042       if (sad < best_sad) {
2043         best_sad = sad;
2044         *best_mv = mv;
2045       }
2046     }
2047   }
2048   return best_sad;
2049 }
2050 
vp9_full_search_sadx3(const MACROBLOCK * x,const MV * ref_mv,int sad_per_bit,int distance,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv,MV * best_mv)2051 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
2052                           int sad_per_bit, int distance,
2053                           const vp9_variance_fn_ptr_t *fn_ptr,
2054                           const MV *center_mv, MV *best_mv) {
2055   int r;
2056   const MACROBLOCKD *const xd = &x->e_mbd;
2057   const struct buf_2d *const what = &x->plane[0].src;
2058   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2059   const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
2060   const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
2061   const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
2062   const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
2063   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
2064   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
2065       get_buf_from_mv(in_what, ref_mv), in_what->stride) +
2066       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2067   *best_mv = *ref_mv;
2068 
2069   for (r = row_min; r < row_max; ++r) {
2070     int c = col_min;
2071     const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2072 
2073     if (fn_ptr->sdx3f != NULL) {
2074       while ((c + 2) < col_max) {
2075         int i;
2076         DECLARE_ALIGNED(16, uint32_t, sads[3]);
2077 
2078         fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2079                       sads);
2080 
2081         for (i = 0; i < 3; ++i) {
2082           unsigned int sad = sads[i];
2083           if (sad < best_sad) {
2084             const MV mv = {r, c};
2085             sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2086             if (sad < best_sad) {
2087               best_sad = sad;
2088               *best_mv = mv;
2089             }
2090           }
2091           ++check_here;
2092           ++c;
2093         }
2094       }
2095     }
2096 
2097     while (c < col_max) {
2098       unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
2099                                      check_here, in_what->stride);
2100       if (sad < best_sad) {
2101         const MV mv = {r, c};
2102         sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2103         if (sad < best_sad) {
2104           best_sad = sad;
2105           *best_mv = mv;
2106         }
2107       }
2108       ++check_here;
2109       ++c;
2110     }
2111   }
2112 
2113   return best_sad;
2114 }
2115 
vp9_full_search_sadx8(const MACROBLOCK * x,const MV * ref_mv,int sad_per_bit,int distance,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv,MV * best_mv)2116 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
2117                           int sad_per_bit, int distance,
2118                           const vp9_variance_fn_ptr_t *fn_ptr,
2119                           const MV *center_mv, MV *best_mv) {
2120   int r;
2121   const MACROBLOCKD *const xd = &x->e_mbd;
2122   const struct buf_2d *const what = &x->plane[0].src;
2123   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2124   const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
2125   const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
2126   const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
2127   const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
2128   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
2129   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
2130       get_buf_from_mv(in_what, ref_mv), in_what->stride) +
2131       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2132   *best_mv = *ref_mv;
2133 
2134   for (r = row_min; r < row_max; ++r) {
2135     int c = col_min;
2136     const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2137 
2138     if (fn_ptr->sdx8f != NULL) {
2139       while ((c + 7) < col_max) {
2140         int i;
2141         DECLARE_ALIGNED(16, uint32_t, sads[8]);
2142 
2143         fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
2144                       sads);
2145 
2146         for (i = 0; i < 8; ++i) {
2147           unsigned int sad = sads[i];
2148           if (sad < best_sad) {
2149             const MV mv = {r, c};
2150             sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2151             if (sad < best_sad) {
2152               best_sad = sad;
2153               *best_mv = mv;
2154             }
2155           }
2156           ++check_here;
2157           ++c;
2158         }
2159       }
2160     }
2161 
2162     if (fn_ptr->sdx3f != NULL) {
2163       while ((c + 2) < col_max) {
2164         int i;
2165         DECLARE_ALIGNED(16, uint32_t, sads[3]);
2166 
2167         fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2168                       sads);
2169 
2170         for (i = 0; i < 3; ++i) {
2171           unsigned int sad = sads[i];
2172           if (sad < best_sad) {
2173             const MV mv = {r, c};
2174             sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2175             if (sad < best_sad) {
2176               best_sad = sad;
2177               *best_mv = mv;
2178             }
2179           }
2180           ++check_here;
2181           ++c;
2182         }
2183       }
2184     }
2185 
2186     while (c < col_max) {
2187       unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
2188                                      check_here, in_what->stride);
2189       if (sad < best_sad) {
2190         const MV mv = {r, c};
2191         sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2192         if (sad < best_sad) {
2193           best_sad = sad;
2194           *best_mv = mv;
2195         }
2196       }
2197       ++check_here;
2198       ++c;
2199     }
2200   }
2201 
2202   return best_sad;
2203 }
2204 
vp9_refining_search_sad(const MACROBLOCK * x,MV * ref_mv,int error_per_bit,int search_range,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv)2205 int vp9_refining_search_sad(const MACROBLOCK *x,
2206                             MV *ref_mv, int error_per_bit,
2207                             int search_range,
2208                             const vp9_variance_fn_ptr_t *fn_ptr,
2209                             const MV *center_mv) {
2210   const MACROBLOCKD *const xd = &x->e_mbd;
2211   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
2212   const struct buf_2d *const what = &x->plane[0].src;
2213   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2214   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
2215   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
2216   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
2217                                     in_what->stride) +
2218       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2219   int i, j;
2220 
2221   for (i = 0; i < search_range; i++) {
2222     int best_site = -1;
2223     const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
2224                        ((ref_mv->row + 1) < x->mv_row_max) &
2225                        ((ref_mv->col - 1) > x->mv_col_min) &
2226                        ((ref_mv->col + 1) < x->mv_col_max);
2227 
2228     if (all_in) {
2229       unsigned int sads[4];
2230       const uint8_t *const positions[4] = {
2231         best_address - in_what->stride,
2232         best_address - 1,
2233         best_address + 1,
2234         best_address + in_what->stride
2235       };
2236 
2237       fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
2238 
2239       for (j = 0; j < 4; ++j) {
2240         if (sads[j] < best_sad) {
2241           const MV mv = {ref_mv->row + neighbors[j].row,
2242                          ref_mv->col + neighbors[j].col};
2243           sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2244           if (sads[j] < best_sad) {
2245             best_sad = sads[j];
2246             best_site = j;
2247           }
2248         }
2249       }
2250     } else {
2251       for (j = 0; j < 4; ++j) {
2252         const MV mv = {ref_mv->row + neighbors[j].row,
2253                        ref_mv->col + neighbors[j].col};
2254 
2255         if (is_mv_in(x, &mv)) {
2256           unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
2257                                          get_buf_from_mv(in_what, &mv),
2258                                          in_what->stride);
2259           if (sad < best_sad) {
2260             sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2261             if (sad < best_sad) {
2262               best_sad = sad;
2263               best_site = j;
2264             }
2265           }
2266         }
2267       }
2268     }
2269 
2270     if (best_site == -1) {
2271       break;
2272     } else {
2273       ref_mv->row += neighbors[best_site].row;
2274       ref_mv->col += neighbors[best_site].col;
2275       best_address = get_buf_from_mv(in_what, ref_mv);
2276     }
2277   }
2278 
2279   return best_sad;
2280 }
2281 
2282 // This function is called when we do joint motion search in comp_inter_inter
2283 // mode.
vp9_refining_search_8p_c(const MACROBLOCK * x,MV * ref_mv,int error_per_bit,int search_range,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv,const uint8_t * second_pred)2284 int vp9_refining_search_8p_c(const MACROBLOCK *x,
2285                              MV *ref_mv, int error_per_bit,
2286                              int search_range,
2287                              const vp9_variance_fn_ptr_t *fn_ptr,
2288                              const MV *center_mv,
2289                              const uint8_t *second_pred) {
2290   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
2291                            {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
2292   const MACROBLOCKD *const xd = &x->e_mbd;
2293   const struct buf_2d *const what = &x->plane[0].src;
2294   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2295   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
2296   unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
2297       get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) +
2298       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2299   int i, j;
2300 
2301   for (i = 0; i < search_range; ++i) {
2302     int best_site = -1;
2303 
2304     for (j = 0; j < 8; ++j) {
2305       const MV mv = {ref_mv->row + neighbors[j].row,
2306                      ref_mv->col + neighbors[j].col};
2307 
2308       if (is_mv_in(x, &mv)) {
2309         unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
2310             get_buf_from_mv(in_what, &mv), in_what->stride, second_pred);
2311         if (sad < best_sad) {
2312           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2313           if (sad < best_sad) {
2314             best_sad = sad;
2315             best_site = j;
2316           }
2317         }
2318       }
2319     }
2320 
2321     if (best_site == -1) {
2322       break;
2323     } else {
2324       ref_mv->row += neighbors[best_site].row;
2325       ref_mv->col += neighbors[best_site].col;
2326     }
2327   }
2328   return best_sad;
2329 }
2330 
vp9_full_pixel_search(VP9_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,MV * mvp_full,int step_param,int error_per_bit,int * cost_list,const MV * ref_mv,MV * tmp_mv,int var_max,int rd)2331 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
2332                           BLOCK_SIZE bsize, MV *mvp_full,
2333                           int step_param, int error_per_bit,
2334                           int *cost_list,
2335                           const MV *ref_mv, MV *tmp_mv,
2336                           int var_max, int rd) {
2337   const SPEED_FEATURES *const sf = &cpi->sf;
2338   const SEARCH_METHODS method = sf->mv.search_method;
2339   vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
2340   int var = 0;
2341   if (cost_list) {
2342     cost_list[0] = INT_MAX;
2343     cost_list[1] = INT_MAX;
2344     cost_list[2] = INT_MAX;
2345     cost_list[3] = INT_MAX;
2346     cost_list[4] = INT_MAX;
2347   }
2348 
2349   switch (method) {
2350     case FAST_DIAMOND:
2351       var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
2352                             cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2353       break;
2354     case FAST_HEX:
2355       var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
2356                             cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2357       break;
2358     case HEX:
2359       var = hex_search(x, mvp_full, step_param, error_per_bit, 1,
2360                        cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2361       break;
2362     case SQUARE:
2363       var = square_search(x, mvp_full, step_param, error_per_bit, 1,
2364                           cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2365       break;
2366     case BIGDIA:
2367       var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
2368                           cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2369       break;
2370     case NSTEP:
2371       var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
2372                                MAX_MVSEARCH_STEPS - 1 - step_param,
2373                                1, cost_list, fn_ptr, ref_mv, tmp_mv);
2374       break;
2375     default:
2376       assert(0 && "Invalid search method.");
2377   }
2378 
2379   if (method != NSTEP && rd && var < var_max)
2380     var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
2381 
2382   return var;
2383 }
2384