1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <limits.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vpx_config.h"
16
17 #include "vpx_mem/vpx_mem.h"
18
19 #include "vp9/common/vp9_common.h"
20
21 #include "vp9/encoder/vp9_encoder.h"
22 #include "vp9/encoder/vp9_mcomp.h"
23
24 // #define NEW_DIAMOND_SEARCH
25
get_buf_from_mv(const struct buf_2d * buf,const MV * mv)26 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
27 const MV *mv) {
28 return &buf->buf[mv->row * buf->stride + mv->col];
29 }
30
// Tightens the search window kept in x (mv_col_min/max, mv_row_min/max) to
// its intersection with a window of MAX_FULL_PEL_VAL around `mv` (taken as
// mv >> 3).  That window is first limited to
// ((MV_LOW >> 3) + 1) .. ((MV_UPP >> 3) - 1).  The window in x is only ever
// narrowed, never widened.
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
  const int center_col = mv->col >> 3;
  const int center_row = mv->row >> 3;
  // The lower edge moves up by one when any of the low three bits of the
  // component is set.
  const int col_has_frac = (mv->col & 7) != 0;
  const int row_has_frac = (mv->row & 7) != 0;
  const int lowest = (MV_LOW >> 3) + 1;
  const int highest = (MV_UPP >> 3) - 1;

  const int col_min =
      MAX(center_col - MAX_FULL_PEL_VAL + col_has_frac, lowest);
  const int row_min =
      MAX(center_row - MAX_FULL_PEL_VAL + row_has_frac, lowest);
  const int col_max = MIN(center_col + MAX_FULL_PEL_VAL, highest);
  const int row_max = MIN(center_row + MAX_FULL_PEL_VAL, highest);

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
  x->mv_col_min = MAX(x->mv_col_min, col_min);
  x->mv_col_max = MIN(x->mv_col_max, col_max);
  x->mv_row_min = MAX(x->mv_row_min, row_min);
  x->mv_row_max = MIN(x->mv_row_max, row_max);
}
53
vp9_init_search_range(int size)54 int vp9_init_search_range(int size) {
55 int sr = 0;
56 // Minimum search size no matter what the passed in value.
57 size = MAX(16, size);
58
59 while ((size << sr) < MAX_FULL_PEL_VAL)
60 sr++;
61
62 sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
63 return sr;
64 }
65
mv_cost(const MV * mv,const int * joint_cost,int * const comp_cost[2])66 static INLINE int mv_cost(const MV *mv,
67 const int *joint_cost, int *const comp_cost[2]) {
68 return joint_cost[vp9_get_mv_joint(mv)] +
69 comp_cost[0][mv->row] + comp_cost[1][mv->col];
70 }
71
vp9_mv_bit_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int weight)72 int vp9_mv_bit_cost(const MV *mv, const MV *ref,
73 const int *mvjcost, int *mvcost[2], int weight) {
74 const MV diff = { mv->row - ref->row,
75 mv->col - ref->col };
76 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
77 }
78
mv_err_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int error_per_bit)79 static int mv_err_cost(const MV *mv, const MV *ref,
80 const int *mvjcost, int *mvcost[2],
81 int error_per_bit) {
82 if (mvcost) {
83 const MV diff = { mv->row - ref->row,
84 mv->col - ref->col };
85 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
86 error_per_bit, 13);
87 }
88 return 0;
89 }
90
mvsad_err_cost(const MACROBLOCK * x,const MV * mv,const MV * ref,int error_per_bit)91 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
92 int error_per_bit) {
93 if (x->nmvsadcost) {
94 const MV diff = { mv->row - ref->row,
95 mv->col - ref->col };
96 return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
97 x->nmvsadcost) * error_per_bit, 8);
98 }
99 return 0;
100 }
101
// Fills cfg->ss with a search pattern of four sites per step.  Site 0 is the
// zero displacement.  Then, for each step length starting at MAX_FIRST_STEP
// and halving until it reaches zero, four sites displaced by the step length
// are appended in the order -row, +row, -col, +col.  Each site stores its
// displacement and the matching buffer offset (row * stride + col).
// cfg->ss_count receives the number of sites written and
// cfg->searches_per_step is set to 4.
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  // Unit directions, scaled by the step length below.
  static const int unit_row[4] = { -1, 1, 0, 0 };
  static const int unit_col[4] = { 0, 0, -1, 1 };
  int filled = 0;
  int step, d;

  cfg->ss[filled].mv.row = 0;
  cfg->ss[filled].mv.col = 0;
  cfg->ss[filled].offset = 0;
  ++filled;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    // Generate offsets for 4 search sites per step.
    for (d = 0; d < 4; ++d) {
      const MV disp = { unit_row[d] * step, unit_col[d] * step };
      search_site *const site = &cfg->ss[filled++];
      site->mv = disp;
      site->offset = site->mv.row * stride + site->mv.col;
    }
  }

  cfg->ss_count = filled;
  cfg->searches_per_step = 4;
}
122
// Fills cfg->ss with a search pattern of eight sites per step.  Site 0 is
// the zero displacement.  Then, for each step length starting at
// MAX_FIRST_STEP and halving until it reaches zero, eight sites are
// appended: the four axis-aligned displacements (-row, +row, -col, +col)
// followed by the four diagonals.  Each site stores its displacement and the
// matching buffer offset (row * stride + col).  cfg->ss_count receives the
// number of sites written and cfg->searches_per_step is set to 8.
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  // Unit directions, scaled by the step length below.
  static const int unit_row[8] = { -1, 1, 0, 0, -1, -1, 1, 1 };
  static const int unit_col[8] = { 0, 0, -1, 1, -1, 1, -1, 1 };
  int filled = 0;
  int step, d;

  cfg->ss[filled].mv.row = 0;
  cfg->ss[filled].mv.col = 0;
  cfg->ss[filled].offset = 0;
  ++filled;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    // Generate offsets for 8 search sites per step.
    for (d = 0; d < 8; ++d) {
      const MV disp = { unit_row[d] * step, unit_col[d] * step };
      search_site *const site = &cfg->ss[filled++];
      site->mv = disp;
      site->offset = site->mv.row * stride + site->mv.col;
    }
  }

  cfg->ss_count = filled;
  cfg->searches_per_step = 8;
}
146
147 /*
148 * To avoid the penalty for crossing cache-line read, preload the reference
149 * area in a small buffer, which is aligned to make sure there won't be crossing
150 * cache-line read while reading from this buffer. This reduced the cpu
151 * cycles spent on reading ref data in sub-pixel filter functions.
152 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
153 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
154 * could reduce the area.
155 */
156
/* Estimated cost of a motion vector (r, c) relative to the reference
 * (rr, rc): the joint cost, indexed by which of the two components differ
 * from the reference, plus the per-component costs of the differences,
 * multiplied by error_per_bit; 4096 is added before the right shift by 13.
 * Evaluates to 0 when mvcost is null.  Relies on mvcost, mvjcost, rr, rc and
 * error_per_bit being in scope where the macro is used. */
#define MVC(r, c)                                   \
  (!mvcost ? 0 :                                    \
   ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +       \
     mvcost[0][(r) - rr] + mvcost[1][(c) - rc]) *   \
    error_per_bit + 4096) >> 13)
163
164
165 // convert motion vector component to offset for svf calc
sp(int x)166 static INLINE int sp(int x) {
167 return (x & 7) << 1;
168 }
169
pre(const uint8_t * buf,int stride,int r,int c)170 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
171 return &buf[(r >> 3) * stride + (c >> 3)];
172 }
173
/* Checks whether (r, c) has a better score than the previous best.
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], the prediction error is
 * measured with vfp->svf (or vfp->svaf when second_pred is non-null), MVC(r, c)
 * is added, and the total is stored in v.  When that total is below besterr,
 * besterr, br, bc, *distortion and *sse1 are updated.  Outside the window v is
 * set to INT_MAX and nothing else changes.
 *
 * The arguments are parenthesized and the body is wrapped in
 * do { } while (0) so the macro behaves as a single statement; every use must
 * be followed by a semicolon.  Relies on these names being in scope: minc,
 * maxc, minr, maxr, second_pred, thismse, vfp, y, y_stride, z, src_stride,
 * sse, besterr, br, bc, distortion, sse1, plus whatever MVC needs. */
#define CHECK_BETTER(v, r, c)                                              \
  do {                                                                     \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {        \
      if (second_pred == NULL)                                             \
        thismse = vfp->svf(pre(y, y_stride, (r), (c)), y_stride,           \
                           sp(c), sp(r), z, src_stride, &sse);             \
      else                                                                 \
        thismse = vfp->svaf(pre(y, y_stride, (r), (c)), y_stride,          \
                            sp(c), sp(r), z, src_stride, &sse,             \
                            second_pred);                                  \
      if (((v) = MVC((r), (c)) + thismse) < besterr) {                     \
        besterr = (v);                                                     \
        br = (r);                                                          \
        bc = (c);                                                          \
        *distortion = thismse;                                             \
        *sse1 = sse;                                                       \
      }                                                                    \
    } else {                                                               \
      (v) = INT_MAX;                                                       \
    }                                                                      \
  } while (0)
193
/* First-level probe around (tr, tc) at distance hstep.  Tests the left,
 * right, up and down neighbours through CHECK_BETTER, then encodes the
 * outcome in whichdir: bit 0 is 0 when left scored strictly lower than right
 * (else 1), and bit 1 is 0 when up scored strictly lower than down (else 1).
 * Finally it tests the single diagonal neighbour in the quadrant whichdir
 * selects (bit 1 picks the row sign, bit 0 the column sign).
 * Expands to a braced block; needs tr, tc, hstep, whichdir and everything
 * CHECK_BETTER uses to be in scope. */
#define FIRST_LEVEL_CHECKS                                      \
  {                                                             \
    unsigned int left, right, up, down, diag;                   \
    CHECK_BETTER(left, tr, tc - hstep);                         \
    CHECK_BETTER(right, tr, tc + hstep);                        \
    CHECK_BETTER(up, tr - hstep, tc);                           \
    CHECK_BETTER(down, tr + hstep, tc);                         \
    whichdir = (left >= right) + 2 * (up >= down);              \
    CHECK_BETTER(diag,                                          \
                 tr + ((whichdir & 2) ? hstep : -hstep),        \
                 tc + ((whichdir & 1) ? hstep : -hstep));       \
  }
218
/* Second-level probe.  (kr, kc) is the move the best point (br, bc) made
 * away from the search centre (tr, tc); it is captured once, before any
 * further CHECK_BETTER call can change br/bc.
 *  - Diagonal move (both non-zero): test the two points one more step along
 *    each axis beyond the diagonal.
 *  - Column-only move: test the two points two column steps away at
 *    rows tr +/- hstep, then one point next to the new best whose row side is
 *    chosen by whichdir (0/1: +hstep, 2/3: -hstep).
 *  - Row-only move: the mirrored case, with the column side chosen by
 *    whichdir (0/2: +hstep, 1/3: -hstep).
 * Nothing is tested when the best point did not move.  Expands to a braced
 * block; needs tr, tc, br, bc, hstep, whichdir and everything CHECK_BETTER
 * uses to be in scope. */
#define SECOND_LEVEL_CHECKS                                     \
  {                                                             \
    const int kr = br - tr;                                     \
    const int kc = bc - tc;                                     \
    unsigned int second;                                        \
    if (kr != 0 && kc != 0) {                                   \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);               \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);               \
    } else if (kc != 0) {                                       \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);            \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);            \
      if (whichdir == 0 || whichdir == 1) {                     \
        CHECK_BETTER(second, tr + hstep, tc + kc);              \
      } else if (whichdir == 2 || whichdir == 3) {              \
        CHECK_BETTER(second, tr - hstep, tc + kc);              \
      }                                                         \
    } else if (kr != 0) {                                       \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);            \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);            \
      if (whichdir == 0 || whichdir == 2) {                     \
        CHECK_BETTER(second, tr + kr, tc + hstep);              \
      } else if (whichdir == 1 || whichdir == 3) {              \
        CHECK_BETTER(second, tr + kr, tc - hstep);              \
      }                                                         \
    }                                                           \
  }
258
// Refines *bestmv around its incoming position.
//
// On entry *bestmv is multiplied by 8; the search then probes neighbours of
// the current best at step 4 (1/2 pel), step 2 (1/4 pel, skipped when
// forced_stop == 2) and step 1 (only when allow_hp is set,
// vp9_use_mv_hp(ref_mv) is true and forced_stop == 0).  Each stage runs
// FIRST_LEVEL_CHECKS and, when iters_per_step > 1, SECOND_LEVEL_CHECKS.
//
// Outputs: *bestmv (the refined vector), *distortion and *sse1 (the
// prediction error and sse of the best point found).  Returns the best
// error including the motion vector cost, or INT_MAX when the result is more
// than (MAX_FULL_PEL_VAL << 3) away from ref_mv in either component.
//
// NOTE: z, src_stride, y, y_stride, rr, rc, br, bc, tr, tc, hstep, minc,
// maxc, minr, maxr, besterr, sse, thismse and whichdir are referenced by
// name from the CHECK_BETTER / MVC / *_LEVEL_CHECKS macros; do not rename.
int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                 MV *bestmv, const MV *ref_mv,
                                 int allow_hp,
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
                                 unsigned int *sse1,
                                 const uint8_t *second_pred,
                                 int w, int h) {
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
  const uint8_t *const z = src->buf;
  const int src_stride = src->stride;
  const uint8_t *const y = ref->buf;
  const int y_stride = ref->stride;
  // Prediction at the incoming (unscaled) bestmv.
  const uint8_t *const center = y + (bestmv->row * y_stride + bestmv->col);
  // Same value for every stage; kept unsigned to match the comparison below.
  const unsigned int iters = iters_per_step;

  const int rr = ref_mv->row;
  const int rc = ref_mv->col;
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);

  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int tr = br;
  int tc = bc;
  int hstep = 4;
  int stage;

  // central mv
  bestmv->row *= 8;
  bestmv->col *= 8;

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  if (second_pred == NULL) {
    besterr = vfp->vf(center, y_stride, z, src_stride, sse1);
  } else {
    DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
    vp9_comp_avg_pred(comp_pred, second_pred, w, h, center, y_stride);
    besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

  // Each stage checks at least one point in common with the previous one
  // (two if a diagonal was selected).
  // stage 0: 1/2 pel, stage 1: 1/4 pel, stage 2: 1/8 pel.
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  for (stage = 0; stage < 3; ++stage) {
    if (stage == 1 && forced_stop == 2)
      continue;
    if (stage == 2 &&
        !(allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0))
      break;
    if (stage > 0)
      hstep >>= 1;

    FIRST_LEVEL_CHECKS;
    if (iters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    // Re-centre the next stage on the best point found so far.
    tr = br;
    tc = bc;
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void) tr;
  (void) tc;

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}
363
// Retire the helper macros of the sub-pixel search.  (The former
// "#undef PRE" was dropped: no PRE macro is defined in this file; the helper
// is the inline function pre().)
#undef MVC
#undef CHECK_BETTER
// NOTE(review): FIRST_LEVEL_CHECKS and SECOND_LEVEL_CHECKS stay defined past
// this point; #undef them here too if nothing later in the file uses them.
367
check_bounds(const MACROBLOCK * x,int row,int col,int range)368 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
369 int range) {
370 return ((row - range) >= x->mv_row_min) &
371 ((row + range) <= x->mv_row_max) &
372 ((col - range) >= x->mv_col_min) &
373 ((col + range) <= x->mv_col_max);
374 }
375
is_mv_in(const MACROBLOCK * x,const MV * mv)376 static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
377 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
378 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
379 }
380
/* Candidate test used by the pattern search.  If thissad is below bestsad,
 * the motion vector SAD cost of this_mv (relative to fcenter_mv) is added
 * when use_mvcost is set; if the total is still below bestsad, bestsad takes
 * the new value and best_site records the current index i.
 * Expands to a braced block and is used WITHOUT a trailing semicolon.
 * Relies on thissad, bestsad, use_mvcost, x, this_mv, fcenter_mv,
 * sad_per_bit, best_site and i being in scope. */
#define CHECK_BETTER                                                    \
  {                                                                     \
    if (thissad < bestsad) {                                            \
      thissad += use_mvcost                                             \
          ? mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit)       \
          : 0;                                                          \
      if (thissad < bestsad) {                                          \
        bestsad = thissad;                                              \
        best_site = i;                                                  \
      }                                                                 \
    }                                                                   \
  }
392
393 #define MAX_PATTERN_SCALES 11
394 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale
395 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
396
397 // Generic pattern search function that searches over multiple scales.
398 // Each scale can have a different number of candidates and shape of
399 // candidates as indicated in the num_candidates and candidates arrays
400 // passed into this function
vp9_pattern_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int do_refine,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv,const int num_candidates[MAX_PATTERN_SCALES],const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES])401 static int vp9_pattern_search(const MACROBLOCK *x,
402 MV *ref_mv,
403 int search_param,
404 int sad_per_bit,
405 int do_init_search, int do_refine,
406 const vp9_variance_fn_ptr_t *vfp,
407 int use_mvcost,
408 const MV *center_mv, MV *best_mv,
409 const int num_candidates[MAX_PATTERN_SCALES],
410 const MV candidates[MAX_PATTERN_SCALES]
411 [MAX_PATTERN_CANDIDATES]) {
412 const MACROBLOCKD *const xd = &x->e_mbd;
413 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
414 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
415 };
416 int i, j, s, t;
417 const struct buf_2d *const what = &x->plane[0].src;
418 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
419 int br, bc;
420 int bestsad = INT_MAX;
421 int thissad;
422 int k = -1;
423 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
424 int best_init_s = search_param_to_steps[search_param];
425 // adjust ref_mv to make sure it is within MV range
426 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
427 br = ref_mv->row;
428 bc = ref_mv->col;
429
430 // Work out the start point for the search
431 bestsad = vfp->sdf(what->buf, what->stride,
432 get_buf_from_mv(in_what, ref_mv), in_what->stride) +
433 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
434
435 // Search all possible scales upto the search param around the center point
436 // pick the scale of the point that is best as the starting scale of
437 // further steps around it.
438 if (do_init_search) {
439 s = best_init_s;
440 best_init_s = -1;
441 for (t = 0; t <= s; ++t) {
442 int best_site = -1;
443 if (check_bounds(x, br, bc, 1 << t)) {
444 for (i = 0; i < num_candidates[t]; i++) {
445 const MV this_mv = {br + candidates[t][i].row,
446 bc + candidates[t][i].col};
447 thissad = vfp->sdf(what->buf, what->stride,
448 get_buf_from_mv(in_what, &this_mv),
449 in_what->stride);
450 CHECK_BETTER
451 }
452 } else {
453 for (i = 0; i < num_candidates[t]; i++) {
454 const MV this_mv = {br + candidates[t][i].row,
455 bc + candidates[t][i].col};
456 if (!is_mv_in(x, &this_mv))
457 continue;
458 thissad = vfp->sdf(what->buf, what->stride,
459 get_buf_from_mv(in_what, &this_mv),
460 in_what->stride);
461 CHECK_BETTER
462 }
463 }
464 if (best_site == -1) {
465 continue;
466 } else {
467 best_init_s = t;
468 k = best_site;
469 }
470 }
471 if (best_init_s != -1) {
472 br += candidates[best_init_s][k].row;
473 bc += candidates[best_init_s][k].col;
474 }
475 }
476
477 // If the center point is still the best, just skip this and move to
478 // the refinement step.
479 if (best_init_s != -1) {
480 int best_site = -1;
481 s = best_init_s;
482
483 do {
484 // No need to search all 6 points the 1st time if initial search was used
485 if (!do_init_search || s != best_init_s) {
486 if (check_bounds(x, br, bc, 1 << s)) {
487 for (i = 0; i < num_candidates[s]; i++) {
488 const MV this_mv = {br + candidates[s][i].row,
489 bc + candidates[s][i].col};
490 thissad = vfp->sdf(what->buf, what->stride,
491 get_buf_from_mv(in_what, &this_mv),
492 in_what->stride);
493 CHECK_BETTER
494 }
495 } else {
496 for (i = 0; i < num_candidates[s]; i++) {
497 const MV this_mv = {br + candidates[s][i].row,
498 bc + candidates[s][i].col};
499 if (!is_mv_in(x, &this_mv))
500 continue;
501 thissad = vfp->sdf(what->buf, what->stride,
502 get_buf_from_mv(in_what, &this_mv),
503 in_what->stride);
504 CHECK_BETTER
505 }
506 }
507
508 if (best_site == -1) {
509 continue;
510 } else {
511 br += candidates[s][best_site].row;
512 bc += candidates[s][best_site].col;
513 k = best_site;
514 }
515 }
516
517 do {
518 int next_chkpts_indices[PATTERN_CANDIDATES_REF];
519 best_site = -1;
520 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
521 next_chkpts_indices[1] = k;
522 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
523
524 if (check_bounds(x, br, bc, 1 << s)) {
525 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
526 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
527 bc + candidates[s][next_chkpts_indices[i]].col};
528 thissad = vfp->sdf(what->buf, what->stride,
529 get_buf_from_mv(in_what, &this_mv),
530 in_what->stride);
531 CHECK_BETTER
532 }
533 } else {
534 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
535 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
536 bc + candidates[s][next_chkpts_indices[i]].col};
537 if (!is_mv_in(x, &this_mv))
538 continue;
539 thissad = vfp->sdf(what->buf, what->stride,
540 get_buf_from_mv(in_what, &this_mv),
541 in_what->stride);
542 CHECK_BETTER
543 }
544 }
545
546 if (best_site != -1) {
547 k = next_chkpts_indices[best_site];
548 br += candidates[s][k].row;
549 bc += candidates[s][k].col;
550 }
551 } while (best_site != -1);
552 } while (s--);
553 }
554
555 // Check 4 1-away neighbors if do_refine is true.
556 // For most well-designed schemes do_refine will not be necessary.
557 if (do_refine) {
558 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
559
560 for (j = 0; j < 16; j++) {
561 int best_site = -1;
562 if (check_bounds(x, br, bc, 1)) {
563 for (i = 0; i < 4; i++) {
564 const MV this_mv = {br + neighbors[i].row,
565 bc + neighbors[i].col};
566 thissad = vfp->sdf(what->buf, what->stride,
567 get_buf_from_mv(in_what, &this_mv),
568 in_what->stride);
569 CHECK_BETTER
570 }
571 } else {
572 for (i = 0; i < 4; i++) {
573 const MV this_mv = {br + neighbors[i].row,
574 bc + neighbors[i].col};
575 if (!is_mv_in(x, &this_mv))
576 continue;
577 thissad = vfp->sdf(what->buf, what->stride,
578 get_buf_from_mv(in_what, &this_mv),
579 in_what->stride);
580 CHECK_BETTER
581 }
582 }
583
584 if (best_site == -1) {
585 break;
586 } else {
587 br += neighbors[best_site].row;
588 bc += neighbors[best_site].col;
589 }
590 }
591 }
592
593 best_mv->row = br;
594 best_mv->col = bc;
595
596 return bestsad;
597 }
598
vp9_get_mvpred_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)599 int vp9_get_mvpred_var(const MACROBLOCK *x,
600 const MV *best_mv, const MV *center_mv,
601 const vp9_variance_fn_ptr_t *vfp,
602 int use_mvcost) {
603 const MACROBLOCKD *const xd = &x->e_mbd;
604 const struct buf_2d *const what = &x->plane[0].src;
605 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
606 const MV mv = {best_mv->row * 8, best_mv->col * 8};
607 unsigned int unused;
608
609 return vfp->vf(what->buf, what->stride,
610 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
611 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
612 x->mvcost, x->errorperbit) : 0);
613 }
614
vp9_get_mvpred_av_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const uint8_t * second_pred,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)615 int vp9_get_mvpred_av_var(const MACROBLOCK *x,
616 const MV *best_mv, const MV *center_mv,
617 const uint8_t *second_pred,
618 const vp9_variance_fn_ptr_t *vfp,
619 int use_mvcost) {
620 const MACROBLOCKD *const xd = &x->e_mbd;
621 const struct buf_2d *const what = &x->plane[0].src;
622 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
623 const MV mv = {best_mv->row * 8, best_mv->col * 8};
624 unsigned int unused;
625
626 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
627 what->buf, what->stride, &unused, second_pred) +
628 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
629 x->mvcost, x->errorperbit) : 0);
630 }
631
// Pattern search with a hexagonal candidate layout; forwards to
// vp9_pattern_search() with do_refine disabled.
int vp9_hex_search(const MACROBLOCK *x,
                   MV *ref_mv,
                   int search_param,
                   int sad_per_bit,
                   int do_init_search,
                   const vp9_variance_fn_ptr_t *vfp,
                   int use_mvcost,
                   const MV *center_mv, MV *best_mv) {
  // Scale 0 uses the 8 closest points; every other scale uses 6 points in a
  // hex shape of increasing size.
  static const int sites_per_scale[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV sites[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    { {-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0} },
    { {-1, -2}, {1, -2}, {2, 0}, {1, 2}, {-1, 2}, {-2, 0} },
    { {-2, -4}, {2, -4}, {4, 0}, {2, 4}, {-2, 4}, {-4, 0} },
    { {-4, -8}, {4, -8}, {8, 0}, {4, 8}, {-4, 8}, {-8, 0} },
    { {-8, -16}, {8, -16}, {16, 0}, {8, 16}, {-8, 16}, {-16, 0} },
    { {-16, -32}, {16, -32}, {32, 0}, {16, 32}, {-16, 32}, {-32, 0} },
    { {-32, -64}, {32, -64}, {64, 0}, {32, 64}, {-32, 64}, {-64, 0} },
    { {-64, -128}, {64, -128}, {128, 0}, {64, 128}, {-64, 128}, {-128, 0} },
    { {-128, -256}, {128, -256}, {256, 0}, {128, 256}, {-128, 256},
      {-256, 0} },
    { {-256, -512}, {256, -512}, {512, 0}, {256, 512}, {-256, 512},
      {-512, 0} },
    { {-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, {-512, 1024},
      {-1024, 0} },
  };
  const int do_refine = 0;

  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, do_refine, vfp, use_mvcost,
                            center_mv, best_mv,
                            sites_per_scale, sites);
}
665
// Pattern search with a diamond candidate layout; forwards to
// vp9_pattern_search() with do_refine disabled.
int vp9_bigdia_search(const MACROBLOCK *x,
                      MV *ref_mv,
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
                      const MV *center_mv,
                      MV *best_mv) {
  // Scale 0 uses the 4 closest points; every other scale uses 8 points in a
  // diamond shape of increasing size.
  static const int sites_per_scale[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV sites[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    { {0, -1}, {1, 0}, {0, 1}, {-1, 0} },
    { {-1, -1}, {0, -2}, {1, -1}, {2, 0},
      {1, 1}, {0, 2}, {-1, 1}, {-2, 0} },
    { {-2, -2}, {0, -4}, {2, -2}, {4, 0},
      {2, 2}, {0, 4}, {-2, 2}, {-4, 0} },
    { {-4, -4}, {0, -8}, {4, -4}, {8, 0},
      {4, 4}, {0, 8}, {-4, 4}, {-8, 0} },
    { {-8, -8}, {0, -16}, {8, -8}, {16, 0},
      {8, 8}, {0, 16}, {-8, 8}, {-16, 0} },
    { {-16, -16}, {0, -32}, {16, -16}, {32, 0},
      {16, 16}, {0, 32}, {-16, 16}, {-32, 0} },
    { {-32, -32}, {0, -64}, {32, -32}, {64, 0},
      {32, 32}, {0, 64}, {-32, 32}, {-64, 0} },
    { {-64, -64}, {0, -128}, {64, -64}, {128, 0},
      {64, 64}, {0, 128}, {-64, 64}, {-128, 0} },
    { {-128, -128}, {0, -256}, {128, -128}, {256, 0},
      {128, 128}, {0, 256}, {-128, 128}, {-256, 0} },
    { {-256, -256}, {0, -512}, {256, -256}, {512, 0},
      {256, 256}, {0, 512}, {-256, 256}, {-512, 0} },
    { {-512, -512}, {0, -1024}, {512, -512}, {1024, 0},
      {512, 512}, {0, 1024}, {-512, 512}, {-1024, 0} },
  };
  const int do_refine = 0;

  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, do_refine, vfp, use_mvcost,
                            center_mv, best_mv,
                            sites_per_scale, sites);
}
706
// Pattern search with a square candidate layout; forwards to
// vp9_pattern_search() with do_refine disabled.
int vp9_square_search(const MACROBLOCK *x,
                      MV *ref_mv,
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
                      const MV *center_mv,
                      MV *best_mv) {
  // All scales have 8 closest points in square shape
  static const int sites_per_scale[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV sites[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    { {-1, -1}, {0, -1}, {1, -1}, {1, 0},
      {1, 1}, {0, 1}, {-1, 1}, {-1, 0} },
    { {-2, -2}, {0, -2}, {2, -2}, {2, 0},
      {2, 2}, {0, 2}, {-2, 2}, {-2, 0} },
    { {-4, -4}, {0, -4}, {4, -4}, {4, 0},
      {4, 4}, {0, 4}, {-4, 4}, {-4, 0} },
    { {-8, -8}, {0, -8}, {8, -8}, {8, 0},
      {8, 8}, {0, 8}, {-8, 8}, {-8, 0} },
    { {-16, -16}, {0, -16}, {16, -16}, {16, 0},
      {16, 16}, {0, 16}, {-16, 16}, {-16, 0} },
    { {-32, -32}, {0, -32}, {32, -32}, {32, 0},
      {32, 32}, {0, 32}, {-32, 32}, {-32, 0} },
    { {-64, -64}, {0, -64}, {64, -64}, {64, 0},
      {64, 64}, {0, 64}, {-64, 64}, {-64, 0} },
    { {-128, -128}, {0, -128}, {128, -128}, {128, 0},
      {128, 128}, {0, 128}, {-128, 128}, {-128, 0} },
    { {-256, -256}, {0, -256}, {256, -256}, {256, 0},
      {256, 256}, {0, 256}, {-256, 256}, {-256, 0} },
    { {-512, -512}, {0, -512}, {512, -512}, {512, 0},
      {512, 512}, {0, 512}, {-512, 512}, {-512, 0} },
    { {-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0},
      {1024, 1024}, {0, 1024}, {-1024, 1024}, {-1024, 0} },
  };
  const int do_refine = 0;

  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, do_refine, vfp, use_mvcost,
                            center_mv, best_mv,
                            sites_per_scale, sites);
}
747
// Hex search restricted to the smallest scales: search_param is raised to at
// least MAX_MVSEARCH_STEPS - 2 before being forwarded to vp9_hex_search().
int vp9_fast_hex_search(const MACROBLOCK *x,
                        MV *ref_mv,
                        int search_param,
                        int sad_per_bit,
                        int do_init_search,  // must be zero for fast_hex
                        const vp9_variance_fn_ptr_t *vfp,
                        int use_mvcost,
                        const MV *center_mv,
                        MV *best_mv) {
  const int limited_param = MAX(MAX_MVSEARCH_STEPS - 2, search_param);
  return vp9_hex_search(x, ref_mv, limited_param, sad_per_bit,
                        do_init_search, vfp, use_mvcost,
                        center_mv, best_mv);
}
761
// Diamond search restricted to the smallest scales: search_param is raised
// to at least MAX_MVSEARCH_STEPS - 2 before being forwarded to
// vp9_bigdia_search().
int vp9_fast_dia_search(const MACROBLOCK *x,
                        MV *ref_mv,
                        int search_param,
                        int sad_per_bit,
                        int do_init_search,
                        const vp9_variance_fn_ptr_t *vfp,
                        int use_mvcost,
                        const MV *center_mv,
                        MV *best_mv) {
  const int limited_param = MAX(MAX_MVSEARCH_STEPS - 2, search_param);
  return vp9_bigdia_search(x, ref_mv, limited_param, sad_per_bit,
                           do_init_search, vfp, use_mvcost,
                           center_mv, best_mv);
}
775
776 #undef CHECK_BETTER
777
// Exhaustive SAD search over every position within +/- 64 of ref_mv in both
// directions, further limited to x's mv_row_min..mv_row_max and
// mv_col_min..mv_col_max window (all bounds inclusive).
//
// ref_mv is clamped in place to the search window and is the initial best.
// Columns are evaluated four at a time through fn_ptr->sdx4df; the leftover
// columns at the end of a row go through fn_ptr->sdf.  A candidate whose raw
// SAD is below the current best has its motion vector SAD cost (relative to
// center_mv >> 3) added and wins if the total is still below the best.
//
// Outputs: *best_mv (best position found) and *num00 (always set to 11).
// Returns the best score.  cfg and search_param are unused.
int vp9_full_range_search_c(const MACROBLOCK *x,
                            const search_site_config *cfg,
                            MV *ref_mv, MV *best_mv,
                            int search_param, int sad_per_bit, int *num00,
                            const vp9_variance_fn_ptr_t *fn_ptr,
                            const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int range = 64;
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  unsigned int best_sad = INT_MAX;
  int r, c, i;
  int start_col, end_col, start_row, end_row;

  // The cfg and search_param parameters are not used in this search variant
  (void)cfg;
  (void)search_param;

  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  *best_mv = *ref_mv;
  *num00 = 11;
  best_sad = fn_ptr->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, ref_mv), in_what->stride) +
             mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  // Offsets relative to ref_mv; both ends are inclusive.
  start_row = MAX(-range, x->mv_row_min - ref_mv->row);
  start_col = MAX(-range, x->mv_col_min - ref_mv->col);
  end_row = MIN(range, x->mv_row_max - ref_mv->row);
  end_col = MIN(range, x->mv_col_max - ref_mv->col);

  for (r = start_row; r <= end_row; ++r) {
    for (c = start_col; c <= end_col; c += 4) {
      if (c + 3 <= end_col) {
        // A full group of four columns fits: evaluate them in one call.
        unsigned int sads[4];
        const uint8_t *addrs[4];
        for (i = 0; i < 4; ++i) {
          const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
          addrs[i] = get_buf_from_mv(in_what, &mv);
        }

        fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);

        for (i = 0; i < 4; ++i) {
          if (sads[i] < best_sad) {
            const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
            const unsigned int sad = sads[i] +
                mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
            if (sad < best_sad) {
              best_sad = sad;
              *best_mv = mv;
            }
          }
        }
      } else {
        // Fewer than four columns remain: c .. end_col inclusive.  The bound
        // is `<=` so that column end_col itself is evaluated, matching the
        // inclusive treatment of end_row and of the outer column loop.
        for (i = 0; i <= end_col - c; ++i) {
          const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
          unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
              get_buf_from_mv(in_what, &mv), in_what->stride);
          if (sad < best_sad) {
            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
            if (sad < best_sad) {
              best_sad = sad;
              *best_mv = mv;
            }
          }
        }
      }
    }
  }

  return best_sad;
}
850
// Full-pel step search over the candidate sites stored in cfg (C version).
//
// ref_mv is clamped in place to the x->mv_{row,col}_{min,max} limits and used
// as the start position. Every step evaluates cfg->searches_per_step sites
// around the current best position and recentres on the site with the lowest
// SAD plus motion vector cost, provided that site beats the current best.
//
// Outputs: *best_mv receives the selected full-pel vector and *num00 the
// number of steps in which no site was selected while the best position was
// still the start position. Returns the best SAD including its motion vector
// cost.
int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                             const search_site_config *cfg,
                             MV *ref_mv, MV *best_mv, int search_param,
                             int sad_per_bit, int *num00,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const uint8_t *const src = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const int ref_stride = xd->plane[0].pre[0].stride;
  const int sites_per_step = cfg->searches_per_step;

  // search_param determines the length of the initial step and hence the
  // number of steps:
  //   0 = MAX_FIRST_STEP pel
  //   1 = MAX_FIRST_STEP / 2 pel
  //   2 = MAX_FIRST_STEP / 4 pel ...
  const search_site *const ss = &cfg->ss[search_param * sites_per_step];
  const int tot_steps = (cfg->ss_count / sites_per_step) - search_param;

  // center_mv scaled down by 8; used as the reference for the vector cost.
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};

  const uint8_t *start_address;
  const uint8_t *best_address;
  unsigned int bestsad;
  int best_site = 0;
  int last_site = 0;
  int site = 1;  // Running index into ss; entry 0 is never evaluated below.
  int step;

  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  *num00 = 0;
  *best_mv = *ref_mv;

  // Address of the start position inside the reference buffer.
  start_address = xd->plane[0].pre[0].buf + ref_mv->row * ref_stride +
                  ref_mv->col;
  best_address = start_address;

  // Score the start position.
  bestsad = fn_ptr->sdf(src, src_stride, start_address, ref_stride) +
            mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);

  for (step = 0; step < tot_steps; step++) {
    // all_in is set when the first four sites of this step lie strictly
    // inside the MV limits: rows are tested with sites 0 and 1, columns with
    // sites 2 and 3. In that case the sites are evaluated four at a time
    // without a per-site range check; otherwise each site is validated.
    const int all_in =
        ((best_mv->row + ss[site].mv.row) > x->mv_row_min) &&
        ((best_mv->row + ss[site + 1].mv.row) < x->mv_row_max) &&
        ((best_mv->col + ss[site + 2].mv.col) > x->mv_col_min) &&
        ((best_mv->col + ss[site + 3].mv.col) < x->mv_col_max);
    int j;

    if (all_in) {
      for (j = 0; j < sites_per_step; j += 4, site += 4) {
        unsigned char const *block_offset[4];
        unsigned int sad_array[4];
        int t;

        for (t = 0; t < 4; t++)
          block_offset[t] = ss[site + t].offset + best_address;

        fn_ptr->sdx4df(src, src_stride, block_offset, ref_stride, sad_array);

        for (t = 0; t < 4; t++) {
          const int idx = site + t;
          unsigned int sad = sad_array[t];

          // Only pay for the vector cost when the raw SAD is competitive.
          if (sad < bestsad) {
            const MV this_mv = {best_mv->row + ss[idx].mv.row,
                                best_mv->col + ss[idx].mv.col};
            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (sad < bestsad) {
              bestsad = sad;
              best_site = idx;
            }
          }
        }
      }
    } else {
      for (j = 0; j < sites_per_step; j++, site++) {
        const MV this_mv = {best_mv->row + ss[site].mv.row,
                            best_mv->col + ss[site].mv.col};
        unsigned int thissad;

        // Trap illegal vectors.
        if (!is_mv_in(x, &this_mv))
          continue;

        thissad = fn_ptr->sdf(src, src_stride, ss[site].offset + best_address,
                              ref_stride);
        if (thissad >= bestsad)
          continue;

        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
        if (thissad < bestsad) {
          bestsad = thissad;
          best_site = site;
        }
      }
    }

    if (best_site != last_site) {
      // A site won during this step: recentre the search on it.
      best_mv->row += ss[best_site].mv.row;
      best_mv->col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
      // Keep moving by the winning site's offset for as long as that lowers
      // the cost.
      for (;;) {
        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
                            best_mv->col + ss[best_site].mv.col};
        unsigned int thissad;

        if (!is_mv_in(x, &this_mv))
          break;

        thissad = fn_ptr->sdf(src, src_stride,
                              ss[best_site].offset + best_address, ref_stride);
        if (thissad >= bestsad)
          break;

        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
        if (thissad >= bestsad)
          break;

        bestsad = thissad;
        best_mv->row += ss[best_site].mv.row;
        best_mv->col += ss[best_site].mv.col;
        best_address += ss[best_site].offset;
      }
#endif
    } else if (best_address == start_address) {
      // Nothing was selected and the best position is still the start point.
      (*num00)++;
    }
  }
  return bestsad;
}
992
993 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
994 point as the best match, we will do a final 1-away diamond
995 refining search */
996
// Runs cpi->diamond_search_sad() starting at step_param and then again with
// step_param + 1, step_param + 2, ... up to further_steps extra rounds,
// optionally followed by cpi->refining_search_sad(). Every search result
// below INT_MAX is rescored with vp9_get_mvpred_var() before it is compared.
//
// The num00 count reported by the first search seeds the round counter, and
// the num00 count reported by each later search is the number of following
// rounds that are skipped. *dst_mv receives the vector with the lowest
// rescored error, which is also the return value.
int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
                           MV *mvp_full, int step_param,
                           int sadpb, int further_steps, int do_refine,
                           const vp9_variance_fn_ptr_t *fn_ptr,
                           const MV *ref_mv, MV *dst_mv) {
  MV cand_mv;
  int rounds_to_skip = 0;
  int n;
  int cand_err;
  int best_err;

  // First search; n is written by the callee through its num00 argument.
  best_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &cand_mv,
                                     step_param, sadpb, &n,
                                     fn_ptr, ref_mv);
  if (best_err < INT_MAX)
    best_err = vp9_get_mvpred_var(x, &cand_mv, ref_mv, fn_ptr, 1);
  *dst_mv = cand_mv;

  // If there won't be more n-step search, the refining search is dropped.
  if (n > further_steps)
    do_refine = 0;

  while (n < further_steps) {
    ++n;

    if (rounds_to_skip != 0) {
      --rounds_to_skip;
      continue;
    }

    cand_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &cand_mv,
                                       step_param + n, sadpb, &rounds_to_skip,
                                       fn_ptr, ref_mv);
    if (cand_err < INT_MAX)
      cand_err = vp9_get_mvpred_var(x, &cand_mv, ref_mv, fn_ptr, 1);

    // More rounds to skip than rounds left: no refining search either.
    if (rounds_to_skip > further_steps - n)
      do_refine = 0;

    if (cand_err < best_err) {
      best_err = cand_err;
      *dst_mv = cand_mv;
    }
  }

  // Final 1-away refining search around the best vector so far.
  if (do_refine) {
    const int search_range = 8;
    MV refined_mv = *dst_mv;

    cand_err = cpi->refining_search_sad(x, &refined_mv, sadpb, search_range,
                                        fn_ptr, ref_mv);
    if (cand_err < INT_MAX)
      cand_err = vp9_get_mvpred_var(x, &refined_mv, ref_mv, fn_ptr, 1);

    if (cand_err < best_err) {
      best_err = cand_err;
      *dst_mv = refined_mv;
    }
  }

  return best_err;
}
1054
// Exhaustive full-pel search (C version).
//
// Scores ref_mv first, then every position with row in [row_min, row_max)
// and column in [col_min, col_max), where the window is ref_mv +/- distance
// intersected with the x->mv_* limits. The score is SAD plus motion vector
// cost relative to center_mv >> 3. *best_mv receives the lowest-scoring
// position and the score is returned.
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
                          int sad_per_bit, int distance,
                          const vp9_variance_fn_ptr_t *fn_ptr,
                          const MV *center_mv, MV *best_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &xd->plane[0].pre[0];
  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int row;
  int col;

  // Score of the start position.
  int best_sad = fn_ptr->sdf(src->buf, src->stride,
                             get_buf_from_mv(ref, ref_mv), ref->stride) +
                 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  *best_mv = *ref_mv;

  for (row = row_min; row < row_max; ++row) {
    for (col = col_min; col < col_max; ++col) {
      const MV cand = {row, col};
      const int cand_sad =
          fn_ptr->sdf(src->buf, src->stride,
                      get_buf_from_mv(ref, &cand), ref->stride) +
          mvsad_err_cost(x, &cand, &fcenter_mv, sad_per_bit);

      if (cand_sad >= best_sad)
        continue;

      best_sad = cand_sad;
      *best_mv = cand;
    }
  }

  return best_sad;
}
1087
// Exhaustive full-pel search that scores three horizontally adjacent
// positions per call when fn_ptr->sdx3f is available.
//
// The window is ref_mv +/- distance intersected with the x->mv_* limits,
// with exclusive upper bounds. Positions left over at the end of a row (or
// all of them when sdx3f is NULL) are scored one at a time with fn_ptr->sdf.
// The motion vector cost is only added when the raw SAD already beats the
// current best. *best_mv receives the winner; its score is returned.
int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
                          int sad_per_bit, int distance,
                          const vp9_variance_fn_ptr_t *fn_ptr,
                          const MV *center_mv, MV *best_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &xd->plane[0].pre[0];
  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int r;

  // Score of the start position.
  unsigned int best_sad =
      fn_ptr->sdf(src->buf, src->stride,
                  get_buf_from_mv(ref, ref_mv), ref->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  *best_mv = *ref_mv;

  for (r = row_min; r < row_max; ++r) {
    int c = col_min;
    const uint8_t *check_here = &ref->buf[r * ref->stride + c];

    // Groups of three columns.
    if (fn_ptr->sdx3f != NULL) {
      for (; (c + 2) < col_max; c += 3, check_here += 3) {
        unsigned int sads[3];
        int k;

        fn_ptr->sdx3f(src->buf, src->stride, check_here, ref->stride, sads);

        for (k = 0; k < 3; ++k) {
          const MV cand = {r, c + k};
          unsigned int sad = sads[k];

          if (sad >= best_sad)
            continue;

          sad += mvsad_err_cost(x, &cand, &fcenter_mv, sad_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            *best_mv = cand;
          }
        }
      }
    }

    // Remaining columns of this row, one at a time.
    for (; c < col_max; ++c, ++check_here) {
      const MV cand = {r, c};
      unsigned int sad = fn_ptr->sdf(src->buf, src->stride,
                                     check_here, ref->stride);

      if (sad >= best_sad)
        continue;

      sad += mvsad_err_cost(x, &cand, &fcenter_mv, sad_per_bit);
      if (sad < best_sad) {
        best_sad = sad;
        *best_mv = cand;
      }
    }
  }

  return best_sad;
}
1152
// Exhaustive full-pel search that scores eight horizontally adjacent
// positions per call when fn_ptr->sdx8f is available, then three at a time
// when fn_ptr->sdx3f is available, then single positions with fn_ptr->sdf.
//
// The window is ref_mv +/- distance intersected with the x->mv_* limits,
// with exclusive upper bounds. The motion vector cost is only added when the
// raw SAD already beats the current best. *best_mv receives the winner; its
// score is returned.
int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                          int sad_per_bit, int distance,
                          const vp9_variance_fn_ptr_t *fn_ptr,
                          const MV *center_mv, MV *best_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &xd->plane[0].pre[0];
  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int r;

  // Score of the start position.
  unsigned int best_sad =
      fn_ptr->sdf(src->buf, src->stride,
                  get_buf_from_mv(ref, ref_mv), ref->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  *best_mv = *ref_mv;

  for (r = row_min; r < row_max; ++r) {
    int c = col_min;
    const uint8_t *check_here = &ref->buf[r * ref->stride + c];

    // Groups of eight columns.
    if (fn_ptr->sdx8f != NULL) {
      for (; (c + 7) < col_max; c += 8, check_here += 8) {
        unsigned int sads[8];
        int k;

        fn_ptr->sdx8f(src->buf, src->stride, check_here, ref->stride, sads);

        for (k = 0; k < 8; ++k) {
          const MV cand = {r, c + k};
          unsigned int sad = sads[k];

          if (sad >= best_sad)
            continue;

          sad += mvsad_err_cost(x, &cand, &fcenter_mv, sad_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            *best_mv = cand;
          }
        }
      }
    }

    // Groups of three columns.
    if (fn_ptr->sdx3f != NULL) {
      for (; (c + 2) < col_max; c += 3, check_here += 3) {
        unsigned int sads[3];
        int k;

        fn_ptr->sdx3f(src->buf, src->stride, check_here, ref->stride, sads);

        for (k = 0; k < 3; ++k) {
          const MV cand = {r, c + k};
          unsigned int sad = sads[k];

          if (sad >= best_sad)
            continue;

          sad += mvsad_err_cost(x, &cand, &fcenter_mv, sad_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            *best_mv = cand;
          }
        }
      }
    }

    // Remaining columns of this row, one at a time.
    for (; c < col_max; ++c, ++check_here) {
      const MV cand = {r, c};
      unsigned int sad = fn_ptr->sdf(src->buf, src->stride,
                                     check_here, ref->stride);

      if (sad >= best_sad)
        continue;

      sad += mvsad_err_cost(x, &cand, &fcenter_mv, sad_per_bit);
      if (sad < best_sad) {
        best_sad = sad;
        *best_mv = cand;
      }
    }
  }

  return best_sad;
}
1241
// Iterative 1-away refinement of ref_mv (C version).
//
// For at most search_range iterations the four direct neighbours of the
// current vector (row - 1, col - 1, col + 1, row + 1) are scored with SAD
// plus motion vector cost relative to center_mv >> 3. ref_mv is moved in
// place to the best neighbour whenever one beats the current score, and the
// loop stops as soon as none does. Returns the best score.
int vp9_refining_search_sad_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
                              const MV *center_mv) {
  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &xd->plane[0].pre[0];
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  const uint8_t *best_address = get_buf_from_mv(ref, ref_mv);
  unsigned int best_sad =
      fn_ptr->sdf(src->buf, src->stride, best_address, ref->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  int iter;

  for (iter = 0; iter < search_range; iter++) {
    // When all four neighbours lie strictly inside the MV limits they are
    // scored with one sdx4df call and no per-neighbour range check.
    const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &&
                       ((ref_mv->row + 1) < x->mv_row_max) &&
                       ((ref_mv->col - 1) > x->mv_col_min) &&
                       ((ref_mv->col + 1) < x->mv_col_max);
    int best_site = -1;
    int j;

    if (all_in) {
      // Same order as neighbors[].
      const uint8_t *const positions[4] = {
        best_address - ref->stride,
        best_address - 1,
        best_address + 1,
        best_address + ref->stride
      };
      unsigned int sads[4];

      fn_ptr->sdx4df(src->buf, src->stride, positions, ref->stride, sads);

      for (j = 0; j < 4; ++j) {
        const MV cand = {ref_mv->row + neighbors[j].row,
                         ref_mv->col + neighbors[j].col};
        unsigned int sad = sads[j];

        if (sad >= best_sad)
          continue;

        sad += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
        if (sad < best_sad) {
          best_sad = sad;
          best_site = j;
        }
      }
    } else {
      for (j = 0; j < 4; ++j) {
        const MV cand = {ref_mv->row + neighbors[j].row,
                         ref_mv->col + neighbors[j].col};
        unsigned int sad;

        if (!is_mv_in(x, &cand))
          continue;

        sad = fn_ptr->sdf(src->buf, src->stride,
                          get_buf_from_mv(ref, &cand), ref->stride);
        if (sad >= best_sad)
          continue;

        sad += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
        if (sad < best_sad) {
          best_sad = sad;
          best_site = j;
        }
      }
    }

    // No neighbour improved on the current position: done.
    if (best_site < 0)
      break;

    ref_mv->row += neighbors[best_site].row;
    ref_mv->col += neighbors[best_site].col;
    best_address = get_buf_from_mv(ref, ref_mv);
  }

  return best_sad;
}
1318
1319 // This function is called when we do joint motion search in comp_inter_inter
1320 // mode.
// Iterative 8-neighbour refinement of ref_mv using the averaging SAD
// (fn_ptr->sdaf) against second_pred.
//
// For at most search_range iterations all eight surrounding positions that
// pass is_mv_in() are scored with sdaf plus motion vector cost relative to
// center_mv >> 3. ref_mv is moved in place to the best neighbour whenever
// one beats the current score, and the loop stops as soon as none does.
// Returns the best score.
int vp9_refining_search_8p_c(const MACROBLOCK *x,
                             MV *ref_mv, int error_per_bit,
                             int search_range,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv,
                             const uint8_t *second_pred) {
  // Four edge neighbours followed by the four diagonal ones.
  const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
                           {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &xd->plane[0].pre[0];
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  unsigned int best_sad =
      fn_ptr->sdaf(src->buf, src->stride,
                   get_buf_from_mv(ref, ref_mv), ref->stride, second_pred) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  int iter;

  for (iter = 0; iter < search_range; ++iter) {
    int best_site = -1;
    int j;

    for (j = 0; j < 8; ++j) {
      const MV cand = {ref_mv->row + neighbors[j].row,
                       ref_mv->col + neighbors[j].col};
      unsigned int sad;

      if (!is_mv_in(x, &cand))
        continue;

      sad = fn_ptr->sdaf(src->buf, src->stride,
                         get_buf_from_mv(ref, &cand), ref->stride,
                         second_pred);
      if (sad >= best_sad)
        continue;

      sad += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
      if (sad < best_sad) {
        best_sad = sad;
        best_site = j;
      }
    }

    // No neighbour improved on the current position: done.
    if (best_site < 0)
      break;

    ref_mv->row += neighbors[best_site].row;
    ref_mv->col += neighbors[best_site].col;
  }

  return best_sad;
}
1367
// Dispatches the full-pel motion search selected by
// cpi->sf.mv.search_method and returns the value reported by that search.
//
// The chosen vector is written to *tmp_mv. For every method except NSTEP,
// when rd is non-zero and the reported value is below var_max, the result
// is replaced by vp9_get_mvpred_var() evaluated at *tmp_mv. An unknown
// method trips the assert; with asserts compiled out no search is run and
// the value stays 0.
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
                          BLOCK_SIZE bsize, MV *mvp_full,
                          int step_param, int error_per_bit,
                          const MV *ref_mv, MV *tmp_mv,
                          int var_max, int rd) {
  const SEARCH_METHODS method = cpi->sf.mv.search_method;
  vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  int var = 0;

  if (method == FAST_DIAMOND) {
    var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
                              fn_ptr, 1, ref_mv, tmp_mv);
  } else if (method == FAST_HEX) {
    var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
                              fn_ptr, 1, ref_mv, tmp_mv);
  } else if (method == HEX) {
    var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
                         fn_ptr, 1, ref_mv, tmp_mv);
  } else if (method == SQUARE) {
    var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
                            fn_ptr, 1, ref_mv, tmp_mv);
  } else if (method == BIGDIA) {
    var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
                            fn_ptr, 1, ref_mv, tmp_mv);
  } else if (method == NSTEP) {
    var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
                                 MAX_MVSEARCH_STEPS - 1 - step_param,
                                 1, fn_ptr, ref_mv, tmp_mv);
  } else {
    assert(!"Invalid search method.");
  }

  // NSTEP results were already rescored inside vp9_full_pixel_diamond().
  if (rd && method != NSTEP && var < var_max)
    var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);

  return var;
}
1413