1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
13 #include "onyx_int.h"
14 #include "mcomp.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
17 #include <stdio.h>
18 #include <limits.h>
19 #include <math.h>
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
23
24 #ifdef VP8_ENTROPY_STATS
25 static int mv_ref_ct[31][4][2];
26 static int mv_mode_cts[4][2];
27 #endif
28
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to over state the cost of vectors. In
   * addition coding a new vector can have a knock on effect on the cost
   * of subsequent vectors and the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
  const int row_bits = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1];
  const int col_bits = mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];

  /* Scale the summed component costs by Weight / 128. */
  return ((row_bits + col_bits) * Weight) >> 7;
}
42
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_cost, col_cost;

  /* Ignore mv costing if mvcost is NULL */
  if (mvcost == NULL) return 0;

  row_cost = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1];
  col_cost = mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];

  /* Scale by error_per_bit / 256 with rounding to nearest. */
  return ((row_cost + col_cost) * error_per_bit + 128) >> 8;
}
55
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  int row_cost, col_cost;

  /* Calculate sad error cost on full pixel basis. */
  /* Ignore mv costing if mvsadcost is NULL */
  if (mvsadcost == NULL) return 0;

  row_cost = mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)];
  col_cost = mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)];

  /* Scale by error_per_bit / 256 with rounding to nearest. */
  return ((row_cost + col_cost) * error_per_bit + 128) >> 8;
}
69
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit direction vectors, in site order: up, down, left, right. */
  static const int dir_row[4] = { -1, 1, 0, 0 };
  static const int dir_col[4] = { 0, 0, -1, 1 };
  int radius = MAX_FIRST_STEP;
  int count = 0;
  int i;

  /* Site 0 is the unmoved centre position. */
  x->ss[count].mv.col = 0;
  x->ss[count].mv.row = 0;
  x->ss[count].offset = 0;
  count++;

  /* Generate offsets for 4 search sites per step, halving the radius
   * after each ring until it reaches zero.
   */
  while (radius > 0) {
    for (i = 0; i < 4; ++i) {
      const int r = dir_row[i] * radius;
      const int c = dir_col[i] * radius;

      x->ss[count].mv.col = c;
      x->ss[count].mv.row = r;
      /* Buffer offset equivalent to the (r, c) displacement. */
      x->ss[count].offset = r * stride + c;
      count++;
    }

    /* Contract. */
    radius /= 2;
  }

  x->ss_count = count;
  x->searches_per_step = 4;
}
113
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit direction vectors, in site order: up, down, left, right,
   * up-left, up-right, down-left, down-right.
   */
  static const int dir_row[8] = { -1, 1, 0, 0, -1, -1, 1, 1 };
  static const int dir_col[8] = { 0, 0, -1, 1, -1, 1, -1, 1 };
  int radius = MAX_FIRST_STEP;
  int count = 0;
  int i;

  /* Site 0 is the unmoved centre position. */
  x->ss[count].mv.col = 0;
  x->ss[count].mv.row = 0;
  x->ss[count].offset = 0;
  count++;

  /* Generate offsets for 8 search sites per step, halving the radius
   * after each ring until it reaches zero.
   */
  while (radius > 0) {
    for (i = 0; i < 8; ++i) {
      const int r = dir_row[i] * radius;
      const int c = dir_col[i] * radius;

      x->ss[count].mv.col = c;
      x->ss[count].mv.row = r;
      /* Buffer offset equivalent to the (r, c) displacement. */
      x->ss[count].offset = r * stride + c;
      count++;
    }

    /* Contract. */
    radius /= 2;
  }

  x->ss_count = count;
  x->searches_per_step = 8;
}
181
182 /*
183 * To avoid the penalty for crossing cache-line read, preload the reference
184 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduces the CPU
 * cycles spent on reading ref data in sub-pixel filter functions.
187 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
188 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
189 * could reduce the area.
190 */
191
/* estimated cost of a motion vector (r,c) */
/* (r, c) are in 1/4-pel units; rr/rc hold the 1/4-pel reference vector.
 * Evaluates to 0 when no mvcost table is supplied (see mv_err_cost).
 */
#define MVC(r, c)                                                            \
  (mvcost                                                                    \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
/* (r, c) >> 2 converts the 1/4-pel coordinate back to full pels; "offset"
 * removes the full-pel displacement already baked into y.
 */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
/* The low two bits of a 1/4-pel coordinate, doubled into 1/8-pel units. */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* Run statement s when (r, c) is inside [minr,maxr]x[minc,maxc], else e. */
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best */
/* NOTE: updates besterr/br/bc/*distortion/*sse1 from the enclosing scope;
 * out-of-range points are marked unusable by setting v = UINT_MAX.
 */
#define CHECK_BETTER(v, r, c)                    \
  IFMVCV(r, c,                                   \
         {                                       \
           thismse = DIST(r, c);                 \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v;                        \
             br = r;                             \
             bc = c;                             \
             *distortion = thismse;              \
             *sse1 = sse;                        \
           }                                     \
         },                                      \
         v = UINT_MAX;)
222
/* Iterative sub-pixel refinement around *bestmv (full-pel on entry).
 * Runs up to 4 half-pel iterations then up to 4 quarter-pel iterations,
 * each testing the 4 cardinal neighbours plus the most promising diagonal
 * via CHECK_BETTER. On return *bestmv is scaled to 1/8-pel units,
 * *distortion holds the variance at the chosen position and *sse1 its SSE.
 * Returns the best error (distortion + mv cost), or INT_MAX when the
 * result ends up too far from ref_mv to be coded.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  /* rr/rc: reference vector in 1/4 pel; br/bc: current best in 1/4 pel. */
  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  /* Search bounds: the tighter of the frame-edge limits and the largest
   * magnitude representable with mvlong_width bits, all in 1/4 pel.
   */
  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  /* On x86, copy the reference neighbourhood into an aligned 32-wide
   * scratch buffer so sub-pixel reads never straddle a cache line.
   */
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* Full-pel displacement already represented by y; used by PRE(). */
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */
  while (--halfiters) {
    /* 1/2 pel */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    /* Pick the diagonal in the quadrant of the two better cardinals. */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */

  /* 1/4 pel */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* Convert the winning 1/4-pel position to 1/8-pel units. */
  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  /* Reject results that drifted beyond the codable full-pel range. */
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
359 #undef MVC
360 #undef PRE
361 #undef SP
362 #undef DIST
363 #undef IFMVCV
364 #undef ERR
365 #undef CHECK_BETTER
366
/* Single-pass sub-pixel refinement around *bestmv (full-pel on entry).
 * First tests the four half-pel cardinal neighbours plus one diagonal,
 * then repeats the pattern at quarter-pel precision around the half-pel
 * winner. On return *bestmv is scaled to 1/8-pel units, *distortion holds
 * the variance of the chosen position and *sse1 its SSE. Returns the best
 * error (distortion + mv cost).
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  /* On x86, search from an aligned 32-wide scratch copy of the reference
   * area to avoid cache-line-crossing reads (see comment above).
   */
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  /* Choose the quadrant of the two better cardinal results. */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels. */
  /* Re-anchor y on the half-pel winner before the quarter-pel pass. */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* Column is on a full pel: step to the previous pel at 3/4 offset. */
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* Row is on a full pel: step to the previous pel at 3/4 offset. */
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
662
/* Half-pel-only refinement around *bestmv (full-pel on entry): tests the
 * four half-pel cardinal neighbours plus one diagonal chosen from the two
 * better cardinals. On return *bestmv is scaled to 1/8-pel units,
 * *distortion holds the variance of the chosen position and *sse1 its SSE.
 * Returns the best error (distortion + mv cost).
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  /* On x86, search from an aligned 32-wide scratch copy of the reference
   * area to avoid cache-line-crossing reads (see comment above).
   */
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal - */
  /* Choose the quadrant of the two better cardinal results. */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
803
/* Set all_in (from the enclosing scope) to 1 iff every point within
 * "range" of (br, bc) is inside the MV limits, letting the per-point
 * CHECK_POINT test be skipped.
 */
#define CHECK_BOUNDS(range)                    \
  {                                            \
    all_in = 1;                                \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }

/* Skip (continue) the enclosing loop iteration when this_mv is outside
 * the allowed MV limits.
 */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* Fold the mv cost into thissad only when the raw SAD already beats
 * bestsad (cheap early-out), then record the loop index i as best_site
 * if the total still wins. Updates thissad/bestsad/best_site in the
 * enclosing scope.
 */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }
832
/* For hex search: next_chkpts[k] lists the 3 new candidate points to test
 * after the previous iteration moved in hex direction k (the other 3 hex
 * neighbours were already covered by the previous ring).
 */
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};
838
/* Full-pel hexagon motion search. Starting from the clamped ref_mv,
 * repeatedly tests a 6-point hexagon pattern (using next_chkpts to avoid
 * re-testing points) until no improvement, then refines with a small
 * 4-neighbour diamond. Writes the winning full-pel vector to *best_mv and
 * returns its SAD + mv cost.
 */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv) {
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  int k = -1; /* last hex direction taken; indexes next_chkpts */
  int all_in;
  int best_site = -1;
  int hex_range = 127;
  int dia_range = 8;

  int_mv fcenter_mv;
  /* center_mv is in 1/8 pel; convert to full pel for SAD costing. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  (void)mvcost;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search */
  /* First ring: test all 6 hexagon points around the start. */
  CHECK_BOUNDS(2)

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  }

  if (best_site == -1) {
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  /* Subsequent rings: only the 3 points not already covered need testing. */
  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      /* Update k to the absolute hex direction just taken (mod 6). */
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

  /* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
1018 #undef CHECK_BOUNDS
1019 #undef CHECK_POINT
1020 #undef CHECK_BETTER
1021
/* Diamond pattern motion search (scalar reference version).
 *
 * Starting from ref_mv (clamped to the legal MV range), repeatedly probes
 * the candidate offsets of a shrinking diamond pattern stored in x->ss[].
 * search_param selects the initial step size: 0 = MAX_FIRST_STEP pel,
 * 1 = half that, and so on.  On return best_mv holds the best full-pel
 * vector found and *num00 counts steps in which the (unmoved) centre
 * position remained best.
 *
 * Returns the variance of the best match plus the motion-vector rate cost
 * relative to center_mv, suitable for direct RD comparison.
 */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max))

      {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          /* Pay the MV rate cost only when the raw SAD already beats the
           * current best.
           */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      /* Centre stayed best while still at the original position. */
      (*num00)++;
    }
  }

  /* Convert back to 1/8-pel units.  Use * 8 rather than << 3: left-shifting
   * a negative value is undefined behaviour in C, and this matches
   * vp8_diamond_search_sadx4().
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1133
/* Diamond pattern motion search, SIMD-batched variant of
 * vp8_diamond_search_sad_c().
 *
 * When every candidate of a step is known to lie inside the legal MV range
 * (established once per step with four bound tests), the step's candidates
 * are evaluated four at a time through fn_ptr->sdx4df; otherwise it falls
 * back to the per-point bounds-checked scalar path.  Interface and results
 * match the scalar version.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    int all_in = 1, t;

    /* To know if all neighbor points are within the bounds, 4 bounds
     * checking are enough instead of checking 4 bounds for each
     * points.
     */
    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];

      for (j = 0; j < x->searches_per_step; j += 4) {
        const unsigned char *block_offset[4];

        for (t = 0; t < 4; ++t) {
          block_offset[t] = ss[i + t].offset + best_address;
        }

        /* One call computes the SADs of four candidate positions. */
        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        /* Note: i advances here so ss[i] tracks sad_array[t]. */
        for (t = 0; t < 4; t++, i++) {
          if (sad_array[t] < bestsad) {
            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
            /* Pay the MV rate cost only when the raw SAD already beats
             * the current best.
             */
            sad_array[t] +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      for (j = 0; j < x->searches_per_step; ++j) {
        /* Trap illegal vectors */
        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = ss[i].offset + best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        i++;
      }
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      /* Centre stayed best while still at the original position. */
      (*num00)++;
    }
  }

  /* Back to 1/8-pel units (multiply, not shift: row/col may be negative). */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1282
/* Exhaustive full-pel motion search (scalar reference version).
 *
 * Evaluates every position of a square window of half-width `distance`
 * centred on ref_mv (clipped to the UMV border) and leaves the best
 * full-pel vector in d->bmi.mv.  Each candidate's SAD is biased by its MV
 * rate cost so the search optimises rate + distortion, not raw SAD.
 *
 * Returns the variance of the best match plus its MV cost relative to
 * center_mv, suitable for direct comparison with other search results.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that
   * stretch beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;

    for (c = col_min; c < col_max; ++c) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      this_mv.as_mv.col = c;
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

      if (thissad < bestsad) {
        bestsad = thissad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        bestaddress = check_here;
      }

      check_here++;
    }
  }

  /* Convert back to 1/8-pel units.  Use * 8 rather than << 3: left-shifting
   * a negative value is undefined behaviour in C, and this matches
   * vp8_full_search_sadx8().
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1368
/* Exhaustive full-pel motion search, 3-wide SIMD-batched variant of
 * vp8_full_search_sad_c().
 *
 * Sweeps each row of the search window three candidates at a time through
 * fn_ptr->sdx3f, finishing the row with scalar sdf calls.  The best
 * full-pel vector is left in d->bmi.mv.
 *
 * Returns the variance of the best match plus its MV cost relative to
 * center_mv.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Three candidates at a time while a full triple fits in the row. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          /* Pay the MV rate cost only when the raw SAD already wins. */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Scalar remainder of the row. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Convert back to 1/8-pel units.  Use * 8 rather than << 3: left-shifting
   * a negative value is undefined behaviour in C, and this matches
   * vp8_full_search_sadx8().
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1487
/* Exhaustive full-pel motion search, SIMD-batched variant of
 * vp8_full_search_sad_c().
 *
 * Each row of the search window is swept in three tiers: eight candidates
 * at a time via fn_ptr->sdx8f, then three at a time via fn_ptr->sdx3f,
 * then one at a time via fn_ptr->sdf for the remainder.  The best full-pel
 * vector is left in d->bmi.mv.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  unsigned char *in_what;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  /* 16-byte alignment for the SIMD sdx8f output array. */
  DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Tier 1: blocks of eight candidates. */
    while ((c + 7) < col_max) {
      int i;

      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);

      for (i = 0; i < 8; ++i) {
        thissad = sad_array8[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          /* Pay the MV rate cost only when the raw SAD already wins. */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Tier 2: blocks of three candidates. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Tier 3: scalar remainder of the row. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Back to 1/8-pel units (multiply, not shift: row/col may be negative). */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1633
/* Iterative 1-pel refinement search (scalar reference version).
 *
 * Starting from ref_mv, repeatedly tests the four 1-pel neighbours
 * (up/left/right/down) and moves to the best one, for at most search_range
 * iterations or until no neighbour improves the biased SAD.  ref_mv is
 * updated in place with the refined full-pel vector.
 *
 * Returns the variance of the final position plus its MV cost relative to
 * center_mv.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;

    for (j = 0; j < 4; ++j) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      /* Trap illegal vectors */
      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          /* Pay the MV rate cost only when the raw SAD already wins. */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      /* No neighbour improved on the current position: converged. */
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  /* Convert back to 1/8-pel units.  Use * 8 rather than << 3: left-shifting
   * a negative value is undefined behaviour in C, and this matches
   * vp8_refining_search_sadx4().
   */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1711
/* Iterative 1-pel refinement search, SIMD-batched variant of
 * vp8_refining_search_sad_c().
 *
 * When all four 1-pel neighbours are inside the legal MV range, their SADs
 * are computed with a single fn_ptr->sdx4df call; otherwise it falls back
 * to the per-neighbour bounds-checked scalar path.  ref_mv is refined in
 * place.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD-stage costing works on full-pel vectors; drop the 1/8-pel part. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;
    int all_in = 1;

    /* Four checks decide whether every neighbour is in bounds. */
    all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
    all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
    all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
    all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      const unsigned char *block_offset[4];
      /* Order matches neighbors[]: up, left, right, down. */
      block_offset[0] = best_address - in_what_stride;
      block_offset[1] = best_address - 1;
      block_offset[2] = best_address + 1;
      block_offset[3] = best_address + in_what_stride;

      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                     sad_array);

      for (j = 0; j < 4; ++j) {
        if (sad_array[j] < bestsad) {
          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
          /* Pay the MV rate cost only when the raw SAD already wins. */
          sad_array[j] +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (sad_array[j] < bestsad) {
            bestsad = sad_array[j];
            best_site = j;
          }
        }
      }
    } else {
      for (j = 0; j < 4; ++j) {
        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

        /* Trap illegal vectors */
        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                       best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = j;
            }
          }
        }
      }
    }

    if (best_site == -1) {
      /* No neighbour improved on the current position: converged. */
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  /* Back to 1/8-pel units (multiply, not shift: row/col may be negative). */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1821
1822 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1823 void print_mode_context(void) {
1824 FILE *f = fopen("modecont.c", "w");
1825 int i, j;
1826
1827 fprintf(f, "#include \"entropy.h\"\n");
1828 fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1829 fprintf(f, "{\n");
1830
1831 for (j = 0; j < 6; ++j) {
1832 fprintf(f, " { /* %d */\n", j);
1833 fprintf(f, " ");
1834
1835 for (i = 0; i < 4; ++i) {
1836 int overal_prob;
1837 int this_prob;
1838 int count;
1839
1840 /* Overall probs */
1841 count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1842
1843 if (count)
1844 overal_prob = 256 * mv_mode_cts[i][0] / count;
1845 else
1846 overal_prob = 128;
1847
1848 if (overal_prob == 0) overal_prob = 1;
1849
1850 /* context probs */
1851 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1852
1853 if (count)
1854 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1855 else
1856 this_prob = 128;
1857
1858 if (this_prob == 0) this_prob = 1;
1859
1860 fprintf(f, "%5d, ", this_prob);
1861 }
1862
1863 fprintf(f, " },\n");
1864 }
1865
1866 fprintf(f, "};\n");
1867 fclose(f);
1868 }
1869
1870 /* MV ref count VP8_ENTROPY_STATS stats code */
1871 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1872 void init_mv_ref_counts() {
1873 memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1874 memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1875 }
1876
/* Accumulate entropy statistics for the inter-mode decision tree.
 *
 * The mode is signalled as a cascade of binary decisions, tested in the
 * order ZEROMV, NEARESTMV, NEARMV, NEWMV.  For each level, bump the
 * "taken" counter ([...][0]) if the mode matches and stop; otherwise bump
 * the "not taken" counter ([...][1]) and move on to the next level.
 * ct[level] selects the context bin at that level.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
  static const MB_PREDICTION_MODE decision[4] = { ZEROMV, NEARESTMV, NEARMV,
                                                  NEWMV };
  int level;

  for (level = 0; level < 4; ++level) {
    const int taken = (m == decision[level]);

    ++mv_ref_ct[ct[level]][level][taken ? 0 : 1];
    ++mv_mode_cts[level][taken ? 0 : 1];

    if (taken) break;
  }
}
1910
1911 #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
1912
1913 #endif
1914