1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "./vp8_rtcd.h"
13 #include "./vpx_dsp_rtcd.h"
14 #include "onyx_int.h"
15 #include "mcomp.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_config.h"
18 #include <stdio.h>
19 #include <limits.h>
20 #include <math.h>
21 #include "vp8/common/findnearmv.h"
22 #include "vp8/common/common.h"
23 #include "vpx_dsp/vpx_dsp_common.h"
24
25 #ifdef VP8_ENTROPY_STATS
26 static int mv_ref_ct [31] [4] [2];
27 static int mv_mode_cts [4] [2];
28 #endif
29
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    /* MV costing is based on the distribution of vectors in the previous
     * frame and as such will tend to over state the cost of vectors. In
     * addition coding a new vector can have a knock on effect on the cost
     * of subsequent vectors and the quality of prediction from NEAR and
     * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     * limited extent, for some account to be taken of these factors.
     */
    const int row_cost = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1];
    const int col_cost = mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];

    return ((row_cost + col_cost) * Weight) >> 7;
}
41
/* Rate cost of coding (mv - ref), scaled by error_per_bit with rounding.
 * A NULL cost table means costing is disabled and yields zero.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    int bits;

    if (!mvcost)
        return 0;

    bits = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
           mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];
    return (bits * error_per_bit + 128) >> 8;
}
51
/* Full-pel SAD-stage analogue of mv_err_cost(): the vector difference is
 * used unscaled (whole-pel units).  NULL table => zero cost.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    int bits;

    if (!mvsadcost)
        return 0;

    bits = mvsadcost[0][mv->as_mv.row - ref->as_mv.row] +
           mvsadcost[1][mv->as_mv.col - ref->as_mv.col];
    return (bits * error_per_bit + 128) >> 8;
}
62
/* Build the diamond-search site table: a centre entry followed by four
 * sites (up, down, left, right) per step, with the radius halving each
 * step from MAX_FIRST_STEP down to 1.  Offsets are precomputed for the
 * given row stride.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    int radius = MAX_FIRST_STEP;
    int count = 0;

    /* Centre point first. */
    x->ss[count].mv.col = 0;
    x->ss[count].mv.row = 0;
    x->ss[count].offset = 0;
    count++;

    while (radius > 0)
    {
        /* Up, down, left, right — same order as the unrolled original. */
        const int site_row[4] = { -radius, radius, 0, 0 };
        const int site_col[4] = { 0, 0, -radius, radius };
        int i;

        for (i = 0; i < 4; i++)
        {
            x->ss[count].mv.col = site_col[i];
            x->ss[count].mv.row = site_row[i];
            x->ss[count].offset = site_row[i] * stride + site_col[i];
            count++;
        }

        /* Contract the search radius. */
        radius /= 2;
    }

    x->ss_count = count;
    x->searches_per_step = 4;
}
110
/* Build the 8-point (square) search site table: a centre entry followed
 * by eight sites per step (the four axial neighbours then the four
 * diagonals), with the radius halving each step from MAX_FIRST_STEP
 * down to 1.  Offsets are precomputed for the given row stride.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    int radius = MAX_FIRST_STEP;
    int count = 0;

    /* Centre point first. */
    x->ss[count].mv.col = 0;
    x->ss[count].mv.row = 0;
    x->ss[count].offset = 0;
    count++;

    while (radius > 0)
    {
        /* Up, down, left, right, then the four diagonals — same order
         * as the unrolled original.
         */
        const int site_row[8] =
            { -radius, radius, 0, 0, -radius, -radius, radius, radius };
        const int site_col[8] =
            { 0, 0, -radius, radius, -radius, radius, -radius, radius };
        int i;

        for (i = 0; i < 8; i++)
        {
            x->ss[count].mv.col = site_col[i];
            x->ss[count].mv.row = site_row[i];
            x->ss[count].offset = site_row[i] * stride + site_col[i];
            count++;
        }

        /* Contract the search radius. */
        radius /= 2;
    }

    x->ss_count = count;
    x->searches_per_step = 8;
}
182
183 /*
184 * To avoid the penalty for crossing cache-line read, preload the reference
185 * area in a small buffer, which is aligned to make sure there won't be crossing
186 * cache-line read while reading from this buffer. This reduced the cpu
187 * cycles spent on reading ref data in sub-pixel filter functions.
188 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
189 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
190 * could reduce the area.
191 */
192
/* Helper macros for vp8_find_best_sub_pixel_step_iteratively().  They
 * rely on locals of that function: mvcost, error_per_bit, rr/rc, y,
 * y_stride, offset, z, b, vfp, sse, thismse, besterr, br/bc,
 * distortion, sse1, and the clamp bounds minc/maxc/minr/maxr.
 */

/* estimated cost of a motion vector (r,c); zero when mvcost is NULL */
#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
/* pointer to predictor base of a motionvector; r,c are in 1/4-pel units */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
/* convert motion vector component to offset for svf calc (1/4-pel -> 1/8-pel phase) */
#define SP(x) (((x)&3)<<1)
/* returns subpixel variance error function. */
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
/* run statement s when (r,c) is inside the clamp bounds, else statement e */
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r,c) (MVC(r,c)+DIST(r,c))
/* checks if (r,c) has better score than previous best; out-of-range
 * candidates get v=UINT_MAX so they lose later comparisons */
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
206
/* Iterative sub-pixel refinement: up to 4 rounds of half-pel search
 * followed by up to 4 rounds of quarter-pel search around the best
 * full-pel vector.  On entry *bestmv is in full-pel units; on return it
 * is in 1/8-pel units.  Returns the best score (distortion + mv cost),
 * or INT_MAX if the refined vector ended up outside the long-MV range.
 * *distortion and *sse1 receive the winner's distortion and SSE.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);

    /* Reference MV in half-pel units (used by MVC for rate costing). */
    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    /* Current best position in quarter-pel units. */
    int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    int tr = br, tc = bc;
    unsigned int besterr;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
    int thismse;

    /* Clamp bounds in quarter-pel units, also limited by the maximum
     * codable long-MV magnitude relative to the reference vector. */
    int minc = VPXMAX(x->mv_col_min * 4,
                      (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    int maxc = VPXMIN(x->mv_col_max * 4,
                      (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    int minr = VPXMAX(x->mv_row_min * 4,
                      (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    int maxr = VPXMIN(x->mv_row_max * 4,
                      (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

    int y_stride;
    int offset;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;


#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1;

    /* Clamping to avoid out-of-range data access */
    buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* Base offset used by PRE() to translate (r,c) back to a pointer. */
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    /* central mv */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;

    /* calculate central point error */
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */
    while (--halfiters)
    {
        /* 1/2 pel */
        CHECK_BETTER(left, tr, tc - 2);
        CHECK_BETTER(right, tr, tc + 2);
        CHECK_BETTER(up, tr - 2, tc);
        CHECK_BETTER(down, tr + 2, tc);

        /* Pick the diagonal in the quadrant of the two best axial moves. */
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 2, tc - 2);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 2, tc + 2);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 2, tc - 2);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 2, tc + 2);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */

    /* 1/4 pel */
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - 1);
        CHECK_BETTER(right, tr, tc + 1);
        CHECK_BETTER(up, tr - 1, tc);
        CHECK_BETTER(down, tr + 1, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 1, tc - 1);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 1, tc + 1);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 1, tc - 1);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 1, tc + 1);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* Convert quarter-pel result back to 1/8-pel units. */
    bestmv->as_mv.row = br * 2;
    bestmv->as_mv.col = bc * 2;

    /* Reject vectors outside the maximum full-pel range (scaled to 1/8 pel). */
    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
357 #undef MVC
358 #undef PRE
359 #undef SP
360 #undef DIST
361 #undef IFMVCV
362 #undef ERR
363 #undef CHECK_BETTER
364
/* One half-pel round then one quarter-pel round of refinement around
 * the best full-pel vector: left/right, up/down, then the one diagonal
 * implied by the two winning axial directions, at each precision.  On
 * entry *bestmv is in full-pel units; on return it is in 1/8-pel units.
 * Returns the best score (distortion + mv cost); *distortion and *sse1
 * receive the winner's distortion and SSE.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv (scale to 1/8-pel units) */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    startmv = *bestmv;

    /* calculate central point error */
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;
    /* (col - 8) | 4 == col - 4 here since col is a multiple of 8 */
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* time to check quarter pels. */
    /* Re-centre the predictor pointer on the half-pel winner. */
    if (bestmv->as_mv.row < startmv.as_mv.row)
        y -= y_stride;

    if (bestmv->as_mv.col < startmv.as_mv.col)
        y--;

    startmv = *bestmv;



    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        /* col is full-pel aligned: step one pel left and use phase 6/8 */
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }

    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 4;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        /* row is full-pel aligned: step one row up and use phase 6/8 */
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    }

    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 4;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 2;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
            thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 2;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
            thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return bestmse;
}
675
/* Half-pel-only refinement around the best full-pel vector: left/right,
 * up/down, then the one diagonal implied by the two winning axial
 * directions (no quarter-pel pass).  On entry *bestmv is in full-pel
 * units; on return it is in 1/8-pel units.  Returns the best score
 * (distortion + mv cost); *distortion and *sse1 receive the winner's
 * distortion and SSE.
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv (scale to 1/8-pel units) */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    startmv = *bestmv;

    /* calculate central point error */
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;
    /* (col - 8) | 4 == col - 4 here since col is a multiple of 8 */
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* now check 1 more diagonal - */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return bestmse;
}
812
/* Helper macros for the integer-pel searches below.  They operate on
 * locals of the enclosing function: br/bc (current centre), this_mv,
 * thissad, bestsad, best_site, i, all_in, fcenter_mv, mvsadcost and
 * sad_per_bit.
 */

/* Set all_in nonzero iff every candidate within +/- range of (br,bc)
 * lies inside the MV clamp window (lets the inner loop skip per-point
 * bounds checks). */
#define CHECK_BOUNDS(range) \
{\
    all_in = 1;\
    all_in &= ((br-range) >= x->mv_row_min);\
    all_in &= ((br+range) <= x->mv_row_max);\
    all_in &= ((bc-range) >= x->mv_col_min);\
    all_in &= ((bc+range) <= x->mv_col_max);\
}

/* Skip (continue) the current loop iteration if this_mv is outside the
 * MV clamp window.  Must only be used inside a loop body. */
#define CHECK_POINT \
{\
    if (this_mv.as_mv.col < x->mv_col_min) continue;\
    if (this_mv.as_mv.col > x->mv_col_max) continue;\
    if (this_mv.as_mv.row < x->mv_row_min) continue;\
    if (this_mv.as_mv.row > x->mv_row_max) continue;\
}

/* If the raw SAD beats the current best, add the MV rate cost and
 * re-test; record loop index i as best_site on success.  The early
 * test avoids the cost computation for obviously worse candidates. */
#define CHECK_BETTER \
{\
    if (thissad < bestsad)\
    {\
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
        if (thissad < bestsad)\
        {\
            bestsad = thissad;\
            best_site = i;\
        }\
    }\
}
842
/* For each of the 6 hexagon directions (index k = direction last moved),
 * the 3 new hexagon points that were not already evaluated around the
 * previous centre.  {row, col} offsets relative to the new centre.
 */
static const MV next_chkpts[6][3] =
{
    {{ -2, 0}, { -1, -2}, {1, -2}},
    {{ -1, -2}, {1, -2}, {2, 0}},
    {{1, -2}, {2, 0}, {1, 2}},
    {{2, 0}, {1, 2}, { -1, 2}},
    {{1, 2}, { -1, 2}, { -2, 0}},
    {{ -1, 2}, { -2, 0}, { -1, -2}}
};
852
/* Integer-pel hexagon search: repeatedly move the centre to the best of
 * the 6 hexagon points (re-checking only the 3 new points each move via
 * next_chkpts), then refine with a small diamond of the 4 one-away
 * neighbours.  ref_mv is clamped in place and used as the start point;
 * the winner is written to best_mv (full-pel units).  Returns the best
 * SAD score including the MV rate cost.  mvcost is unused here.
 */
int vp8_hex_search
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    const vp8_variance_fn_ptr_t *vfp,
    int *mvsadcost[2],
    int *mvcost[2],
    int_mv *center_mv
)
{
    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    int i, j;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

    int in_what_stride = pre_stride;
    int br, bc;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    unsigned char *base_offset;
    unsigned char *this_offset;
    int k = -1;                 /* last hexagon direction taken; indexes next_chkpts */
    int all_in;
    int best_site = -1;
    int hex_range = 127;        /* max hexagon refinement iterations */
    int dia_range = 8;          /* max small-diamond refinement iterations */

    /* center_mv is in 1/8-pel units; convert to full pel for SAD costing. */
    int_mv fcenter_mv;
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    (void)mvcost;

    /* adjust ref_mv to make sure it is within MV range */
    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    br = ref_mv->as_mv.row;
    bc = ref_mv->as_mv.col;

    /* Work out the start point for the search */
    base_offset = (unsigned char *)(base_pre + d->offset);
    this_offset = base_offset + (br * (pre_stride)) + bc;
    this_mv.as_mv.row = br;
    this_mv.as_mv.col = bc;
    bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
            + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
    /* Lower search range based on prediction info */
    if (search_param >= 6) goto cal_neighbors;
    else if (search_param >= 5) hex_range = 4;
    else if (search_param >= 4) hex_range = 6;
    else if (search_param >= 3) hex_range = 15;
    else if (search_param >= 2) hex_range = 31;
    else if (search_param >= 1) hex_range = 63;

    dia_range = 8;
#else
    (void)search_param;
#endif

    /* hex search: first iteration evaluates all 6 hexagon points */
    CHECK_BOUNDS(2)

    if(all_in)
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
            CHECK_BETTER
        }
    }else
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            CHECK_POINT
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
            CHECK_BETTER
        }
    }

    if (best_site == -1)
        goto cal_neighbors;
    else
    {
        br += hex[best_site].row;
        bc += hex[best_site].col;
        k = best_site;
    }

    /* subsequent iterations only evaluate the 3 points not yet seen */
    for (j = 1; j < hex_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(2)

        if(all_in)
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }else
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;
        else
        {
            br += next_chkpts[k][best_site].row;
            bc += next_chkpts[k][best_site].col;
            /* update direction index, wrapping modulo 6 */
            k += 5 + best_site;
            if (k >= 12) k -= 12;
            else if (k >= 6) k -= 6;
        }
    }

    /* check 4 1-away neighbors */
cal_neighbors:
    for (j = 0; j < dia_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(1)

        if(all_in)
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }else
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;
        else
        {
            br += neighbors[best_site].row;
            bc += neighbors[best_site].col;
        }
    }

    best_mv->as_mv.row = br;
    best_mv->as_mv.col = bc;

    return bestsad;
}
1042 #undef CHECK_BOUNDS
1043 #undef CHECK_POINT
1044 #undef CHECK_BETTER
1045
/* Diamond-pattern full-pel motion search using SAD.
 *
 * Starting from ref_mv (clamped to the legal motion-vector range), each
 * step probes the points of the precomputed search-site pattern (x->ss)
 * around the current best position and keeps the candidate with the
 * lowest SAD + motion-vector rate cost.  search_param selects the size
 * of the first step; *num00 counts the steps for which the centre
 * (unmoved) position stayed best, which callers use to skip redundant
 * searches on later iterations.
 *
 * On return best_mv holds the best full-pel vector and the return value
 * is the variance of that position plus the 1/8-pel mv rate cost.
 */
int vp8_diamond_search_sad_c
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *best_address;

    int tot_steps;
    int_mv this_mv;

    unsigned int bestsad;
    unsigned int thissad;
    int best_site = 0;
    int last_site = 0;

    int ref_row;
    int ref_col;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    ref_row = ref_mv->as_mv.row;
    ref_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Work out the start point for the search */
    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
    best_address = in_what;

    /* Check the starting position */
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param determines the length of the initial step and hence
     * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
     * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    i = 1;

    for (step = 0; step < tot_steps ; step++)
    {
        for (j = 0 ; j < x->searches_per_step ; j++)
        {
            /* Trap illegal vectors */
            this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
            this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                check_here = ss[i].offset + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

                if (thissad < bestsad)
                {
                    /* Only pay for the mv rate cost when the raw SAD
                     * already beats the current best.
                     */
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = i;
                    }
                }
            }

            i++;
        }

        if (best_site != last_site)
        {
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++; /* centre still best: record a "zero" step */
    }

    /* Convert to 1/8-pel units with a multiply rather than "<< 3":
     * left-shifting a negative row/col is undefined behavior, and this
     * matches vp8_diamond_search_sadx4.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1169
/* SIMD-friendly variant of vp8_diamond_search_sad_c.
 *
 * Identical search strategy, but when every candidate point of a step
 * lies inside the legal motion-vector bounds the SADs are computed four
 * at a time through fn_ptr->sdx4df; otherwise it falls back to the
 * per-point scalar path.  Outputs match the plain C version.
 */
int vp8_diamond_search_sadx4
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *best_address;

    int tot_steps;
    int_mv this_mv;

    unsigned int bestsad;
    unsigned int thissad;
    int best_site = 0;
    int last_site = 0;

    int ref_row;
    int ref_col;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    ref_row = ref_mv->as_mv.row;
    ref_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Work out the start point for the search */
    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
    best_address = in_what;

    /* Check the starting position */
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param determines the length of the initial step and hence the
     * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
     * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    i = 1;

    for (step = 0; step < tot_steps ; step++)
    {
        int all_in = 1, t;

        /* To know if all neighbor points are within the bounds, 4 bounds
         * checking are enough instead of checking 4 bounds for each
         * points.
         */
        all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
        all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
        all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
        all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);

        if (all_in)
        {
            unsigned int sad_array[4];

            /* Fast path: compute SADs for four search sites per call. */
            for (j = 0 ; j < x->searches_per_step ; j += 4)
            {
                const unsigned char *block_offset[4];

                for (t = 0; t < 4; t++)
                    block_offset[t] = ss[i+t].offset + best_address;

                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

                for (t = 0; t < 4; t++, i++)
                {
                    if (sad_array[t] < bestsad)
                    {
                        /* Add the mv rate cost only when the raw SAD wins. */
                        this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
                        this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
                        sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
                                                       mvsadcost, sad_per_bit);

                        if (sad_array[t] < bestsad)
                        {
                            bestsad = sad_array[t];
                            best_site = i;
                        }
                    }
                }
            }
        }
        else
        {
            /* Slow path: some sites fall outside the bounds, so each
             * point must be validated individually.
             */
            for (j = 0 ; j < x->searches_per_step ; j++)
            {
                /* Trap illegal vectors */
                this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
                this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                    (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
                {
                    check_here = ss[i].offset + best_address;
                    thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

                    if (thissad < bestsad)
                    {
                        this_mv.as_mv.row = this_row_offset;
                        this_mv.as_mv.col = this_col_offset;
                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                                  mvsadcost, sad_per_bit);

                        if (thissad < bestsad)
                        {
                            bestsad = thissad;
                            best_site = i;
                        }
                    }
                }
                i++;
            }
        }

        if (best_site != last_site)
        {
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++; /* centre still best: record a "zero" step */
    }

    /* Scale the full-pel result to 1/8-pel units for the rate cost. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1336
/* Exhaustive full-pel motion search.
 *
 * Scans every position in a square window of +/-"distance" pels around
 * ref_mv (clipped to the legal motion-vector bounds) and keeps the
 * position with the lowest SAD + motion-vector rate cost.  The winning
 * vector is written to d->bmi.mv; the return value is the variance of
 * the best position plus its 1/8-pel mv rate cost.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that
     * stretch beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;

        for (c = col_min; c < col_max; c++)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

            this_mv.as_mv.col = c;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                      mvsadcost, sad_per_bit);

            if (thissad < bestsad)
            {
                bestsad = thissad;
                best_mv->as_mv.row = r;
                best_mv->as_mv.col = c;
                bestaddress = check_here;
            }

            check_here++;
        }
    }

    /* Convert to 1/8-pel units with a multiply rather than "<< 3":
     * left-shifting a negative row/col is undefined behavior, and this
     * matches the sadx4/sadx8 variants.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1431
/* Exhaustive full-pel motion search, 3-at-a-time SIMD variant.
 *
 * Same window and cost model as vp8_full_search_sad_c, but computes
 * SADs for three consecutive columns per call through fn_ptr->sdx3f,
 * falling back to the scalar sdf for the remaining columns of each row.
 * The winning vector is written to d->bmi.mv; the return value is the
 * variance of the best position plus its 1/8-pel mv rate cost.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    unsigned int sad_array[3];

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that stretch
     * beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        /* Fast path: three columns per sdx3f call. */
        while ((c + 2) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                if (thissad < bestsad)
                {
                    /* Add the mv rate cost only when the raw SAD wins. */
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Scalar tail: fewer than three columns remain in this row. */
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

            if (thissad < bestsad)
            {
                this_mv.as_mv.col = c;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = r;
                    best_mv->as_mv.col = c;
                    bestaddress = check_here;
                }
            }

            check_here++;
            c++;
        }
    }

    /* Convert to 1/8-pel units with a multiply rather than "<< 3":
     * left-shifting a negative row/col is undefined behavior, and this
     * matches the sadx4/sadx8 variants.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1564
/* Exhaustive full-pel motion search, 8-at-a-time SIMD variant.
 *
 * Same window and cost model as vp8_full_search_sad_c, but each row is
 * scanned in three tiers: eight columns per sdx8f call, then three per
 * sdx3f call, then a scalar sdf tail.  The winning vector is written to
 * d->bmi.mv; the return value is the variance of the best position plus
 * its 1/8-pel mv rate cost.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    unsigned char *in_what;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    // TODO(johannkoenig): check if this alignment is necessary.
    DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
    unsigned int sad_array[3];

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that stretch
     * beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        /* Tier 1: eight columns per sdx8f call. */
        while ((c + 7) < col_max)
        {
            int i;

            fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);

            for (i = 0; i < 8; i++)
            {
                thissad = sad_array8[i];

                if (thissad < bestsad)
                {
                    /* Add the mv rate cost only when the raw SAD wins. */
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Tier 2: three columns per sdx3f call. */
        while ((c + 2) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                if (thissad < bestsad)
                {
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Tier 3: scalar tail for the last few columns of the row. */
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);

            if (thissad < bestsad)
            {
                this_mv.as_mv.col = c;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = r;
                    best_mv->as_mv.col = c;
                    bestaddress = check_here;
                }
            }

            check_here ++;
            c ++;
        }
    }

    /* Scale the full-pel result to 1/8-pel units for the rate cost. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1728
/* Iterative full-pel refinement search.
 *
 * Repeatedly evaluates the four 1-away neighbors (up, left, right,
 * down) of the current best vector and moves to whichever lowers
 * SAD + motion-vector rate cost, stopping when no neighbor improves or
 * after search_range iterations.  ref_mv is refined in place; the
 * return value is the variance of the final position plus its 1/8-pel
 * mv rate cost.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                              int error_per_bit, int search_range,
                              vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                              int_mv *center_mv)
{
    /* N, W, E, S offsets relative to the current best position;
     * static const so the table is not rebuilt on every call.
     */
    static const MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
    short this_row_offset, this_col_offset;

    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    unsigned char *check_here;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
              + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (i = 0; i < search_range; i++)
    {
        int best_site = -1;

        for (j = 0; j < 4; j++)
        {
            this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
            this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

            /* Trap illegal vectors */
            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

                if (thissad < bestsad)
                {
                    /* Add the mv rate cost only when the raw SAD wins. */
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = j;
                    }
                }
            }
        }

        if (best_site == -1)
            break;
        else
        {
            ref_mv->as_mv.row += neighbors[best_site].row;
            ref_mv->as_mv.col += neighbors[best_site].col;
            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
        }
    }

    /* Convert to 1/8-pel units with a multiply rather than "<< 3":
     * left-shifting a negative row/col is undefined behavior, and this
     * matches vp8_refining_search_sadx4.
     */
    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1807
/* SIMD-friendly variant of vp8_refining_search_sad_c.
 *
 * Identical refinement strategy, but when all four 1-away neighbors of
 * the current position lie inside the legal bounds their SADs are
 * computed in one fn_ptr->sdx4df call; otherwise it falls back to the
 * per-neighbor scalar path.  ref_mv is refined in place.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv)
{
    /* N, W, E, S offsets relative to the current best position. */
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
    short this_row_offset, this_col_offset;

    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    unsigned char *check_here;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* mvsadcost tables are indexed in full-pel units; center_mv is 1/8 pel */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
              + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (i=0; i<search_range; i++)
    {
        int best_site = -1;
        int all_in = 1;

        /* Four bounds checks suffice to validate all four neighbors. */
        all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
        all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
        all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
        all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);

        if(all_in)
        {
            unsigned int sad_array[4];
            const unsigned char *block_offset[4];
            /* Addresses in the same N, W, E, S order as neighbors[]. */
            block_offset[0] = best_address - in_what_stride;
            block_offset[1] = best_address - 1;
            block_offset[2] = best_address + 1;
            block_offset[3] = best_address + in_what_stride;

            fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

            for (j = 0; j < 4; j++)
            {
                if (sad_array[j] < bestsad)
                {
                    /* Add the mv rate cost only when the raw SAD wins. */
                    this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
                    this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
                    sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                    if (sad_array[j] < bestsad)
                    {
                        bestsad = sad_array[j];
                        best_site = j;
                    }
                }
            }
        }
        else
        {
            /* Slow path: validate and test each neighbor individually. */
            for (j = 0 ; j < 4 ; j++)
            {
                this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
                this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                    (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
                {
                    check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);

                    if (thissad < bestsad)
                    {
                        this_mv.as_mv.row = this_row_offset;
                        this_mv.as_mv.col = this_col_offset;
                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                        if (thissad < bestsad)
                        {
                            bestsad = thissad;
                            best_site = j;
                        }
                    }
                }
            }
        }

        if (best_site == -1)
            break;
        else
        {
            ref_mv->as_mv.row += neighbors[best_site].row;
            ref_mv->as_mv.col += neighbors[best_site].col;
            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
        }
    }

    /* Scale the full-pel result to 1/8-pel units for the rate cost. */
    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1922
1923 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1924 void print_mode_context(void)
1925 {
1926 FILE *f = fopen("modecont.c", "w");
1927 int i, j;
1928
1929 fprintf(f, "#include \"entropy.h\"\n");
1930 fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1931 fprintf(f, "{\n");
1932
1933 for (j = 0; j < 6; j++)
1934 {
1935 fprintf(f, " { /* %d */\n", j);
1936 fprintf(f, " ");
1937
1938 for (i = 0; i < 4; i++)
1939 {
1940 int overal_prob;
1941 int this_prob;
1942 int count;
1943
1944 /* Overall probs */
1945 count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1946
1947 if (count)
1948 overal_prob = 256 * mv_mode_cts[i][0] / count;
1949 else
1950 overal_prob = 128;
1951
1952 if (overal_prob == 0)
1953 overal_prob = 1;
1954
1955 /* context probs */
1956 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1957
1958 if (count)
1959 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1960 else
1961 this_prob = 128;
1962
1963 if (this_prob == 0)
1964 this_prob = 1;
1965
1966 fprintf(f, "%5d, ", this_prob);
1967 }
1968
1969 fprintf(f, " },\n");
1970 }
1971
1972 fprintf(f, "};\n");
1973 fclose(f);
1974 }
1975
1976 /* MV ref count VP8_ENTROPY_STATS stats code */
1977 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1978 void init_mv_ref_counts()
1979 {
1980 memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1981 memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1982 }
1983
/* Accumulate MV mode statistics for one macroblock.
 *
 * The VP8 mode decision is a binary tree walked in the order
 * ZEROMV -> NEARESTMV -> NEARMV -> NEWMV.  At each level a "hit"
 * (mode m matches) bumps the [0] counter for that level's context
 * and stops; a "miss" bumps the [1] counter and descends to the
 * next level.  ct[level] selects the context bin for that level.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
{
    const MB_PREDICTION_MODE tree[4] = { ZEROMV, NEARESTMV, NEARMV, NEWMV };
    int level;

    for (level = 0; level < 4; level++)
    {
        if (m == tree[level])
        {
            ++mv_ref_ct[ct[level]][level][0];
            ++mv_mode_cts[level][0];
            return;
        }

        ++mv_ref_ct[ct[level]][level][1];
        ++mv_mode_cts[level][1];
    }
}
2030
2031 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
2032
2033 #endif
2034