1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "onyx_int.h"
13 #include "mcomp.h"
14 #include "vpx_mem/vpx_mem.h"
15 #include "vpx_config.h"
16 #include <stdio.h>
17 #include <limits.h>
18 #include <math.h>
19 #include "vp8/common/findnearmv.h"
20
21 #ifdef VP8_ENTROPY_STATS
22 static int mv_ref_ct [31] [4] [2];
23 static int mv_mode_cts [4] [2];
24 #endif
25
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    /* Estimate the bit cost of coding (mv - ref), scaled by Weight/128.
     *
     * The cost tables are derived from the previous frame's vector
     * distribution, so they tend to over-state the true cost; they also
     * cannot capture the knock-on effect a new vector has on later vectors
     * and on NEAR/NEAREST prediction quality. "Weight" lets the caller
     * compensate, to a limited extent, for those effects.
     *
     * Components are in 1/8-pel units; the tables are indexed in 1/4-pel
     * units, hence the >> 1 on each difference.
     */
    const int row_cost = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1];
    const int col_cost = mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];

    return ((row_cost + col_cost) * Weight) >> 7;
}
37
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    /* Rate term for RD decisions: bits-to-code (mv - ref) converted to the
     * error domain via error_per_bit, rounded (+128) and scaled (>> 8).
     * A NULL mvcost disables mv costing entirely.
     */
    int cost = 0;

    if (mvcost)
    {
        /* Differences are in 1/8 pel; tables are indexed in 1/4 pel. */
        const int row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
        const int col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;

        cost = ((mvcost[0][row_idx] + mvcost[1][col_idx])
                * error_per_bit + 128) >> 8;
    }

    return cost;
}
47
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    /* SAD-domain mv cost computed on a full-pixel basis: the SAD cost
     * tables are indexed directly by the full-pel component differences
     * (no unit conversion, unlike mv_err_cost). A NULL table pointer
     * disables mv costing.
     */
    int cost = 0;

    if (mvsadcost)
    {
        const int row_diff = mv->as_mv.row - ref->as_mv.row;
        const int col_diff = mv->as_mv.col - ref->as_mv.col;

        cost = ((mvsadcost[0][row_diff] + mvsadcost[1][col_diff])
                * error_per_bit + 128) >> 8;
    }

    return cost;
}
58
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    /* Build the diamond-search site table: 4 candidate sites per step
     * (up, down, left, right), with the step radius halving each ring
     * from MAX_FIRST_STEP down to 1. Each site stores both its mv
     * displacement and the matching buffer offset for the given stride.
     */
    static const int dir_row[4] = { -1, 1, 0, 0 };
    static const int dir_col[4] = { 0, 0, -1, 1 };
    int radius = MAX_FIRST_STEP;
    int count = 0;
    int i;

    /* Site 0 is always the centre (zero displacement). */
    x->ss[count].mv.col = 0;
    x->ss[count].mv.row = 0;
    x->ss[count].offset = 0;
    count++;

    /* One ring of 4 sites per radius; contract by half each time. */
    for (; radius > 0; radius /= 2)
    {
        for (i = 0; i < 4; i++)
        {
            x->ss[count].mv.row = dir_row[i] * radius;
            x->ss[count].mv.col = dir_col[i] * radius;
            x->ss[count].offset = dir_row[i] * radius * stride
                                  + dir_col[i] * radius;
            count++;
        }
    }

    x->ss_count = count;
    x->searches_per_step = 4;
}
106
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    /* Build the 8-point (square) search site table: up, down, left, right
     * plus the four diagonals at each step, with the step radius halving
     * each ring from MAX_FIRST_STEP down to 1. The direction order below
     * matches the order the search code expects.
     */
    static const int dir_row[8] = { -1, 1, 0, 0, -1, -1, 1, 1 };
    static const int dir_col[8] = { 0, 0, -1, 1, -1, 1, -1, 1 };
    int radius = MAX_FIRST_STEP;
    int count = 0;
    int i;

    /* Site 0 is always the centre (zero displacement). */
    x->ss[count].mv.col = 0;
    x->ss[count].mv.row = 0;
    x->ss[count].offset = 0;
    count++;

    /* One ring of 8 sites per radius; contract by half each time. */
    for (; radius > 0; radius /= 2)
    {
        for (i = 0; i < 8; i++)
        {
            x->ss[count].mv.row = dir_row[i] * radius;
            x->ss[count].mv.col = dir_col[i] * radius;
            x->ss[count].offset = dir_row[i] * radius * stride
                                  + dir_col[i] * radius;
            count++;
        }
    }

    x->ss_count = count;
    x->searches_per_step = 8;
}
178
179 /*
180 * To avoid the penalty for crossing cache-line read, preload the reference
181 * area in a small buffer, which is aligned to make sure there won't be crossing
182 * cache-line read while reading from this buffer. This reduced the cpu
183 * cycles spent on reading ref data in sub-pixel filter functions.
184 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
185 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
186 * could reduce the area.
187 */
188
/* Helper macros for vp8_find_best_sub_pixel_step_iteratively() below.
 * They rely on locals declared inside that function (y, y_stride, offset,
 * z, rr, rc, minr/maxr/minc/maxc, besterr, br, bc, sse, thismse) and are
 * #undef'd immediately after it.
 */
/* estimated cost of a motion vector (r,c) */
#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3)<<1)
/* returns subpixel variance error function. */
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
/* run statement s when (r,c) is inside the legal search rectangle, else e */
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r,c) (MVC(r,c)+DIST(r,c))
/* checks if (r,c) has better score than previous best */
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
202
/* Iterative sub-pixel motion refinement around the full-pel *bestmv.
 *
 * Runs up to `halfiters` diamond iterations at half-pel resolution, then up
 * to `quarteriters` at quarter-pel. Each iteration evaluates the four
 * axis-aligned neighbours plus one diagonal picked from the better
 * horizontal and vertical candidates, via the CHECK_BETTER/DIST/MVC macros
 * defined above. The loop terminates early once an iteration fails to move
 * the best point.
 *
 * On return *bestmv holds the winner scaled to 1/8-pel units, *distortion
 * its variance and *sse1 its SSE. Returns the best combined error
 * (distortion + mv cost), or INT_MAX if the result lies outside the legal
 * MV range relative to ref_mv.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);

    /* Reference position in 1/4-pel units (rr/rc, used by MVC) and the
     * current best / trial positions in 1/4-pel units (br/bc, tr/tc). */
    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    int tr = br, tc = bc;
    unsigned int besterr;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
    int thismse;

    /* Legal bounds in 1/4-pel units: frame MV limits intersected with the
     * maximum codable MV magnitude around the reference vector. Read by
     * the IFMVCV macro inside CHECK_BETTER. */
    int minc = MAX(x->mv_col_min * 4,
                   (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    int maxc = MIN(x->mv_col_max * 4,
                   (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    int minr = MAX(x->mv_row_min * 4,
                   (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    int maxr = MIN(x->mv_row_max * 4,
                   (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

    int y_stride;
    int offset;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;


#if ARCH_X86_32 || ARCH_X86_64
    /* On x86, preload the reference area into a 32-byte-stride buffer so
     * sub-pixel filter reads never cross a cache line (see comment above). */
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1;

    /* Clamping to avoid out-of-range data access */
    buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* Full-pel offset of the start point, used by the PRE macro to map
     * quarter-pel coordinates back to buffer positions. */
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    /* central mv (full-pel -> 1/8-pel units) */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;

    /* calculate central point error */
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */
    while (--halfiters)
    {
        /* 1/2 pel */
        CHECK_BETTER(left, tr, tc - 2);
        CHECK_BETTER(right, tr, tc + 2);
        CHECK_BETTER(up, tr - 2, tc);
        CHECK_BETTER(down, tr + 2, tc);

        /* Diagonal chosen from the better horizontal + vertical direction. */
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 2, tc - 2);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 2, tc + 2);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 2, tc - 2);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 2, tc + 2);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */

    /* 1/4 pel */
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - 1);
        CHECK_BETTER(right, tr, tc + 1);
        CHECK_BETTER(up, tr - 1, tc);
        CHECK_BETTER(down, tr + 1, tc);

        /* Diagonal chosen from the better horizontal + vertical direction. */
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 1, tc - 1);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 1, tc + 1);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 1, tc - 1);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 1, tc + 1);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* Convert the winner from 1/4-pel to 1/8-pel units. */
    bestmv->as_mv.row = br * 2;
    bestmv->as_mv.col = bc * 2;

    /* Reject vectors that ended up beyond the codable full-pel range. */
    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
353 #undef MVC
354 #undef PRE
355 #undef SP
356 #undef DIST
357 #undef IFMVCV
358 #undef ERR
359 #undef CHECK_BETTER
360
/* Single-pass sub-pixel refinement around the full-pel *bestmv.
 *
 * First evaluates the four half-pel neighbours (left/right/up/down) plus
 * the one diagonal implied by the better horizontal and vertical results,
 * using the fast half-pel variance helpers. It then repeats the same
 * pattern at quarter-pel precision around the half-pel winner, using the
 * generic sub-pixel variance function vfp->svf.
 *
 * On return *bestmv holds the winner scaled to 1/8-pel units, *distortion
 * its variance and *sse1 its SSE. Returns the best combined error
 * (distortion + mv cost).
 *
 * Fixes vs. previous revision: removed a stray empty statement (";;") in
 * the quarter-pel diagonal case 0, and added the missing "default:" label
 * to the quarter-pel diagonal switch to match its half-pel counterpart.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    /* Source block being predicted. */
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86_32 || ARCH_X86_64
    /* On x86, preload the reference area into a 32-byte-stride buffer so
     * sub-pixel filter reads never cross a cache line (see comment above). */
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv (full-pel -> 1/8-pel units) */
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
    startmv = *bestmv;

    /* calculate central point error */
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);  /* half-pel left */
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;  /* half-pel right */
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);  /* half-pel up */
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;  /* half-pel down */
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal: the one between the better horizontal and
     * the better vertical candidate. */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* time to check quarter pels.
     * If the half-pel winner moved up or left, shift y so that the
     * quarter-pel offsets (computed mod 8 below) index from the correct
     * full-pel sample. */
    if (bestmv->as_mv.row < startmv.as_mv.row)
        y -= y_stride;

    if (bestmv->as_mv.col < startmv.as_mv.col)
        y--;

    startmv = *bestmv;



    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        /* Start is not on a full-pel column: just step 1/4 pel left. */
        this_mv.as_mv.col = startmv.as_mv.col - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        /* Start is on a full-pel column: the point 1/4 pel to the left
         * lives at offset 6/8 within the previous full-pel sample. */
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }

    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 4;  /* quarter-pel right of start */
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        /* Start is not on a full-pel row: just step 1/4 pel up. */
        this_mv.as_mv.row = startmv.as_mv.row - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        /* Start is on a full-pel row: the point 1/4 pel up lives at
         * offset 6/8 within the previous full-pel sample. */
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    }

    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 4;  /* quarter-pel below start */
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        /* Up-left: each axis needs the same full-pel-boundary handling
         * as the individual left/up probes above. */
        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        /* Up-right. */
        this_mv.as_mv.col += 2;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
            thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
        }

        break;
    case 2:
        /* Down-left. */
        this_mv.as_mv.row += 2;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
            thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }

        break;
    case 3:
    default:
        /* Down-right. */
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return bestmse;
}
671
/* Half-pel-only refinement around the full-pel *bestmv.
 *
 * Evaluates the four half-pel neighbours (left/right/up/down) plus the one
 * diagonal implied by the better horizontal and vertical candidates, using
 * the fast half-pel variance helpers. No quarter-pel stage is run.
 *
 * On return *bestmv holds the winner scaled to 1/8-pel units, *distortion
 * its variance and *sse1 its SSE. Returns the best combined error
 * (distortion + mv cost).
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    /* Source block being predicted. */
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86_32 || ARCH_X86_64
    /* On x86, preload the reference area into a 32-byte-stride buffer so
     * sub-pixel filter reads never cross a cache line (see comment above). */
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv (full-pel -> 1/8-pel units) */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    startmv = *bestmv;

    /* calculate central point error */
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);  /* half-pel left */
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;  /* half-pel right */
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);  /* half-pel up */
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;  /* half-pel down */
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* now check 1 more diagonal - the one between the better horizontal
     * and the better vertical candidate. */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return bestmse;
}
808
/* Set all_in nonzero iff every point within +/-range of (br,bc) lies inside
 * the legal MV rectangle, so the per-point bounds checks can be skipped in
 * the candidate loops below. Relies on locals of the enclosing search
 * function (all_in, br, bc, x); #undef'd after vp8_hex_search().
 */
#define CHECK_BOUNDS(range) \
{\
    all_in = 1;\
    all_in &= ((br-range) >= x->mv_row_min);\
    all_in &= ((br+range) <= x->mv_row_max);\
    all_in &= ((bc-range) >= x->mv_col_min);\
    all_in &= ((bc+range) <= x->mv_col_max);\
}

/* Skip (continue) the current loop iteration when this_mv is out of range. */
#define CHECK_POINT \
{\
    if (this_mv.as_mv.col < x->mv_col_min) continue;\
    if (this_mv.as_mv.col > x->mv_col_max) continue;\
    if (this_mv.as_mv.row < x->mv_row_min) continue;\
    if (this_mv.as_mv.row > x->mv_row_max) continue;\
}

/* Record candidate i as the new best if its cost beats bestsad. The cheap
 * SAD-only comparison runs first so the mv cost is only added for
 * candidates that have a chance of winning.
 */
#define CHECK_BETTER \
{\
    if (thissad < bestsad)\
    {\
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
        if (thissad < bestsad)\
        {\
            bestsad = thissad;\
            best_site = i;\
        }\
    }\
}
838
/* For each of the 6 hexagon directions k (indexing the `hex` pattern in
 * vp8_hex_search), the 3 neighbouring hexagon points worth probing next
 * after the search has just moved in direction k — the other 3 were
 * already covered by the previous iteration.
 */
static const MV next_chkpts[6][3] =
{
    {{ -2, 0}, { -1, -2}, {1, -2}},
    {{ -1, -2}, {1, -2}, {2, 0}},
    {{1, -2}, {2, 0}, {1, 2}},
    {{2, 0}, {1, 2}, { -1, 2}},
    {{1, 2}, { -1, 2}, { -2, 0}},
    {{ -1, 2}, { -2, 0}, { -1, -2}}
};
848
/* Hexagon-based full-pel motion search.
 *
 * Starting from the clamped ref_mv, repeatedly probes a 6-point hexagon
 * pattern, re-centring on the best point; once the hexagon stage stops
 * improving (or is skipped for high search_param under multi-res
 * encoding), it refines with up to dia_range passes over the 4 one-pel
 * neighbours. SAD plus full-pel mv cost (mvsadcost) is the selection
 * metric; mvcost is unused here.
 *
 * best_mv receives the winning full-pel vector; returns its cost-biased
 * SAD. Uses the CHECK_BOUNDS/CHECK_POINT/CHECK_BETTER macros above.
 */
int vp8_hex_search
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    const vp8_variance_fn_ptr_t *vfp,
    int *mvsadcost[2],
    int *mvcost[2],
    int_mv *center_mv
)
{
    /* 6-point hexagon pattern and 4-point diamond used for refinement. */
    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    int i, j;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

    int in_what_stride = pre_stride;
    int br, bc;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    unsigned char *base_offset;
    unsigned char *this_offset;
    int k = -1;        /* index of the last hexagon direction taken */
    int all_in;
    int best_site = -1;
    int hex_range = 127;
    int dia_range = 8;

    /* Centre used for mv costing, in full-pel units. */
    int_mv fcenter_mv;
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    (void)mvcost;
    (void)search_param;
    /* adjust ref_mv to make sure it is within MV range */
    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    br = ref_mv->as_mv.row;
    bc = ref_mv->as_mv.col;

    /* Work out the start point for the search */
    base_offset = (unsigned char *)(base_pre + d->offset);
    this_offset = base_offset + (br * (pre_stride)) + bc;
    this_mv.as_mv.row = br;
    this_mv.as_mv.col = bc;
    bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
            + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
    /* Lower search range based on prediction info */
    if (search_param >= 6) goto cal_neighbors;
    else if (search_param >= 5) hex_range = 4;
    else if (search_param >= 4) hex_range = 6;
    else if (search_param >= 3) hex_range = 15;
    else if (search_param >= 2) hex_range = 31;
    else if (search_param >= 1) hex_range = 63;

    dia_range = 8;
#endif

    /* hex search: first iteration probes all 6 points. When the whole
     * hexagon is inside the legal range the per-point check is skipped. */
    CHECK_BOUNDS(2)

    if(all_in)
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
            CHECK_BETTER
        }
    }else
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            CHECK_POINT
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
            CHECK_BETTER
        }
    }

    if (best_site == -1)
        goto cal_neighbors;
    else
    {
        /* Move to the winning point and remember its direction. */
        br += hex[best_site].row;
        bc += hex[best_site].col;
        k = best_site;
    }

    /* Subsequent iterations only probe the 3 new points implied by the
     * last move direction (next_chkpts). */
    for (j = 1; j < hex_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(2)

        if(all_in)
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }else
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;
        else
        {
            br += next_chkpts[k][best_site].row;
            bc += next_chkpts[k][best_site].col;
            /* New hexagon direction index, reduced into [0,6). */
            k += 5 + best_site;
            if (k >= 12) k -= 12;
            else if (k >= 6) k -= 6;
        }
    }

    /* check 4 1-away neighbors */
cal_neighbors:
    for (j = 0; j < dia_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(1)

        if(all_in)
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }else
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;
        else
        {
            br += neighbors[best_site].row;
            bc += neighbors[best_site].col;
        }
    }

    best_mv->as_mv.row = br;
    best_mv->as_mv.col = bc;

    return bestsad;
}
1036 #undef CHECK_BOUNDS
1037 #undef CHECK_POINT
1038 #undef CHECK_BETTER
1039
/* Diamond-pattern motion search (C reference version).
 *
 * Starting from the clamped ref_mv, each step evaluates the SAD at the
 * precomputed diamond points in x->ss around the current best position
 * and recentres on the best one.  search_param selects the initial step
 * size (0 = MAX_FIRST_STEP pel, 1 = MAX_FIRST_STEP/2, ...), so larger
 * values run fewer, finer steps.  *num00 counts steps where the centre
 * stayed on the start point.
 *
 * Returns the variance of the best full-pel match plus the rate cost of
 * its vector relative to center_mv (center_mv is in 1/8-pel units); the
 * winning vector is written to best_mv in full-pel units.
 */
int vp8_diamond_search_sad_c
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *best_address;

    int tot_steps;
    int_mv this_mv;

    unsigned int bestsad;
    unsigned int thissad;
    int best_site = 0;
    int last_site = 0;

    int ref_row;
    int ref_col;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    ref_row = ref_mv->as_mv.row;
    ref_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Work out the start point for the search */
    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
    best_address = in_what;

    /* Check the starting position */
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param determines the length of the initial step and hence
     * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
     * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    i = 1;

    for (step = 0; step < tot_steps ; step++)
    {
        for (j = 0 ; j < x->searches_per_step ; j++)
        {
            /* Trap illegal vectors */
            this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
            this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                /* Cheap SAD first; add the mv rate cost only if the raw
                 * SAD alone already beats the current best. */
                check_here = ss[i].offset + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

                if (thissad < bestsad)
                {
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = i;
                    }
                }
            }

            i++;
        }

        if (best_site != last_site)
        {
            /* Recentre the diamond on the new best point. */
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++;
    }

    /* Convert to 1/8-pel units.  Multiply rather than left-shift: the
     * components may be negative and left-shifting a negative signed
     * value is undefined behaviour in C.  Also matches
     * vp8_diamond_search_sadx4. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1163
/* Diamond-pattern motion search, batched-SAD variant.
 *
 * Same algorithm and return contract as vp8_diamond_search_sad_c, but
 * when all of a step's candidate points fall inside the motion-vector
 * limits the SADs are computed four at a time through fn_ptr->sdx4df.
 * Falls back to the per-point path (with individual bounds checks) when
 * any candidate could be out of range.
 */
int vp8_diamond_search_sadx4
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *best_address;

    int tot_steps;
    int_mv this_mv;

    unsigned int bestsad;
    unsigned int thissad;
    int best_site = 0;
    int last_site = 0;

    int ref_row;
    int ref_col;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    ref_row = ref_mv->as_mv.row;
    ref_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Work out the start point for the search */
    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
    best_address = in_what;

    /* Check the starting position */
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param determines the length of the initial step and hence the
     * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
     * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    i = 1;

    for (step = 0; step < tot_steps ; step++)
    {
        int all_in = 1, t;

        /* To know if all neighbor points are within the bounds, 4 bounds
         * checking are enough instead of checking 4 bounds for each
         * points.
         */
        all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
        all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
        all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
        all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);

        if (all_in)
        {
            unsigned int sad_array[4];

            /* Score the step's candidates four at a time; note i keeps
             * advancing inside the inner loop so ss[] indexing stays in
             * lock-step with sad_array. */
            for (j = 0 ; j < x->searches_per_step ; j += 4)
            {
                const unsigned char *block_offset[4];

                for (t = 0; t < 4; t++)
                    block_offset[t] = ss[i+t].offset + best_address;

                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

                for (t = 0; t < 4; t++, i++)
                {
                    /* Add the mv rate cost only when the raw SAD alone
                     * already beats the current best. */
                    if (sad_array[t] < bestsad)
                    {
                        this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
                        this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
                        sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
                                                       mvsadcost, sad_per_bit);

                        if (sad_array[t] < bestsad)
                        {
                            bestsad = sad_array[t];
                            best_site = i;
                        }
                    }
                }
            }
        }
        else
        {
            for (j = 0 ; j < x->searches_per_step ; j++)
            {
                /* Trap illegal vectors */
                this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
                this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                    (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
                {
                    check_here = ss[i].offset + best_address;
                    thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

                    if (thissad < bestsad)
                    {
                        this_mv.as_mv.row = this_row_offset;
                        this_mv.as_mv.col = this_col_offset;
                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                                  mvsadcost, sad_per_bit);

                        if (thissad < bestsad)
                        {
                            bestsad = thissad;
                            best_site = i;
                        }
                    }
                }
                i++;
            }
        }

        if (best_site != last_site)
        {
            /* Recentre the diamond on the new best point. */
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++;
    }

    /* Convert the winning full-pel vector to 1/8-pel units for the final
     * rate estimate. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1330
/* Exhaustive full search (C reference version).
 *
 * Scores every candidate vector in the square window
 * [ref_mv - distance, ref_mv + distance), clipped to the macroblock's
 * UMV limits, as SAD plus the mv rate cost against center_mv (which is
 * in 1/8-pel units).  The best full-pel vector is stored in d->bmi.mv.
 *
 * Returns the variance of the best match plus its mv rate cost.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
                          in_what_stride, UINT_MAX)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that
     * stretch beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;

        for (c = col_min; c < col_max; c++)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

            this_mv.as_mv.col = c;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                      mvsadcost, sad_per_bit);

            if (thissad < bestsad)
            {
                bestsad = thissad;
                best_mv->as_mv.row = r;
                best_mv->as_mv.col = c;
                bestaddress = check_here;
            }

            check_here++;
        }
    }

    /* Convert to 1/8-pel units.  Multiply rather than left-shift: the
     * components may be negative and left-shifting a negative signed
     * value is undefined behaviour in C.  Also matches
     * vp8_full_search_sadx8. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1426
/* Exhaustive full search using the 3-wide batched SAD kernel.
 *
 * Identical contract to vp8_full_search_sad_c, but each row of the
 * window is scanned three candidates at a time via fn_ptr->sdx3f, with
 * a scalar tail loop for the leftover columns.  The best full-pel
 * vector is stored in d->bmi.mv.
 *
 * Returns the variance of the best match plus its mv rate cost
 * (relative to center_mv, in 1/8-pel units).
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    unsigned int sad_array[3];

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
                          in_what_stride, UINT_MAX)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that stretch
     * beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        /* Batched phase: three candidates per sdx3f call. */
        while ((c + 2) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                if (thissad < bestsad)
                {
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Scalar tail for the remaining columns of this row. */
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

            if (thissad < bestsad)
            {
                this_mv.as_mv.col = c;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = r;
                    best_mv->as_mv.col = c;
                    bestaddress = check_here;
                }
            }

            check_here++;
            c++;
        }

    }

    /* Convert to 1/8-pel units.  Multiply rather than left-shift: the
     * components may be negative and left-shifting a negative signed
     * value is undefined behaviour in C.  Also matches
     * vp8_full_search_sadx8. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1560
/* Exhaustive full search using the 8-wide and 3-wide batched SAD
 * kernels.
 *
 * Same contract as vp8_full_search_sad_c: scores every vector in the
 * clipped [ref_mv - distance, ref_mv + distance) window and stores the
 * best full-pel vector in d->bmi.mv.  Each row is scanned eight
 * candidates at a time via fn_ptr->sdx8f, then three at a time via
 * fn_ptr->sdx3f, then one at a time for the remainder.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    unsigned char *in_what;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    /* sdx8f writes 16-bit SADs, hence the unsigned short array. */
    DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
    unsigned int sad_array[3];

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride,
                          bestaddress, in_what_stride, UINT_MAX)
              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that stretch
     * beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        /* Phase 1: eight candidates per sdx8f call. */
        while ((c + 7) < col_max)
        {
            int i;

            fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);

            for (i = 0; i < 8; i++)
            {
                thissad = sad_array8[i];

                if (thissad < bestsad)
                {
                    /* Add the mv rate cost only when the raw SAD alone
                     * already beats the current best. */
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Phase 2: three candidates per sdx3f call. */
        while ((c + 2) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                if (thissad < bestsad)
                {
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Phase 3: scalar tail for the remaining columns. */
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

            if (thissad < bestsad)
            {
                this_mv.as_mv.col = c;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = r;
                    best_mv->as_mv.col = c;
                    bestaddress = check_here;
                }
            }

            check_here ++;
            c ++;
        }
    }

    /* Convert the winning full-pel vector to 1/8-pel units for the final
     * rate estimate. */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1724
/* Iterative 1-pel refinement search (C reference version).
 *
 * Up to search_range times, tries the four 1-pel neighbours (up, left,
 * right, down) of the current vector and moves to the best one; stops
 * early when no neighbour improves on the current best.  ref_mv is
 * updated in place with the refined full-pel vector.
 *
 * Returns the variance of the refined match plus its mv rate cost
 * (relative to center_mv, which is in 1/8-pel units).
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                              int error_per_bit, int search_range,
                              vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                              int_mv *center_mv)
{
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
    short this_row_offset, this_col_offset;

    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    unsigned char *check_here;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    bestsad = fn_ptr->sdf(what, what_stride, best_address,
                          in_what_stride, UINT_MAX)
              + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (i=0; i<search_range; i++)
    {
        int best_site = -1;

        for (j = 0 ; j < 4 ; j++)
        {
            /* Trap illegal vectors */
            this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
            this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

                if (thissad < bestsad)
                {
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = j;
                    }
                }
            }
        }

        if (best_site == -1)
            break;
        else
        {
            /* Step to the winning neighbour and continue refining. */
            ref_mv->as_mv.row += neighbors[best_site].row;
            ref_mv->as_mv.col += neighbors[best_site].col;
            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
        }
    }

    /* Convert to 1/8-pel units.  Multiply rather than left-shift: the
     * components may be negative and left-shifting a negative signed
     * value is undefined behaviour in C.  Also matches
     * vp8_refining_search_sadx4. */
    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1804
/* Iterative 1-pel refinement search, batched-SAD variant.
 *
 * Same algorithm and return contract as vp8_refining_search_sad_c, but
 * when all four 1-pel neighbours are inside the motion-vector limits
 * their SADs are computed in one fn_ptr->sdx4df call; otherwise it
 * falls back to per-neighbour SADs with individual bounds checks.
 * ref_mv is refined in place.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv)
{
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
    short this_row_offset, this_col_offset;

    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    unsigned char *check_here;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* center_mv is in 1/8-pel units; the SAD-stage cost tables work in
     * full pels. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    bestsad = fn_ptr->sdf(what, what_stride, best_address,
                          in_what_stride, UINT_MAX)
              + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (i=0; i<search_range; i++)
    {
        int best_site = -1;
        int all_in = 1;

        /* Four range checks cover all four neighbours at once. */
        all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
        all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
        all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
        all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);

        if(all_in)
        {
            unsigned int sad_array[4];
            const unsigned char *block_offset[4];
            /* Pointer offsets matching neighbors[]: up, left, right,
             * down. */
            block_offset[0] = best_address - in_what_stride;
            block_offset[1] = best_address - 1;
            block_offset[2] = best_address + 1;
            block_offset[3] = best_address + in_what_stride;

            fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

            for (j = 0; j < 4; j++)
            {
                if (sad_array[j] < bestsad)
                {
                    this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
                    this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
                    sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                    if (sad_array[j] < bestsad)
                    {
                        bestsad = sad_array[j];
                        best_site = j;
                    }
                }
            }
        }
        else
        {
            for (j = 0 ; j < 4 ; j++)
            {
                /* Trap illegal vectors */
                this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
                this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                    (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
                {
                    check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

                    if (thissad < bestsad)
                    {
                        this_mv.as_mv.row = this_row_offset;
                        this_mv.as_mv.col = this_col_offset;
                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                        if (thissad < bestsad)
                        {
                            bestsad = thissad;
                            best_site = j;
                        }
                    }
                }
            }
        }

        if (best_site == -1)
            break;
        else
        {
            /* Step to the winning neighbour and continue refining. */
            ref_mv->as_mv.row += neighbors[best_site].row;
            ref_mv->as_mv.col += neighbors[best_site].col;
            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
        }
    }

    /* Convert the refined full-pel vector to 1/8-pel units for the
     * final rate estimate. */
    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1920
1921 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1922 void print_mode_context(void)
1923 {
1924 FILE *f = fopen("modecont.c", "w");
1925 int i, j;
1926
1927 fprintf(f, "#include \"entropy.h\"\n");
1928 fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1929 fprintf(f, "{\n");
1930
1931 for (j = 0; j < 6; j++)
1932 {
1933 fprintf(f, " { /* %d */\n", j);
1934 fprintf(f, " ");
1935
1936 for (i = 0; i < 4; i++)
1937 {
1938 int overal_prob;
1939 int this_prob;
1940 int count;
1941
1942 /* Overall probs */
1943 count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1944
1945 if (count)
1946 overal_prob = 256 * mv_mode_cts[i][0] / count;
1947 else
1948 overal_prob = 128;
1949
1950 if (overal_prob == 0)
1951 overal_prob = 1;
1952
1953 /* context probs */
1954 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1955
1956 if (count)
1957 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1958 else
1959 this_prob = 128;
1960
1961 if (this_prob == 0)
1962 this_prob = 1;
1963
1964 fprintf(f, "%5d, ", this_prob);
1965 }
1966
1967 fprintf(f, " },\n");
1968 }
1969
1970 fprintf(f, "};\n");
1971 fclose(f);
1972 }
1973
1974 /* MV ref count VP8_ENTROPY_STATS stats code */
1975 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1976 void init_mv_ref_counts()
1977 {
1978 vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1979 vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1980 }
1981
/* Accumulate mode-decision statistics for the entropy-stats report.
 *
 * The mode decision tree ZEROMV -> NEARESTMV -> NEARMV -> NEWMV is
 * walked one level per iteration: matching the level's mode records a
 * "taken" count in mv_ref_ct[ct[level]][level][0] (and
 * mv_mode_cts[level][0]) and stops; otherwise a "not taken" count goes
 * in the [1] slots and the next level is examined.  A mode beyond the
 * tree (e.g. SPLITMV) records "not taken" at every level.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
{
    static const MB_PREDICTION_MODE tree_order[4] =
    {
        ZEROMV, NEARESTMV, NEARMV, NEWMV
    };
    int level;

    for (level = 0; level < 4; level++)
    {
        if (m == tree_order[level])
        {
            ++mv_ref_ct[ct[level]][level][0];
            ++mv_mode_cts[level][0];
            return;
        }

        ++mv_ref_ct[ct[level]][level][1];
        ++mv_mode_cts[level][1];
    }
}
2028
2029 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
2030
2031 #endif
2032