1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
13 #include "onyx_int.h"
14 #include "mcomp.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
17 #include <stdio.h>
18 #include <limits.h>
19 #include <math.h>
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
23 
#ifdef VP8_ENTROPY_STATS
/* Counter tables that exist only in VP8_ENTROPY_STATS builds. Nothing in the
 * part of the file reviewed here reads or writes them; presumably the code
 * that accumulates and reports the statistics lives further down - TODO
 * confirm.
 */
static int mv_ref_ct[31][4][2];
static int mv_mode_cts[4][2];
#endif
28 
/*
 * Weighted cost of coding mv relative to ref.
 *
 * The row and column differences are halved and used as indices into
 * mvcost[0] and mvcost[1] respectively; the two table entries are summed,
 * multiplied by Weight and shifted right by 7.
 *
 * MV costing is derived from the distribution of vectors in the previous
 * frame, so it tends to overstate the cost of a vector. Coding a new vector
 * can also influence the cost of later vectors and the quality of the NEAR
 * and NEAREST predictions for later blocks. "Weight" lets the caller take
 * these effects into account to a limited extent.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  const int row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
  const int col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;
  const int table_cost = mvcost[0][row_idx] + mvcost[1][col_idx];

  return (table_cost * Weight) >> 7;
}
42 
/*
 * Error cost of coding mv relative to ref.
 *
 * Looks up mvcost[0] / mvcost[1] at half the row / column difference, scales
 * the sum by error_per_bit and rounds the result down to 1/256 units
 * ((sum * error_per_bit + 128) >> 8).
 *
 * Returns 0 when mvcost is NULL, i.e. when mv costing is disabled.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_idx;
  int col_idx;
  int table_cost;

  /* No cost tables: mv costing is ignored. */
  if (!mvcost) return 0;

  row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
  col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;
  table_cost = mvcost[0][row_idx] + mvcost[1][col_idx];

  return (table_cost * error_per_bit + 128) >> 8;
}
55 
/*
 * SAD error cost of mv relative to ref, on a full pixel basis.
 *
 * Unlike mv_err_cost() the row / column differences index the tables
 * directly (no halving). The summed table entries are scaled by
 * error_per_bit and rounded: (sum * error_per_bit + 128) >> 8.
 *
 * Returns 0 when mvsadcost is NULL, i.e. when mv costing is disabled.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  int row_diff;
  int col_diff;
  int table_cost;

  /* No cost tables: mv costing is ignored. */
  if (!mvsadcost) return 0;

  row_diff = mv->as_mv.row - ref->as_mv.row;
  col_diff = mv->as_mv.col - ref->as_mv.col;
  table_cost = mvsadcost[0][row_diff] + mvsadcost[1][col_diff];

  return (table_cost * error_per_bit + 128) >> 8;
}
69 
/*
 * Fills x->ss with the search-site table that uses 4 sites per step.
 *
 * Entry 0 is the zero displacement. After it, for every step length starting
 * at MAX_FIRST_STEP and halving until it reaches 0, four sites are appended
 * in the order up, down, left, right. Each site stores its (row, col)
 * displacement and the matching buffer offset row * stride + col.
 *
 * Also sets x->ss_count to the number of entries written and
 * x->searches_per_step to 4.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit (row, col) displacement of each site within one step. */
  static const int unit_dir[4][2] = {
    { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
  };
  int site = 0;
  int step;
  int k;

  /* The first site is the search centre itself. */
  x->ss[site].mv.col = 0;
  x->ss[site].mv.row = 0;
  x->ss[site].offset = 0;
  site++;

  /* One group of sites per step length; the length contracts by half. */
  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    for (k = 0; k < 4; ++k) {
      const int row = unit_dir[k][0] * step;
      const int col = unit_dir[k][1] * step;

      x->ss[site].mv.col = col;
      x->ss[site].mv.row = row;
      x->ss[site].offset = row * stride + col;
      site++;
    }
  }

  x->ss_count = site;
  x->searches_per_step = 4;
}
113 
/*
 * Fills x->ss with the search-site table that uses 8 sites per step.
 *
 * Entry 0 is the zero displacement. After it, for every step length starting
 * at MAX_FIRST_STEP and halving until it reaches 0, eight sites are appended:
 * up, down, left, right, then the four diagonals (up-left, up-right,
 * down-left, down-right). Each site stores its (row, col) displacement and
 * the matching buffer offset row * stride + col.
 *
 * Also sets x->ss_count to the number of entries written and
 * x->searches_per_step to 8.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit (row, col) displacement of each site within one step. */
  static const int unit_dir[8][2] = {
    { -1, 0 },  { 1, 0 },  { 0, -1 }, { 0, 1 },
    { -1, -1 }, { -1, 1 }, { 1, -1 }, { 1, 1 }
  };
  int site = 0;
  int step;
  int k;

  /* The first site is the search centre itself. */
  x->ss[site].mv.col = 0;
  x->ss[site].mv.row = 0;
  x->ss[site].offset = 0;
  site++;

  /* One group of sites per step length; the length contracts by half. */
  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    for (k = 0; k < 8; ++k) {
      const int row = unit_dir[k][0] * step;
      const int col = unit_dir[k][1] * step;

      x->ss[site].mv.col = col;
      x->ss[site].mv.row = row;
      x->ss[site].offset = row * stride + col;
      site++;
    }
  }

  x->ss_count = site;
  x->searches_per_step = 8;
}
181 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is first copied into a small aligned buffer, so that reads from this buffer
 * never straddle a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Since the sub-pixel search range here is currently -3 ~ 3, a 22 rows x
 * 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, the area could be reduced.
 */
191 
/*
 * Helper macros for the iterative sub-pixel search below. They are not
 * self-contained: each one expands in terms of locals of the function that
 * uses it (mvcost, rr, rc, error_per_bit, y, y_stride, offset, vfp, z, b,
 * sse, thismse, besterr, br, bc, distortion, sse1, minc/maxc/minr/maxr).
 * Every macro argument is parenthesized where it is used, so arbitrary
 * expressions may be passed; note that arguments are evaluated more than
 * once, so they must be free of side effects.
 */

/* estimated cost of a motion vector (r,c); 0 when mvcost is NULL */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* runs statement s when (r,c) is inside the search limits, otherwise e */
#define IFMVCV(r, c, s, e)                                        \
  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) \
    s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/*
 * checks if (r,c) has better score than previous best: v receives the score
 * of (r,c), or UINT_MAX when (r,c) is outside the search limits. Wrapped in
 * do { } while (0) so that it behaves as a single statement.
 */
#define CHECK_BETTER(v, r, c)                                 \
  do {                                                        \
    IFMVCV(r, c,                                              \
           {                                                  \
             thismse = DIST(r, c);                            \
             if (((v) = (MVC(r, c) + thismse)) < besterr) {   \
               besterr = (v);                                 \
               br = (r);                                      \
               bc = (c);                                      \
               *distortion = thismse;                         \
               *sse1 = sse;                                   \
             }                                                \
           },                                                 \
           (v) = UINT_MAX;)                                   \
  } while (0)
222 
/*
 * Refines a full-pixel motion vector to sub-pixel precision by iterative
 * search: up to three rounds at half-pel spacing followed by up to three
 * rounds at quarter-pel spacing. Each round evaluates the left, right, up and
 * down neighbours of the current centre plus the one diagonal lying between
 * the cheaper horizontal and the cheaper vertical neighbour. A stage ends
 * early when a round leaves the best point unchanged.
 *
 * On entry bestmv holds the starting vector in whole pixels. On return it
 * holds (br * 2, bc * 2), where br / bc are the quarter-pel coordinates of
 * the winning point; *distortion and *sse1 hold the variance and SSE reported
 * for that point.
 *
 * Returns the best variance plus motion-vector cost, or INT_MAX when the
 * result is further than MAX_FULL_PEL_VAL << 3 from ref_mv in either
 * component.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  /* z, y, y_stride, offset, sse, thismse, besterr, br, bc, rr, rc and the
   * minc/maxc/minr/maxr limits are referenced by name from the MVC / PRE /
   * DIST / IFMVCV / CHECK_BETTER macros, so their spelling must not change.
   */
  unsigned char *z = *(b->base_src) + b->src;

  /* Largest allowed distance from the reference vector, per component. */
  const int mv_reach = (1 << mvlong_width) - 1;
  int rr = ref_mv->as_mv.row >> 1;
  int rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4;
  int bc = bestmv->as_mv.col * 4;
  int tr = br;
  int tc = bc;

  int minc = VPXMAX(x->mv_col_min * 4, rc - mv_reach);
  int maxc = VPXMIN(x->mv_col_max * 4, rc + mv_reach);
  int minr = VPXMAX(x->mv_row_min * 4, rr - mv_reach);
  int maxr = VPXMIN(x->mv_row_max * 4, rr + mv_reach);

  unsigned int besterr;
  unsigned int cost_left, cost_right, cost_up, cost_down, cost_diag;
  unsigned int sse;
  int thismse;
  int step;
  int pass;
  int diag_r, diag_c;

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  /* Predictor position addressed by the incoming full-pixel vector. */
  unsigned char *start_ref = base_pre + d->offset +
                             (bestmv->as_mv.row) * pre_stride +
                             bestmv->as_mv.col;
  unsigned char *y;
#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  int buf_r1 = 3;
  int buf_r2 = 3;
  int buf_c1 = 3;

  /* Clamping to avoid out-of-range data access */
  if ((bestmv->as_mv.row - 3) < x->mv_row_min) {
    buf_r1 = bestmv->as_mv.row - x->mv_row_min;
  }
  if ((bestmv->as_mv.row + 3) > x->mv_row_max) {
    buf_r2 = x->mv_row_max - bestmv->as_mv.row;
  }
  if ((bestmv->as_mv.col - 3) < x->mv_col_min) {
    buf_c1 = bestmv->as_mv.col - x->mv_col_min;
  }
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(start_ref - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  y = start_ref;
  y_stride = pre_stride;
#endif

  /* PRE() subtracts this so that the starting vector maps back onto y. */
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* step == 2 is the 1/2 pel stage, step == 1 the 1/4 pel stage. (tr, tc)
   * always equals (br, bc) when a stage starts.
   *
   * TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */
  for (step = 2; step > 0; step >>= 1) {
    for (pass = 0; pass < 3; ++pass) {
      CHECK_BETTER(cost_left, tr, tc - step);
      CHECK_BETTER(cost_right, tr, tc + step);
      CHECK_BETTER(cost_up, tr - step, tc);
      CHECK_BETTER(cost_down, tr + step, tc);

      /* One extra diagonal, towards the cheaper side on each axis. */
      diag_r = (cost_up < cost_down) ? tr - step : tr + step;
      diag_c = (cost_left < cost_right) ? tc - step : tc + step;
      CHECK_BETTER(cost_diag, diag_r, diag_c);

      /* no reason to check the same one again. */
      if (tr == br && tc == bc) break;

      tr = br;
      tc = bc;
    }
  }

  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
366 
/*
 * Adopts this_mv as the new best vector when its total cost beats bestmse.
 * Expands against the locals of vp8_find_best_sub_pixel_step().
 */
#define SUBPEL_STEP_KEEP_BEST(cost) \
  do {                              \
    if ((cost) < bestmse) {         \
      *bestmv = this_mv;            \
      bestmse = (cost);             \
      *distortion = thismse;        \
      *sse1 = sse;                  \
    }                               \
  } while (0)

/*
 * Single-pass sub-pixel refinement: one half-pel stage followed by one
 * quarter-pel stage. Each stage evaluates the left, right, up and down
 * neighbours of its centre plus the one diagonal lying between the cheaper
 * horizontal and the cheaper vertical neighbour.
 *
 * On entry bestmv holds the starting vector in whole pixels. It is scaled by
 * 8 before the search, so on return one whole pixel corresponds to 8 units
 * (half-pel candidates differ by 4, quarter-pel candidates by 2).
 * *distortion and *sse1 receive the variance and SSE of the winning point.
 *
 * Returns the winning variance plus motion-vector cost.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *src = *(b->base_src) + b->src;
  int cost_l, cost_r, cost_u, cost_d, cost_diag;
  int go_left, go_up;
  unsigned int sse;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  /* Predictor position addressed by the incoming full-pixel vector. */
  unsigned char *start_ref = base_pre + d->offset +
                             (bestmv->as_mv.row) * pre_stride +
                             bestmv->as_mv.col;
  unsigned char *y;
  unsigned char *probe;
#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(start_ref - 1 - pre_stride, pre_stride, xd->y_buf, y_stride,
               18);
  y = xd->y_buf + y_stride + 1;
#else
  y = start_ref;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, src, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* ---- half-pel stage ---- */

  /* left: half a pixel back, i.e. offset 4 from the previous column */
  this_mv = startmv;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  thismse = vfp->svf(y - 1, y_stride, 4, 0, src, b->src_stride, &sse);
  cost_l = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_l);

  /* right */
  this_mv.as_mv.col += 8;
  thismse = vfp->svf(y, y_stride, 4, 0, src, b->src_stride, &sse);
  cost_r = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_r);

  /* up: offset 4 from the previous row */
  this_mv = startmv;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, src, b->src_stride, &sse);
  cost_u = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_u);

  /* down */
  this_mv.as_mv.row += 8;
  thismse = vfp->svf(y, y_stride, 0, 4, src, b->src_stride, &sse);
  cost_d = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_d);

  /* one more diagonal, towards the cheaper side on each axis */
  go_left = cost_l < cost_r;
  go_up = cost_u < cost_d;
  this_mv = startmv;
  probe = y;

  if (go_left) {
    this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    probe -= 1;
  } else {
    this_mv.as_mv.col += 4;
  }

  if (go_up) {
    this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    probe -= y_stride;
  } else {
    this_mv.as_mv.row += 4;
  }

  /* "halfpix" horizontal/vertical variance */
  thismse = vfp->svf(probe, y_stride, 4, 4, src, b->src_stride, &sse);
  cost_diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_diag);

  /* ---- quarter-pel stage ---- */

  /* Re-base y on the whole pixel that contains the half-pel winner. */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* left: when the centre sits on a whole-pixel column the candidate is at
   * offset 6 of the previous column, so the read pointer steps back by one.
   */
  this_mv = startmv;
  probe = y;

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    probe = y - 1;
  }

  thismse = vfp->svf(probe, y_stride, this_mv.as_mv.col & 7,
                     this_mv.as_mv.row & 7, src, b->src_stride, &sse);
  cost_l = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_l);

  /* right */
  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     src, b->src_stride, &sse);
  cost_r = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_r);

  /* up: same wrap-around rule as for "left", applied to rows */
  this_mv = startmv;
  probe = y;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    probe = y - y_stride;
  }

  thismse = vfp->svf(probe, y_stride, this_mv.as_mv.col & 7,
                     this_mv.as_mv.row & 7, src, b->src_stride, &sse);
  cost_u = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_u);

  /* down */
  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     src, b->src_stride, &sse);
  cost_d = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_d);

  /* one more diagonal: the horizontal and vertical moves are chosen
   * independently, each with the same wrap-around rule as above
   */
  go_left = cost_l < cost_r;
  go_up = cost_u < cost_d;
  this_mv = startmv;
  probe = y;

  if (!go_left) {
    this_mv.as_mv.col += 2;
  } else if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col -= 2;
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    probe -= 1;
  }

  if (!go_up) {
    this_mv.as_mv.row += 2;
  } else if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row -= 2;
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    probe -= y_stride;
  }

  thismse = vfp->svf(probe, y_stride, this_mv.as_mv.col & 7,
                     this_mv.as_mv.row & 7, src, b->src_stride, &sse);
  cost_diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  SUBPEL_STEP_KEEP_BEST(cost_diag);

  return bestmse;
}
#undef SUBPEL_STEP_KEEP_BEST
662 
/*
 * Adopts this_mv as the new best vector when its total cost beats bestmse.
 * Expands against the locals of vp8_find_best_half_pixel_step().
 */
#define HALFPEL_STEP_KEEP_BEST(cost) \
  do {                               \
    if ((cost) < bestmse) {          \
      *bestmv = this_mv;             \
      bestmse = (cost);              \
      *distortion = thismse;         \
      *sse1 = sse;                   \
    }                                \
  } while (0)

/*
 * Half-pel-only refinement: evaluates the left, right, up and down half-pel
 * neighbours of the starting point plus the one diagonal lying between the
 * cheaper horizontal and the cheaper vertical neighbour.
 *
 * On entry bestmv holds the starting vector in whole pixels. It is scaled by
 * 8 before the search, so on return one whole pixel corresponds to 8 units
 * and the half-pel candidates sit 4 units away from the start.
 * *distortion and *sse1 receive the variance and SSE of the winning point.
 *
 * Returns the winning variance plus motion-vector cost.
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *src = *(b->base_src) + b->src;
  int cost_l, cost_r, cost_u, cost_d, cost_diag;
  unsigned int sse;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  /* Predictor position addressed by the incoming full-pixel vector. */
  unsigned char *start_ref = base_pre + d->offset +
                             (bestmv->as_mv.row) * pre_stride +
                             bestmv->as_mv.col;
  unsigned char *y;
  unsigned char *probe;
#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(start_ref - 1 - pre_stride, pre_stride, xd->y_buf, y_stride,
               18);
  y = xd->y_buf + y_stride + 1;
#else
  y = start_ref;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, src, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* left: half a pixel back, i.e. offset 4 from the previous column */
  this_mv = startmv;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, src, b->src_stride, &sse);
  cost_l = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  HALFPEL_STEP_KEEP_BEST(cost_l);

  /* right */
  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, src, b->src_stride, &sse);
  cost_r = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  HALFPEL_STEP_KEEP_BEST(cost_r);

  /* up: offset 4 from the previous row */
  this_mv = startmv;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, src, b->src_stride, &sse);
  cost_u = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  HALFPEL_STEP_KEEP_BEST(cost_u);

  /* down */
  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, src, b->src_stride, &sse);
  cost_d = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  HALFPEL_STEP_KEEP_BEST(cost_d);

  /* one more diagonal, towards the cheaper side on each axis */
  this_mv = startmv;
  probe = y;

  if (cost_l < cost_r) {
    this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    probe -= 1;
  } else {
    this_mv.as_mv.col += 4;
  }

  if (cost_u < cost_d) {
    this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    probe -= y_stride;
  } else {
    this_mv.as_mv.row += 4;
  }

  /* "halfpix" horizontal/vertical variance */
  thismse = vfp->svf(probe, y_stride, 4, 4, src, b->src_stride, &sse);
  cost_diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  HALFPEL_STEP_KEEP_BEST(cost_diag);

  return bestmse;
}
#undef HALFPEL_STEP_KEEP_BEST
803 
/*
 * Sets all_in to 1 when every point within +/- range of (br, bc), in both
 * row and column, lies inside x's mv_row_min / mv_row_max / mv_col_min /
 * mv_col_max limits, and to 0 otherwise.
 * Expands against the caller's locals all_in, br, bc and x. The range
 * argument is used unparenthesized, so pass a simple value.
 */
#define CHECK_BOUNDS(range)                    \
  {                                            \
    all_in = 1;                                \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }
812 
/*
 * Skips the rest of the current loop iteration (via continue) when the
 * caller's this_mv lies outside x's row / column limits.
 * Because it relies on continue, it must be expanded directly inside the
 * loop it is meant to affect, and must not be wrapped in do { } while (0).
 */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }
820 
/*
 * Records the current candidate as the best one if it wins on SAD plus
 * motion-vector cost.
 * The raw thissad is compared with bestsad first, so the vector cost is only
 * computed for candidates that can still win. If that test passes,
 * mvsad_err_cost() for this_mv relative to fcenter_mv is added to thissad
 * (thissad is modified in place) and compared again; on success bestsad is
 * updated and best_site is set to the caller's loop index i.
 * Expands against the caller's locals thissad, bestsad, this_mv, fcenter_mv,
 * mvsadcost, sad_per_bit, best_site and i.
 */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }
832 
/*
 * Six groups of three offsets each. Group k holds the same values as entries
 * (k + 5) % 6, k and (k + 1) % 6 of the hex[] table declared in
 * vp8_hex_search(), i.e. hex point k together with its two neighbours on the
 * hexagon.
 * NOTE(review): presumably these are the only new points to test after the
 * search centre has moved to hex point k - confirm against the search loop.
 */
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};
838 
/* Hexagon-pattern integer motion search.
 *
 * Starts at ref_mv (clamped in place to the motion vector limits held in x),
 * repeatedly moves to the best of the surrounding hexagon points, and then
 * refines with up to dia_range rounds over the four 1-away neighbors.
 * Candidates are ranked by vfp->sdf() plus mvsad_err_cost() measured against
 * center_mv shifted right by 3.
 *
 * The winning position is written to best_mv and the best cost found (SAD
 * plus motion vector SAD cost) is returned. mvcost is not used. search_param
 * is only consulted when CONFIG_MULTI_RES_ENCODING is enabled, where larger
 * values shorten the hexagon stage (and values >= 6 skip it entirely).
 */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv) {
  /* Six vertices of the hexagon pattern, checked around the current centre. */
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  /* Four 1-away neighbors used by the final refinement stage. */
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc; /* current best row / column */
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  int k = -1; /* hexagon vertex taken by the previous move */
  int all_in;
  int best_site = -1; /* -1 means no candidate beat the current centre */
  int hex_range = 127;
  int dia_range = 8;

  /* Reference point for the motion vector SAD cost. */
  int_mv fcenter_mv;
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  (void)mvcost;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search */
  CHECK_BOUNDS(2)

  /* When the whole pattern lies inside the limits, the per-point CHECK_POINT
   * test is skipped. */
  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  }

  /* No hexagon point improved on the start: go straight to refinement. */
  if (best_site == -1) {
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  /* Subsequent hexagon steps only test the three points listed in
   * next_chkpts[k] for the direction k of the previous move. */
  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      /* New direction is (k + 5 + best_site) modulo 6, kept in 0..5. */
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

/* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    /* Stop refining as soon as a round brings no improvement. */
    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
/* The hex search helper macros rely on local variable names of the function
 * above, so they are removed here to keep them out of the rest of the file.
 */
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER
1021 
/* Diamond-pattern integer motion search, one candidate at a time.
 *
 * ref_mv is clamped in place to the motion vector limits held in x and used
 * as the starting point. Each step tries x->searches_per_step candidate
 * offsets from x->ss; candidates are ranked by fn_ptr->sdf() plus
 * mvsad_err_cost() measured against center_mv shifted right by 3. Only
 * candidates strictly inside the limits are evaluated.
 *
 * search_param selects how far into the x->ss table the search starts, and
 * so reduces the number of steps performed.
 *
 * On return best_mv holds the best position found and *num00 counts the
 * steps during which the best position was still the starting point. The
 * return value is fn_ptr->vf() at the best position plus mv_err_cost() of
 * the best vector (scaled by 8) against center_mv.
 */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  /* Index into ss; it keeps advancing across steps. */
  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        /* Only pay for the vector cost when the raw SAD is competitive. */
        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      /* A better site was found in this step: recentre on it. */
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  /* Scale by multiplication rather than `<< 3`: the components can be
   * negative, and left-shifting a negative value is undefined behavior.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1133 
/* Diamond-pattern integer motion search that evaluates candidates four at a
 * time through fn_ptr->sdx4df() whenever a whole step lies strictly inside
 * the motion vector limits held in x, and one at a time through
 * fn_ptr->sdf() otherwise.
 *
 * ref_mv is clamped in place and used as the starting point. Candidates are
 * ranked by SAD plus mvsad_err_cost() measured against center_mv shifted
 * right by 3. search_param selects how far into the x->ss table the search
 * starts, and so reduces the number of steps.
 *
 * On return best_mv holds the best position found and *num00 counts the
 * steps during which the best position was still the starting point. The
 * return value is fn_ptr->vf() at the best position plus mv_err_cost() of
 * the best vector (scaled by 8) against center_mv.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  unsigned char *const src = (*(b->base_src) + b->src);
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *start_address;
  unsigned char *best_address;
  search_site *ss;
  int *sad_cost[2];
  int_mv scaled_center;
  int_mv cand_mv;
  unsigned int bestsad;
  unsigned int cand_sad;
  int best_site = 0;
  int last_site = 0;
  int start_row;
  int start_col;
  int tot_steps;
  int step, n;
  int i = 1; /* index into ss; keeps advancing across steps */

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  scaled_center.as_mv.row = center_mv->as_mv.row >> 3;
  scaled_center.as_mv.col = center_mv->as_mv.col >> 3;

  /* Bring the starting vector inside the allowed range. */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  start_row = ref_mv->as_mv.row;
  start_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = start_row;
  best_mv->as_mv.col = start_col;

  /* Address of the starting position in the reference frame. */
  start_address = (unsigned char *)(ref_frame + d->offset +
                                    (start_row * ref_stride) + start_col);
  best_address = start_address;

  /* Cost of the starting position. */
  bestsad = fn_ptr->sdf(src, src_stride, start_address, ref_stride) +
            mvsad_err_cost(best_mv, &scaled_center, sad_cost, sad_per_bit);

  /* A larger search_param starts further into the site table (smaller
   * initial step) and leaves fewer steps to run.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  for (step = 0; step < tot_steps; ++step) {
    /* Testing one site against each of the four limits is enough to know
     * whether every site of this step is inside the bounds.
     */
    const int all_in =
        ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min) &
        ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max) &
        ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min) &
        ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      int lane;

      for (n = 0; n < x->searches_per_step; n += 4) {
        const unsigned char *block_offset[4];

        for (lane = 0; lane < 4; ++lane) {
          block_offset[lane] = ss[i + lane].offset + best_address;
        }

        fn_ptr->sdx4df(src, src_stride, block_offset, ref_stride, sad_array);

        for (lane = 0; lane < 4; ++lane, ++i) {
          if (sad_array[lane] >= bestsad) continue;

          /* Raw SAD is competitive: add the vector cost and re-check. */
          cand_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
          cand_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
          sad_array[lane] +=
              mvsad_err_cost(&cand_mv, &scaled_center, sad_cost, sad_per_bit);

          if (sad_array[lane] < bestsad) {
            bestsad = sad_array[lane];
            best_site = i;
          }
        }
      }
    } else {
      for (n = 0; n < x->searches_per_step; ++n, ++i) {
        const int cand_row = best_mv->as_mv.row + ss[i].mv.row;
        const int cand_col = best_mv->as_mv.col + ss[i].mv.col;

        /* Skip vectors that are not strictly inside the limits. */
        if ((cand_col <= x->mv_col_min) || (cand_col >= x->mv_col_max) ||
            (cand_row <= x->mv_row_min) || (cand_row >= x->mv_row_max)) {
          continue;
        }

        cand_sad = fn_ptr->sdf(src, src_stride, ss[i].offset + best_address,
                               ref_stride);
        if (cand_sad >= bestsad) continue;

        cand_mv.as_mv.row = cand_row;
        cand_mv.as_mv.col = cand_col;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &scaled_center, sad_cost, sad_per_bit);

        if (cand_sad < bestsad) {
          bestsad = cand_sad;
          best_site = i;
        }
      }
    }

    if (best_site != last_site) {
      /* A better site was found in this step: recentre on it. */
      best_address += ss[best_site].offset;
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      last_site = best_site;
    } else if (best_address == start_address) {
      ++(*num00);
    }
  }

  cand_mv.as_mv.row = best_mv->as_mv.row * 8;
  cand_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* cand_sad only receives the secondary output of vf here. */
  return fn_ptr->vf(src, src_stride, best_address, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1282 
/* Exhaustive integer motion search, one candidate at a time.
 *
 * Scans every position in the window [ref - distance, ref + distance) in
 * both directions around ref_mv, after clipping the window to the motion
 * vector limits held in x. Candidates are ranked by fn_ptr->sdf() plus
 * mvsad_err_cost() measured against center_mv shifted right by 3.
 *
 * The best position is stored in d->bmi.mv. The return value is
 * fn_ptr->vf() at that position plus mv_err_cost() of the best vector
 * (scaled by 8) against center_mv.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that
   * stretch beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    /* First candidate of this row; advanced one column per iteration. */
    check_here = r * mv_stride + in_what + col_min;

    for (c = col_min; c < col_max; ++c) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      this_mv.as_mv.col = c;
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

      if (thissad < bestsad) {
        bestsad = thissad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        bestaddress = check_here;
      }

      check_here++;
    }
  }

  /* Scale by multiplication rather than `<< 3`: the components can be
   * negative, and left-shifting a negative value is undefined behavior.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1368 
/* Exhaustive integer motion search that evaluates three consecutive column
 * positions per fn_ptr->sdx3f() call, with a per-candidate fn_ptr->sdf()
 * loop for the columns left over at the end of each row.
 *
 * Scans the window [ref - distance, ref + distance) in both directions
 * around ref_mv, after clipping the window to the motion vector limits held
 * in x. The vector cost from mvsad_err_cost() (measured against center_mv
 * shifted right by 3) is only added for candidates whose raw SAD already
 * beats the best cost so far.
 *
 * The best position is stored in d->bmi.mv. The return value is
 * fn_ptr->vf() at that position plus mv_err_cost() of the best vector
 * (scaled by 8) against center_mv.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Groups of three columns while at least three remain in the row. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Remaining columns of the row, one at a time. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Scale by multiplication rather than `<< 3`: the components can be
   * negative, and left-shifting a negative value is undefined behavior.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1487 
/* Exhaustive integer motion search that walks each row in groups of eight
 * columns (fn_ptr->sdx8f), then groups of three (fn_ptr->sdx3f), then single
 * columns (fn_ptr->sdf) for whatever is left.
 *
 * The window is [ref - distance, ref + distance) in both directions around
 * ref_mv, clipped to the motion vector limits held in x. The vector cost
 * from mvsad_err_cost() (measured against center_mv shifted right by 3) is
 * only added for candidates whose raw SAD already beats the best cost so
 * far.
 *
 * The best position is stored in d->bmi.mv. The return value is
 * fn_ptr->vf() at that position plus mv_err_cost() of the best vector
 * (scaled by 8) against center_mv.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *const src = (*(b->base_src) + b->src);
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  int_mv *const best_mv = &d->bmi.mv;

  const int start_row = ref_mv->as_mv.row;
  const int start_col = ref_mv->as_mv.col;

  /* Search window; the *_end values are exclusive. */
  int row_begin = start_row - distance;
  int row_end = start_row + distance;
  int col_begin = start_col - distance;
  int col_end = start_col + distance;

  DECLARE_ALIGNED(16, unsigned int, sad_batch8[8]);
  unsigned int sad_batch3[3];

  unsigned char *search_base;
  unsigned char *bestaddress;
  unsigned char *cand_ptr;
  int_mv cand_mv;
  int_mv scaled_center;
  int *sad_cost[2];
  unsigned int bestsad;
  unsigned int cand_sad;
  int r, c, k;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  scaled_center.as_mv.row = center_mv->as_mv.row >> 3;
  scaled_center.as_mv.col = center_mv->as_mv.col >> 3;

  /* Position of the zero vector and of the search centre. */
  search_base = ref_frame + d->offset;
  bestaddress = search_base + (start_row * ref_stride) + start_col;

  best_mv->as_mv.row = start_row;
  best_mv->as_mv.col = start_col;

  /* Cost at the centre is the value to beat. */
  bestsad = fn_ptr->sdf(src, src_stride, bestaddress, ref_stride) +
            mvsad_err_cost(best_mv, &scaled_center, sad_cost, sad_per_bit);

  /* Clip the window so no candidate lies beyond the motion vector limits. */
  col_begin = (col_begin < x->mv_col_min) ? x->mv_col_min : col_begin;
  col_end = (col_end > x->mv_col_max) ? x->mv_col_max : col_end;
  row_begin = (row_begin < x->mv_row_min) ? x->mv_row_min : row_begin;
  row_end = (row_end > x->mv_row_max) ? x->mv_row_max : row_end;

  for (r = row_begin; r < row_end; ++r) {
    cand_mv.as_mv.row = r;
    cand_ptr = search_base + r * ref_stride + col_begin;
    c = col_begin;

    /* Groups of eight columns while at least eight remain. */
    while ((c + 7) < col_end) {
      fn_ptr->sdx8f(src, src_stride, cand_ptr, ref_stride, sad_batch8);

      for (k = 0; k < 8; ++k, ++c, ++cand_ptr) {
        cand_sad = sad_batch8[k];
        if (cand_sad >= bestsad) continue;

        cand_mv.as_mv.col = c;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &scaled_center, sad_cost, sad_per_bit);

        if (cand_sad < bestsad) {
          bestsad = cand_sad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = cand_ptr;
        }
      }
    }

    /* Groups of three columns while at least three remain. */
    while ((c + 2) < col_end) {
      fn_ptr->sdx3f(src, src_stride, cand_ptr, ref_stride, sad_batch3);

      for (k = 0; k < 3; ++k, ++c, ++cand_ptr) {
        cand_sad = sad_batch3[k];
        if (cand_sad >= bestsad) continue;

        cand_mv.as_mv.col = c;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &scaled_center, sad_cost, sad_per_bit);

        if (cand_sad < bestsad) {
          bestsad = cand_sad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = cand_ptr;
        }
      }
    }

    /* Whatever is left of the row, one column at a time. */
    for (; c < col_end; ++c, ++cand_ptr) {
      cand_sad = fn_ptr->sdf(src, src_stride, cand_ptr, ref_stride);
      if (cand_sad >= bestsad) continue;

      cand_mv.as_mv.col = c;
      cand_sad +=
          mvsad_err_cost(&cand_mv, &scaled_center, sad_cost, sad_per_bit);

      if (cand_sad < bestsad) {
        bestsad = cand_sad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        bestaddress = cand_ptr;
      }
    }
  }

  cand_mv.as_mv.row = best_mv->as_mv.row * 8;
  cand_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* cand_sad only receives the secondary output of vf here. */
  return fn_ptr->vf(src, src_stride, bestaddress, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1633 
/* Local refinement of an integer motion vector, one candidate at a time.
 *
 * For up to search_range rounds, the four 1-away neighbors of the current
 * position are evaluated with fn_ptr->sdf() plus mvsad_err_cost() (weighted
 * by error_per_bit and measured against center_mv shifted right by 3). The
 * search moves to the best neighbor when it beats the current cost and stops
 * early when none does. Only neighbors strictly inside the motion vector
 * limits held in x are considered. ref_mv is used as given (it is not
 * clamped here) and is updated in place to the refined position.
 *
 * The return value is fn_ptr->vf() at the final position plus mv_err_cost()
 * of the final vector (scaled by 8) against center_mv, weighted by
 * x->errorperbit.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Cost of the starting position. */
  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1; /* -1 means no neighbor beat the current position */

    for (j = 0; j < 4; ++j) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        /* Only pay for the vector cost when the raw SAD is competitive. */
        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  /* Scale by multiplication rather than `<< 3`: the components can be
   * negative, and left-shifting a negative value is undefined behavior.
   */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1711 
/* Iterative 4-neighbour refinement of a motion vector using the 4-way SAD
 * function where possible.
 *
 * Starting from *ref_mv, each iteration evaluates the positions one row up,
 * one column left, one column right and one row down. A candidate's cost is
 * its SAD plus mvsad_err_cost() measured against center_mv >> 3. If a
 * candidate beats the current best cost, *ref_mv (and the matching reference
 * pointer) moves to it; otherwise the search stops. At most search_range
 * iterations are performed.
 *
 * When all four neighbours lie strictly inside the x->mv_{row,col}_{min,max}
 * window they are evaluated with one fn_ptr->sdx4df call; otherwise each
 * in-window neighbour is evaluated individually with fn_ptr->sdf.
 *
 * ref_mv is updated in place with the refined vector.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * vector (scaled by 8) relative to center_mv, weighted by x->errorperbit.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  /* Step table: up, left, right, down. */
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };

  const int what_stride = b->src_stride;
  const int in_what_stride = x->e_mbd.pre.y_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(x->e_mbd.pre.y_buffer + d->offset +
                        (ref_mv->as_mv.row * in_what_stride) +
                        ref_mv->as_mv.col);
  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int step;

  /* The SAD-domain cost is measured against center_mv >> 3. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];

  /* Cost of the starting position. */
  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (step = 0; step < search_range; ++step) {
    int best_site = -1;
    int k;
    /* True only if every neighbour is strictly inside the search window. */
    const int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &&
                       ((ref_mv->as_mv.row + 1) < x->mv_row_max) &&
                       ((ref_mv->as_mv.col - 1) > x->mv_col_min) &&
                       ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      const unsigned char *block_offset[4];

      /* Reference addresses of the four neighbours, in table order. */
      for (k = 0; k < 4; ++k) {
        block_offset[k] =
            best_address +
            (neighbors[k].row * in_what_stride + neighbors[k].col);
      }

      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                     sad_array);

      for (k = 0; k < 4; ++k) {
        unsigned int candidate = sad_array[k];

        /* Raw SAD already no better than the best: adding cost cannot help. */
        if (candidate >= bestsad) continue;

        this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[k].row;
        this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[k].col;
        candidate +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

        if (candidate < bestsad) {
          bestsad = candidate;
          best_site = k;
        }
      }
    } else {
      for (k = 0; k < 4; ++k) {
        const short row = ref_mv->as_mv.row + neighbors[k].row;
        const short col = ref_mv->as_mv.col + neighbors[k].col;

        /* Skip neighbours on or outside the window boundary. */
        if (col <= x->mv_col_min || col >= x->mv_col_max ||
            row <= x->mv_row_min || row >= x->mv_row_max) {
          continue;
        }

        check_here = best_address +
                     (neighbors[k].row * in_what_stride + neighbors[k].col);
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad >= bestsad) continue;

        this_mv.as_mv.row = row;
        this_mv.as_mv.col = col;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_site = k;
        }
      }
    }

    /* No neighbour improved on the current position: search has converged. */
    if (best_site == -1) break;

    ref_mv->as_mv.row += neighbors[best_site].row;
    ref_mv->as_mv.col += neighbors[best_site].col;
    best_address += (neighbors[best_site].row) * in_what_stride +
                    neighbors[best_site].col;
  }

  /* Scale the refined vector by 8 before costing it against center_mv. */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  /* thissad is only an output slot for vf() here; its value is not used. */
  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1821 
1822 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1823 void print_mode_context(void) {
1824   FILE *f = fopen("modecont.c", "w");
1825   int i, j;
1826 
1827   fprintf(f, "#include \"entropy.h\"\n");
1828   fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1829   fprintf(f, "{\n");
1830 
1831   for (j = 0; j < 6; ++j) {
1832     fprintf(f, "  { /* %d */\n", j);
1833     fprintf(f, "    ");
1834 
1835     for (i = 0; i < 4; ++i) {
1836       int overal_prob;
1837       int this_prob;
1838       int count;
1839 
1840       /* Overall probs */
1841       count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1842 
1843       if (count)
1844         overal_prob = 256 * mv_mode_cts[i][0] / count;
1845       else
1846         overal_prob = 128;
1847 
1848       if (overal_prob == 0) overal_prob = 1;
1849 
1850       /* context probs */
1851       count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1852 
1853       if (count)
1854         this_prob = 256 * mv_ref_ct[j][i][0] / count;
1855       else
1856         this_prob = 128;
1857 
1858       if (this_prob == 0) this_prob = 1;
1859 
1860       fprintf(f, "%5d, ", this_prob);
1861     }
1862 
1863     fprintf(f, "  },\n");
1864   }
1865 
1866   fprintf(f, "};\n");
1867   fclose(f);
1868 }
1869 
1870 /* MV ref count VP8_ENTROPY_STATS stats code */
1871 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1872 void init_mv_ref_counts() {
1873   memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1874   memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1875 }
1876 
/* Record one coded inter mode in the statistics tables.
 *
 * The mode is treated as a chain of up to four binary decisions, tested in
 * the order ZEROMV, NEARESTMV, NEARMV, NEWMV. For decision k, slot [0] is
 * incremented when m equals the k-th mode (and the chain stops), otherwise
 * slot [1] is incremented and the next decision is examined.
 *
 *   m   mode being accumulated
 *   ct  per-decision context; ct[k] selects the first index of mv_ref_ct for
 *       decision k. It is used unchecked, so each ct[k] that is read must be
 *       a valid first index of mv_ref_ct.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
  const MB_PREDICTION_MODE decision_order[4] = { ZEROMV, NEARESTMV, NEARMV,
                                                 NEWMV };
  int k;

  for (k = 0; k < 4; ++k) {
    /* 0 when this decision matched the mode, 1 when it did not. */
    const int missed = (m != decision_order[k]);

    ++mv_ref_ct[ct[k]][k][missed];
    ++mv_mode_cts[k][missed];

    /* A match ends the chain; later decisions are not counted. */
    if (!missed) break;
  }
}
1910 
1911 #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
1912 
1913 #endif
1914