/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#include "vp8/common/common.h"
#include "vpx_dsp/vpx_dsp_common.h"
23 
/* Returns the weighted table cost of coding |mv| against |ref|.
 *
 * mv, ref : the motion vector and the reference it is costed against.
 * mvcost  : two cost tables; [0] is indexed by the row delta and [1] by the
 *           column delta.
 * Weight  : multiplier applied to the summed table cost before the final
 *           right shift by 7.
 *
 * Each table index is half the component difference, clamped to
 * [0, MVvals].
 * NOTE(review): a negative difference is clamped to index 0 here - confirm
 * this is the intended handling for the tables that callers pass in.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to over state the cost of vectors. In
   * addition coding a new vector can have a knock on effect on the cost
   * of subsequent vectors and the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
  const int mv_idx_row =
      clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  const int mv_idx_col =
      clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
  return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7;
}
38 
/* Returns the motion vector cost of |mv| against |ref|, scaled by
 * |error_per_bit| and rounded: (table_cost * error_per_bit + 128) >> 8.
 *
 * mvcost : two cost tables ([0] rows, [1] columns), or NULL to disable MV
 *          costing, in which case 0 is returned.
 *
 * Each table index is half the component difference, clamped to
 * [0, MVvals].
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int mv_idx_row;
  int mv_idx_col;

  /* Ignore mv costing if mvcost is NULL */
  if (!mvcost) return 0;

  mv_idx_row = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  mv_idx_col = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);

  return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit +
          128) >>
         8;
}
53 
/* Returns the SAD-domain motion vector cost of |mv| against |ref|, scaled by
 * |error_per_bit| and rounded: (table_cost * error_per_bit + 128) >> 8.
 *
 * mvsadcost : two cost tables ([0] rows, [1] columns), or NULL to disable
 *             costing, in which case 0 is returned.
 *
 * The tables are indexed directly by the component differences, with no
 * scaling and no clamping.
 * NOTE(review): differences can be negative, so this presumably relies on
 * the table pointers addressing the middle of their arrays - confirm against
 * the code that sets them up.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  /* Calculate sad error cost on full pixel basis. */
  /* Ignore mv costing if mvsadcost is NULL */
  if (!mvsadcost) return 0;

  return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
           mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
              error_per_bit +
          128) >>
         8;
}
67 
/* Fills x->ss[] with the search sites of a 4-point (up, down, left, right)
 * pattern.
 *
 * x      : macroblock whose ss[], ss_count and searches_per_step are written.
 * stride : row pitch used to turn a (row, col) step into a linear offset.
 *
 * Site 0 is the centre (zero vector, zero offset). It is followed by one
 * group of four sites per step length, starting at MAX_FIRST_STEP and
 * halving until the length reaches zero.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit (row, col) steps, in the order the sites are stored. */
  static const signed char dirs[4][2] = {
    { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
  };
  int Len;
  int i;
  int search_site_count = 0;

  /* Generate offsets for 4 search sites per step. */
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  for (Len = MAX_FIRST_STEP; Len > 0; Len /= 2) { /* Contract each pass. */
    for (i = 0; i < 4; ++i) {
      const int row_step = dirs[i][0] * Len;
      const int col_step = dirs[i][1] * Len;

      /* Compute offsets for search sites. */
      x->ss[search_site_count].mv.col = col_step;
      x->ss[search_site_count].mv.row = row_step;
      x->ss[search_site_count].offset = row_step * stride + col_step;
      search_site_count++;
    }
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
}
111 
/* Fills x->ss[] with the search sites of an 8-point pattern: the four axis
 * neighbours followed by the four diagonal neighbours.
 *
 * x      : macroblock whose ss[], ss_count and searches_per_step are written.
 * stride : row pitch used to turn a (row, col) step into a linear offset.
 *
 * Site 0 is the centre (zero vector, zero offset). It is followed by one
 * group of eight sites per step length, starting at MAX_FIRST_STEP and
 * halving until the length reaches zero.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit (row, col) steps, in the order the sites are stored. */
  static const signed char dirs[8][2] = {
    { -1, 0 },  { 1, 0 },  { 0, -1 }, { 0, 1 },
    { -1, -1 }, { -1, 1 }, { 1, -1 }, { 1, 1 }
  };
  int Len;
  int i;
  int search_site_count = 0;

  /* Generate offsets for 8 search sites per step. */
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  for (Len = MAX_FIRST_STEP; Len > 0; Len /= 2) { /* Contract each pass. */
    for (i = 0; i < 8; ++i) {
      const int row_step = dirs[i][0] * Len;
      const int col_step = dirs[i][1] * Len;

      /* Compute offsets for search sites. */
      x->ss[search_site_count].mv.col = col_step;
      x->ss[search_site_count].mv.row = row_step;
      x->ss[search_site_count].offset = row_step * stride + col_step;
      search_site_count++;
    }
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 8;
}
179 
/*
 * To avoid the penalty of reads that cross a cache line, preload the reference
 * area into a small buffer that is aligned so that reads from it never cross
 * a cache line. This reduces the CPU cycles spent reading reference data in
 * the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
189 
190 /* estimated cost of a motion vector (r,c) */
191 #define MVC(r, c)                                                             \
192   (mvcost                                                                     \
193        ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
194        : 0)
195 /* pointer to predictor base of a motionvector */
196 #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
197 /* convert motion vector component to offset for svf calc */
198 #define SP(x) (((x)&3) << 1)
199 /* returns subpixel variance error function. */
200 #define DIST(r, c) \
201   vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
202 #define IFMVCV(r, c, s, e) \
203   if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
204 /* returns distortion + motion vector cost */
205 #define ERR(r, c) (MVC(r, c) + DIST(r, c))
206 /* checks if (r,c) has better score than previous best */
207 #define CHECK_BETTER(v, r, c)                           \
208   IFMVCV(r, c,                                          \
209          {                                              \
210            thismse = DIST(r, c);                        \
211            if ((v = (MVC(r, c) + thismse)) < besterr) { \
212              besterr = v;                               \
213              br = r;                                    \
214              bc = c;                                    \
215              *distortion = thismse;                     \
216              *sse1 = sse;                               \
217            }                                            \
218          },                                             \
219          v = UINT_MAX;)
220 
vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * bestmv,int_mv * ref_mv,int error_per_bit,const vp8_variance_fn_ptr_t * vfp,int * mvcost[2],int * distortion,unsigned int * sse1)221 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
222                                              int_mv *bestmv, int_mv *ref_mv,
223                                              int error_per_bit,
224                                              const vp8_variance_fn_ptr_t *vfp,
225                                              int *mvcost[2], int *distortion,
226                                              unsigned int *sse1) {
227   unsigned char *z = (*(b->base_src) + b->src);
228 
229   int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
230   int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
231   int tr = br, tc = bc;
232   unsigned int besterr;
233   unsigned int left, right, up, down, diag;
234   unsigned int sse;
235   unsigned int whichdir;
236   unsigned int halfiters = 4;
237   unsigned int quarteriters = 4;
238   int thismse;
239 
240   int minc = VPXMAX(x->mv_col_min * 4,
241                     (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
242   int maxc = VPXMIN(x->mv_col_max * 4,
243                     (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
244   int minr = VPXMAX(x->mv_row_min * 4,
245                     (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
246   int maxr = VPXMIN(x->mv_row_max * 4,
247                     (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
248 
249   int y_stride;
250   int offset;
251   int pre_stride = x->e_mbd.pre.y_stride;
252   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
253 
254 #if ARCH_X86 || ARCH_X86_64
255   MACROBLOCKD *xd = &x->e_mbd;
256   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
257                        bestmv->as_mv.col;
258   unsigned char *y;
259   int buf_r1, buf_r2, buf_c1;
260 
261   /* Clamping to avoid out-of-range data access */
262   buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
263                ? (bestmv->as_mv.row - x->mv_row_min)
264                : 3;
265   buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
266                ? (x->mv_row_max - bestmv->as_mv.row)
267                : 3;
268   buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
269                ? (bestmv->as_mv.col - x->mv_col_min)
270                : 3;
271   y_stride = 32;
272 
273   /* Copy to intermediate buffer before searching. */
274   vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
275                y_stride, 16 + buf_r1 + buf_r2);
276   y = xd->y_buf + y_stride * buf_r1 + buf_c1;
277 #else
278   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
279                      bestmv->as_mv.col;
280   y_stride = pre_stride;
281 #endif
282 
283   offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
284 
285   /* central mv */
286   bestmv->as_mv.row *= 8;
287   bestmv->as_mv.col *= 8;
288 
289   /* calculate central point error */
290   besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
291   *distortion = besterr;
292   besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
293 
294   /* TODO: Each subsequent iteration checks at least one point in common
295    * with the last iteration could be 2 ( if diag selected)
296    */
297   while (--halfiters) {
298     /* 1/2 pel */
299     CHECK_BETTER(left, tr, tc - 2);
300     CHECK_BETTER(right, tr, tc + 2);
301     CHECK_BETTER(up, tr - 2, tc);
302     CHECK_BETTER(down, tr + 2, tc);
303 
304     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
305 
306     switch (whichdir) {
307       case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
308       case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
309       case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
310       case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
311     }
312 
313     /* no reason to check the same one again. */
314     if (tr == br && tc == bc) break;
315 
316     tr = br;
317     tc = bc;
318   }
319 
320   /* TODO: Each subsequent iteration checks at least one point in common
321    * with the last iteration could be 2 ( if diag selected)
322    */
323 
324   /* 1/4 pel */
325   while (--quarteriters) {
326     CHECK_BETTER(left, tr, tc - 1);
327     CHECK_BETTER(right, tr, tc + 1);
328     CHECK_BETTER(up, tr - 1, tc);
329     CHECK_BETTER(down, tr + 1, tc);
330 
331     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
332 
333     switch (whichdir) {
334       case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
335       case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
336       case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
337       case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
338     }
339 
340     /* no reason to check the same one again. */
341     if (tr == br && tc == bc) break;
342 
343     tr = br;
344     tc = bc;
345   }
346 
347   bestmv->as_mv.row = br * 2;
348   bestmv->as_mv.col = bc * 2;
349 
350   if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
351       (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
352     return INT_MAX;
353   }
354 
355   return besterr;
356 }
357 #undef MVC
358 #undef PRE
359 #undef SP
360 #undef DIST
361 #undef IFMVCV
362 #undef ERR
363 #undef CHECK_BETTER
364 
/* Single-pass half-pel then quarter-pel refinement around a full-pel vector.
 *
 * x             : macroblock context; supplies the reference frame buffer
 *                 and (on x86) the y_buf scratch area.
 * b             : source block (base_src, src, src_stride).
 * d             : block descriptor; d->offset locates the block in the
 *                 reference buffer.
 * bestmv        : in: starting vector, used as a whole-pixel offset into the
 *                 reference buffer. out: the refined vector, in units 8x
 *                 finer than on entry.
 * ref_mv        : vector the motion vector cost is measured against.
 * error_per_bit : multiplier for the motion vector cost.
 * vfp           : variance (vf), sub-pixel variance (svf) and, on x86,
 *                 copymem function pointers.
 * mvcost        : motion vector cost tables, or NULL to disable costing.
 * distortion    : out: variance of the best candidate, without mv cost.
 * sse1          : out: SSE reported for the best candidate.
 *
 * Stage 1 tries the four half-pel neighbours (+/-4 in the scaled units) of
 * the start point plus one diagonal. Stage 2 does the same with quarter-pel
 * steps (+/-2) around the stage-1 winner. The diagonal in each stage is the
 * one between the better horizontal and the better vertical neighbour.
 *
 * Returns the best variance + motion vector cost found.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  /* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4. */
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  /* bit 0: right is at least as good as left; bit 1: same for down/up */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels. */
  /* If the half-pel winner lies above / left of the start point, step the
   * base pointer back one row / column. */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* Column has no fractional part: the left neighbour is read from the
     * previous pixel column at fraction 6. */
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* Row has no fractional part: the upper neighbour is read from the
     * previous pixel row at fraction 6. */
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
    default: /* whichdir is always 0..3; default mirrors the half-pel switch */
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
660 
/* Single-pass half-pel refinement around a full-pel vector.
 *
 * x             : macroblock context; supplies the reference frame buffer
 *                 and (on x86) the y_buf scratch area.
 * b             : source block (base_src, src, src_stride).
 * d             : block descriptor; d->offset locates the block in the
 *                 reference buffer.
 * bestmv        : in: starting vector, used as a whole-pixel offset into the
 *                 reference buffer. out: the refined vector, in units 8x
 *                 finer than on entry.
 * ref_mv        : vector the motion vector cost is measured against.
 * error_per_bit : multiplier for the motion vector cost.
 * vfp           : variance (vf), sub-pixel variance (svf) and, on x86,
 *                 copymem function pointers.
 * mvcost        : motion vector cost tables, or NULL to disable costing.
 * distortion    : out: variance of the best candidate, without mv cost.
 * sse1          : out: SSE reported for the best candidate.
 *
 * Tries the four half-pel neighbours (+/-4 in the scaled units) of the start
 * point, then the one diagonal between the better horizontal and the better
 * vertical neighbour.
 *
 * Returns the best variance + motion vector cost found.
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  /* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4. */
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal - */
  /* bit 0: right is at least as good as left; bit 1: same for down/up */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
801 
/* Sets all_in to 1 when every point within +/-range of (br, bc) lies inside
 * the motion vector limits held in x (mv_row_min/max, mv_col_min/max), and to
 * 0 otherwise. Expects all_in, br, bc and x to be in scope at the call site.
 * Expands to a brace block, so no trailing semicolon is required.
 */
#define CHECK_BOUNDS(range)                      \
  {                                              \
    all_in = 1;                                  \
    all_in &= ((br - (range)) >= x->mv_row_min); \
    all_in &= ((br + (range)) <= x->mv_row_max); \
    all_in &= ((bc - (range)) >= x->mv_col_min); \
    all_in &= ((bc + (range)) <= x->mv_col_max); \
  }
810 
/* Skips the rest of the current loop iteration (via continue) when this_mv
 * lies outside the motion vector limits held in x. Must be used directly
 * inside a loop body; expects this_mv and x to be in scope. Kept as a plain
 * brace block so that continue applies to the caller's loop.
 */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }
818 
/* Records candidate i as the best site when its SAD beats bestsad even after
 * the motion vector cost is added. The cost is only computed when the raw
 * SAD is already below bestsad; in that case thissad is left holding SAD +
 * cost. Expects thissad, bestsad, best_site, i, this_mv, fcenter_mv,
 * mvsadcost and sad_per_bit to be in scope at the call site.
 */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }
830 
831 static const MV next_chkpts[6][3] = {
832   { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
833   { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
834   { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
835 };
836 
vp8_hex_search(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int_mv * best_mv,int search_param,int sad_per_bit,const vp8_variance_fn_ptr_t * vfp,int * mvsadcost[2],int * mvcost[2],int_mv * center_mv)837 int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
838                    int_mv *best_mv, int search_param, int sad_per_bit,
839                    const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
840                    int *mvcost[2], int_mv *center_mv) {
841   MV hex[6] = {
842     { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
843   };
844   MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
845   int i, j;
846 
847   unsigned char *what = (*(b->base_src) + b->src);
848   int what_stride = b->src_stride;
849   int pre_stride = x->e_mbd.pre.y_stride;
850   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
851 
852   int in_what_stride = pre_stride;
853   int br, bc;
854   int_mv this_mv;
855   unsigned int bestsad;
856   unsigned int thissad;
857   unsigned char *base_offset;
858   unsigned char *this_offset;
859   int k = -1;
860   int all_in;
861   int best_site = -1;
862   int hex_range = 127;
863   int dia_range = 8;
864 
865   int_mv fcenter_mv;
866   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
867   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
868 
869   (void)mvcost;
870 
871   /* adjust ref_mv to make sure it is within MV range */
872   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
873                x->mv_row_max);
874   br = ref_mv->as_mv.row;
875   bc = ref_mv->as_mv.col;
876 
877   /* Work out the start point for the search */
878   base_offset = (unsigned char *)(base_pre + d->offset);
879   this_offset = base_offset + (br * (pre_stride)) + bc;
880   this_mv.as_mv.row = br;
881   this_mv.as_mv.col = bc;
882   bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
883             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
884 
885 #if CONFIG_MULTI_RES_ENCODING
886   /* Lower search range based on prediction info */
887   if (search_param >= 6)
888     goto cal_neighbors;
889   else if (search_param >= 5)
890     hex_range = 4;
891   else if (search_param >= 4)
892     hex_range = 6;
893   else if (search_param >= 3)
894     hex_range = 15;
895   else if (search_param >= 2)
896     hex_range = 31;
897   else if (search_param >= 1)
898     hex_range = 63;
899 
900   dia_range = 8;
901 #else
902   (void)search_param;
903 #endif
904 
905   /* hex search */
906   CHECK_BOUNDS(2)
907 
908   if (all_in) {
909     for (i = 0; i < 6; ++i) {
910       this_mv.as_mv.row = br + hex[i].row;
911       this_mv.as_mv.col = bc + hex[i].col;
912       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
913                     this_mv.as_mv.col;
914       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
915       CHECK_BETTER
916     }
917   } else {
918     for (i = 0; i < 6; ++i) {
919       this_mv.as_mv.row = br + hex[i].row;
920       this_mv.as_mv.col = bc + hex[i].col;
921       CHECK_POINT
922       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
923                     this_mv.as_mv.col;
924       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
925       CHECK_BETTER
926     }
927   }
928 
929   if (best_site == -1) {
930     goto cal_neighbors;
931   } else {
932     br += hex[best_site].row;
933     bc += hex[best_site].col;
934     k = best_site;
935   }
936 
937   for (j = 1; j < hex_range; ++j) {
938     best_site = -1;
939     CHECK_BOUNDS(2)
940 
941     if (all_in) {
942       for (i = 0; i < 3; ++i) {
943         this_mv.as_mv.row = br + next_chkpts[k][i].row;
944         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
945         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
946                       this_mv.as_mv.col;
947         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
948         CHECK_BETTER
949       }
950     } else {
951       for (i = 0; i < 3; ++i) {
952         this_mv.as_mv.row = br + next_chkpts[k][i].row;
953         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
954         CHECK_POINT
955         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
956                       this_mv.as_mv.col;
957         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
958         CHECK_BETTER
959       }
960     }
961 
962     if (best_site == -1) {
963       break;
964     } else {
965       br += next_chkpts[k][best_site].row;
966       bc += next_chkpts[k][best_site].col;
967       k += 5 + best_site;
968       if (k >= 12) {
969         k -= 12;
970       } else if (k >= 6) {
971         k -= 6;
972       }
973     }
974   }
975 
976 /* check 4 1-away neighbors */
977 cal_neighbors:
978   for (j = 0; j < dia_range; ++j) {
979     best_site = -1;
980     CHECK_BOUNDS(1)
981 
982     if (all_in) {
983       for (i = 0; i < 4; ++i) {
984         this_mv.as_mv.row = br + neighbors[i].row;
985         this_mv.as_mv.col = bc + neighbors[i].col;
986         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
987                       this_mv.as_mv.col;
988         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
989         CHECK_BETTER
990       }
991     } else {
992       for (i = 0; i < 4; ++i) {
993         this_mv.as_mv.row = br + neighbors[i].row;
994         this_mv.as_mv.col = bc + neighbors[i].col;
995         CHECK_POINT
996         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
997                       this_mv.as_mv.col;
998         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
999         CHECK_BETTER
1000       }
1001     }
1002 
1003     if (best_site == -1) {
1004       break;
1005     } else {
1006       br += neighbors[best_site].row;
1007       bc += neighbors[best_site].col;
1008     }
1009   }
1010 
1011   best_mv->as_mv.row = br;
1012   best_mv->as_mv.col = bc;
1013 
1014   return bestsad;
1015 }
1016 #undef CHECK_BOUNDS
1017 #undef CHECK_POINT
1018 #undef CHECK_BETTER
1019 
vp8_diamond_search_sad_c(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int_mv * best_mv,int search_param,int sad_per_bit,int * num00,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1020 int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1021                              int_mv *best_mv, int search_param, int sad_per_bit,
1022                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1023                              int *mvcost[2], int_mv *center_mv) {
1024   int i, j, step;
1025 
1026   unsigned char *what = (*(b->base_src) + b->src);
1027   int what_stride = b->src_stride;
1028   unsigned char *in_what;
1029   int pre_stride = x->e_mbd.pre.y_stride;
1030   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1031   int in_what_stride = pre_stride;
1032   unsigned char *best_address;
1033 
1034   int tot_steps;
1035   int_mv this_mv;
1036 
1037   unsigned int bestsad;
1038   unsigned int thissad;
1039   int best_site = 0;
1040   int last_site = 0;
1041 
1042   int ref_row;
1043   int ref_col;
1044   int this_row_offset;
1045   int this_col_offset;
1046   search_site *ss;
1047 
1048   unsigned char *check_here;
1049 
1050   int *mvsadcost[2];
1051   int_mv fcenter_mv;
1052 
1053   mvsadcost[0] = x->mvsadcost[0];
1054   mvsadcost[1] = x->mvsadcost[1];
1055   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1056   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1057 
1058   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1059                x->mv_row_max);
1060   ref_row = ref_mv->as_mv.row;
1061   ref_col = ref_mv->as_mv.col;
1062   *num00 = 0;
1063   best_mv->as_mv.row = ref_row;
1064   best_mv->as_mv.col = ref_col;
1065 
1066   /* Work out the start point for the search */
1067   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1068                               ref_col);
1069   best_address = in_what;
1070 
1071   /* Check the starting position */
1072   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1073             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1074 
1075   /* search_param determines the length of the initial step and hence
1076    * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
1077    * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1078    */
1079   ss = &x->ss[search_param * x->searches_per_step];
1080   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1081 
1082   i = 1;
1083 
1084   for (step = 0; step < tot_steps; ++step) {
1085     for (j = 0; j < x->searches_per_step; ++j) {
1086       /* Trap illegal vectors */
1087       this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1088       this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1089 
1090       if ((this_col_offset > x->mv_col_min) &&
1091           (this_col_offset < x->mv_col_max) &&
1092           (this_row_offset > x->mv_row_min) &&
1093           (this_row_offset < x->mv_row_max))
1094 
1095       {
1096         check_here = ss[i].offset + best_address;
1097         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1098 
1099         if (thissad < bestsad) {
1100           this_mv.as_mv.row = this_row_offset;
1101           this_mv.as_mv.col = this_col_offset;
1102           thissad +=
1103               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1104 
1105           if (thissad < bestsad) {
1106             bestsad = thissad;
1107             best_site = i;
1108           }
1109         }
1110       }
1111 
1112       i++;
1113     }
1114 
1115     if (best_site != last_site) {
1116       best_mv->as_mv.row += ss[best_site].mv.row;
1117       best_mv->as_mv.col += ss[best_site].mv.col;
1118       best_address += ss[best_site].offset;
1119       last_site = best_site;
1120     } else if (best_address == in_what) {
1121       (*num00)++;
1122     }
1123   }
1124 
1125   this_mv.as_mv.row = best_mv->as_mv.row << 3;
1126   this_mv.as_mv.col = best_mv->as_mv.col << 3;
1127 
1128   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1129          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1130 }
1131 
1132 #if HAVE_SSE2 || HAVE_MSA
vp8_diamond_search_sadx4(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int_mv * best_mv,int search_param,int sad_per_bit,int * num00,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1133 int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1134                              int_mv *best_mv, int search_param, int sad_per_bit,
1135                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1136                              int *mvcost[2], int_mv *center_mv) {
1137   int i, j, step;
1138 
1139   unsigned char *what = (*(b->base_src) + b->src);
1140   int what_stride = b->src_stride;
1141   unsigned char *in_what;
1142   int pre_stride = x->e_mbd.pre.y_stride;
1143   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1144   int in_what_stride = pre_stride;
1145   unsigned char *best_address;
1146 
1147   int tot_steps;
1148   int_mv this_mv;
1149 
1150   unsigned int bestsad;
1151   unsigned int thissad;
1152   int best_site = 0;
1153   int last_site = 0;
1154 
1155   int ref_row;
1156   int ref_col;
1157   int this_row_offset;
1158   int this_col_offset;
1159   search_site *ss;
1160 
1161   unsigned char *check_here;
1162 
1163   int *mvsadcost[2];
1164   int_mv fcenter_mv;
1165 
1166   mvsadcost[0] = x->mvsadcost[0];
1167   mvsadcost[1] = x->mvsadcost[1];
1168   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1169   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1170 
1171   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1172                x->mv_row_max);
1173   ref_row = ref_mv->as_mv.row;
1174   ref_col = ref_mv->as_mv.col;
1175   *num00 = 0;
1176   best_mv->as_mv.row = ref_row;
1177   best_mv->as_mv.col = ref_col;
1178 
1179   /* Work out the start point for the search */
1180   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1181                               ref_col);
1182   best_address = in_what;
1183 
1184   /* Check the starting position */
1185   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1186             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1187 
1188   /* search_param determines the length of the initial step and hence the
1189    * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
1190    * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1191    */
1192   ss = &x->ss[search_param * x->searches_per_step];
1193   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1194 
1195   i = 1;
1196 
1197   for (step = 0; step < tot_steps; ++step) {
1198     int all_in = 1, t;
1199 
1200     /* To know if all neighbor points are within the bounds, 4 bounds
1201      * checking are enough instead of checking 4 bounds for each
1202      * points.
1203      */
1204     all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1205     all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1206     all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1207     all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
1208 
1209     if (all_in) {
1210       unsigned int sad_array[4];
1211 
1212       for (j = 0; j < x->searches_per_step; j += 4) {
1213         const unsigned char *block_offset[4];
1214 
1215         for (t = 0; t < 4; ++t) {
1216           block_offset[t] = ss[i + t].offset + best_address;
1217         }
1218 
1219         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1220                        sad_array);
1221 
1222         for (t = 0; t < 4; t++, i++) {
1223           if (sad_array[t] < bestsad) {
1224             this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1225             this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1226             sad_array[t] +=
1227                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1228 
1229             if (sad_array[t] < bestsad) {
1230               bestsad = sad_array[t];
1231               best_site = i;
1232             }
1233           }
1234         }
1235       }
1236     } else {
1237       for (j = 0; j < x->searches_per_step; ++j) {
1238         /* Trap illegal vectors */
1239         this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1240         this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1241 
1242         if ((this_col_offset > x->mv_col_min) &&
1243             (this_col_offset < x->mv_col_max) &&
1244             (this_row_offset > x->mv_row_min) &&
1245             (this_row_offset < x->mv_row_max)) {
1246           check_here = ss[i].offset + best_address;
1247           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1248 
1249           if (thissad < bestsad) {
1250             this_mv.as_mv.row = this_row_offset;
1251             this_mv.as_mv.col = this_col_offset;
1252             thissad +=
1253                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1254 
1255             if (thissad < bestsad) {
1256               bestsad = thissad;
1257               best_site = i;
1258             }
1259           }
1260         }
1261         i++;
1262       }
1263     }
1264 
1265     if (best_site != last_site) {
1266       best_mv->as_mv.row += ss[best_site].mv.row;
1267       best_mv->as_mv.col += ss[best_site].mv.col;
1268       best_address += ss[best_site].offset;
1269       last_site = best_site;
1270     } else if (best_address == in_what) {
1271       (*num00)++;
1272     }
1273   }
1274 
1275   this_mv.as_mv.row = best_mv->as_mv.row * 8;
1276   this_mv.as_mv.col = best_mv->as_mv.col * 8;
1277 
1278   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1279          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1280 }
1281 #endif  // HAVE_SSE2 || HAVE_MSA
1282 
vp8_full_search_sad_c(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int sad_per_bit,int distance,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1283 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1284                           int sad_per_bit, int distance,
1285                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1286                           int_mv *center_mv) {
1287   unsigned char *what = (*(b->base_src) + b->src);
1288   int what_stride = b->src_stride;
1289   unsigned char *in_what;
1290   int pre_stride = x->e_mbd.pre.y_stride;
1291   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1292   int in_what_stride = pre_stride;
1293   int mv_stride = pre_stride;
1294   unsigned char *bestaddress;
1295   int_mv *best_mv = &d->bmi.mv;
1296   int_mv this_mv;
1297   unsigned int bestsad;
1298   unsigned int thissad;
1299   int r, c;
1300 
1301   unsigned char *check_here;
1302 
1303   int ref_row = ref_mv->as_mv.row;
1304   int ref_col = ref_mv->as_mv.col;
1305 
1306   int row_min = ref_row - distance;
1307   int row_max = ref_row + distance;
1308   int col_min = ref_col - distance;
1309   int col_max = ref_col + distance;
1310 
1311   int *mvsadcost[2];
1312   int_mv fcenter_mv;
1313 
1314   mvsadcost[0] = x->mvsadcost[0];
1315   mvsadcost[1] = x->mvsadcost[1];
1316   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1317   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1318 
1319   /* Work out the mid point for the search */
1320   in_what = base_pre + d->offset;
1321   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1322 
1323   best_mv->as_mv.row = ref_row;
1324   best_mv->as_mv.col = ref_col;
1325 
1326   /* Baseline value at the centre */
1327   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1328             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1329 
1330   /* Apply further limits to prevent us looking using vectors that
1331    * stretch beyiond the UMV border
1332    */
1333   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1334 
1335   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1336 
1337   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1338 
1339   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1340 
1341   for (r = row_min; r < row_max; ++r) {
1342     this_mv.as_mv.row = r;
1343     check_here = r * mv_stride + in_what + col_min;
1344 
1345     for (c = col_min; c < col_max; ++c) {
1346       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1347 
1348       this_mv.as_mv.col = c;
1349       thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1350 
1351       if (thissad < bestsad) {
1352         bestsad = thissad;
1353         best_mv->as_mv.row = r;
1354         best_mv->as_mv.col = c;
1355         bestaddress = check_here;
1356       }
1357 
1358       check_here++;
1359     }
1360   }
1361 
1362   this_mv.as_mv.row = best_mv->as_mv.row << 3;
1363   this_mv.as_mv.col = best_mv->as_mv.col << 3;
1364 
1365   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1366          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1367 }
1368 
1369 #if HAVE_SSSE3
vp8_full_search_sadx3(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int sad_per_bit,int distance,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1370 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1371                           int sad_per_bit, int distance,
1372                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1373                           int_mv *center_mv) {
1374   unsigned char *what = (*(b->base_src) + b->src);
1375   int what_stride = b->src_stride;
1376   unsigned char *in_what;
1377   int pre_stride = x->e_mbd.pre.y_stride;
1378   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1379   int in_what_stride = pre_stride;
1380   int mv_stride = pre_stride;
1381   unsigned char *bestaddress;
1382   int_mv *best_mv = &d->bmi.mv;
1383   int_mv this_mv;
1384   unsigned int bestsad;
1385   unsigned int thissad;
1386   int r, c;
1387 
1388   unsigned char *check_here;
1389 
1390   int ref_row = ref_mv->as_mv.row;
1391   int ref_col = ref_mv->as_mv.col;
1392 
1393   int row_min = ref_row - distance;
1394   int row_max = ref_row + distance;
1395   int col_min = ref_col - distance;
1396   int col_max = ref_col + distance;
1397 
1398   unsigned int sad_array[3];
1399 
1400   int *mvsadcost[2];
1401   int_mv fcenter_mv;
1402 
1403   mvsadcost[0] = x->mvsadcost[0];
1404   mvsadcost[1] = x->mvsadcost[1];
1405   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1406   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1407 
1408   /* Work out the mid point for the search */
1409   in_what = base_pre + d->offset;
1410   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1411 
1412   best_mv->as_mv.row = ref_row;
1413   best_mv->as_mv.col = ref_col;
1414 
1415   /* Baseline value at the centre */
1416   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1417             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1418 
1419   /* Apply further limits to prevent us looking using vectors that stretch
1420    * beyond the UMV border
1421    */
1422   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1423 
1424   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1425 
1426   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1427 
1428   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1429 
1430   for (r = row_min; r < row_max; ++r) {
1431     this_mv.as_mv.row = r;
1432     check_here = r * mv_stride + in_what + col_min;
1433     c = col_min;
1434 
1435     while ((c + 2) < col_max) {
1436       int i;
1437 
1438       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1439 
1440       for (i = 0; i < 3; ++i) {
1441         thissad = sad_array[i];
1442 
1443         if (thissad < bestsad) {
1444           this_mv.as_mv.col = c;
1445           thissad +=
1446               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1447 
1448           if (thissad < bestsad) {
1449             bestsad = thissad;
1450             best_mv->as_mv.row = r;
1451             best_mv->as_mv.col = c;
1452             bestaddress = check_here;
1453           }
1454         }
1455 
1456         check_here++;
1457         c++;
1458       }
1459     }
1460 
1461     while (c < col_max) {
1462       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1463 
1464       if (thissad < bestsad) {
1465         this_mv.as_mv.col = c;
1466         thissad +=
1467             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1468 
1469         if (thissad < bestsad) {
1470           bestsad = thissad;
1471           best_mv->as_mv.row = r;
1472           best_mv->as_mv.col = c;
1473           bestaddress = check_here;
1474         }
1475       }
1476 
1477       check_here++;
1478       c++;
1479     }
1480   }
1481 
1482   this_mv.as_mv.row = best_mv->as_mv.row << 3;
1483   this_mv.as_mv.col = best_mv->as_mv.col << 3;
1484 
1485   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1486          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1487 }
1488 #endif  // HAVE_SSSE3
1489 
1490 #if HAVE_SSE4_1
vp8_full_search_sadx8(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int sad_per_bit,int distance,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1491 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1492                           int sad_per_bit, int distance,
1493                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1494                           int_mv *center_mv) {
1495   unsigned char *what = (*(b->base_src) + b->src);
1496   int what_stride = b->src_stride;
1497   int pre_stride = x->e_mbd.pre.y_stride;
1498   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1499   unsigned char *in_what;
1500   int in_what_stride = pre_stride;
1501   int mv_stride = pre_stride;
1502   unsigned char *bestaddress;
1503   int_mv *best_mv = &d->bmi.mv;
1504   int_mv this_mv;
1505   unsigned int bestsad;
1506   unsigned int thissad;
1507   int r, c;
1508 
1509   unsigned char *check_here;
1510 
1511   int ref_row = ref_mv->as_mv.row;
1512   int ref_col = ref_mv->as_mv.col;
1513 
1514   int row_min = ref_row - distance;
1515   int row_max = ref_row + distance;
1516   int col_min = ref_col - distance;
1517   int col_max = ref_col + distance;
1518 
1519   DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
1520   unsigned int sad_array[3];
1521 
1522   int *mvsadcost[2];
1523   int_mv fcenter_mv;
1524 
1525   mvsadcost[0] = x->mvsadcost[0];
1526   mvsadcost[1] = x->mvsadcost[1];
1527   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1528   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1529 
1530   /* Work out the mid point for the search */
1531   in_what = base_pre + d->offset;
1532   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1533 
1534   best_mv->as_mv.row = ref_row;
1535   best_mv->as_mv.col = ref_col;
1536 
1537   /* Baseline value at the centre */
1538   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1539             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1540 
1541   /* Apply further limits to prevent us looking using vectors that stretch
1542    * beyond the UMV border
1543    */
1544   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1545 
1546   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1547 
1548   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1549 
1550   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1551 
1552   for (r = row_min; r < row_max; ++r) {
1553     this_mv.as_mv.row = r;
1554     check_here = r * mv_stride + in_what + col_min;
1555     c = col_min;
1556 
1557     while ((c + 7) < col_max) {
1558       int i;
1559 
1560       fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1561 
1562       for (i = 0; i < 8; ++i) {
1563         thissad = sad_array8[i];
1564 
1565         if (thissad < bestsad) {
1566           this_mv.as_mv.col = c;
1567           thissad +=
1568               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1569 
1570           if (thissad < bestsad) {
1571             bestsad = thissad;
1572             best_mv->as_mv.row = r;
1573             best_mv->as_mv.col = c;
1574             bestaddress = check_here;
1575           }
1576         }
1577 
1578         check_here++;
1579         c++;
1580       }
1581     }
1582 
1583     while ((c + 2) < col_max) {
1584       int i;
1585 
1586       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1587 
1588       for (i = 0; i < 3; ++i) {
1589         thissad = sad_array[i];
1590 
1591         if (thissad < bestsad) {
1592           this_mv.as_mv.col = c;
1593           thissad +=
1594               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1595 
1596           if (thissad < bestsad) {
1597             bestsad = thissad;
1598             best_mv->as_mv.row = r;
1599             best_mv->as_mv.col = c;
1600             bestaddress = check_here;
1601           }
1602         }
1603 
1604         check_here++;
1605         c++;
1606       }
1607     }
1608 
1609     while (c < col_max) {
1610       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1611 
1612       if (thissad < bestsad) {
1613         this_mv.as_mv.col = c;
1614         thissad +=
1615             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1616 
1617         if (thissad < bestsad) {
1618           bestsad = thissad;
1619           best_mv->as_mv.row = r;
1620           best_mv->as_mv.col = c;
1621           bestaddress = check_here;
1622         }
1623       }
1624 
1625       check_here++;
1626       c++;
1627     }
1628   }
1629 
1630   this_mv.as_mv.row = best_mv->as_mv.row * 8;
1631   this_mv.as_mv.col = best_mv->as_mv.col * 8;
1632 
1633   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1634          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1635 }
1636 #endif  // HAVE_SSE4_1
1637 
vp8_refining_search_sad_c(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int error_per_bit,int search_range,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1638 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1639                               int_mv *ref_mv, int error_per_bit,
1640                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1641                               int *mvcost[2], int_mv *center_mv) {
1642   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1643   int i, j;
1644   short this_row_offset, this_col_offset;
1645 
1646   int what_stride = b->src_stride;
1647   int pre_stride = x->e_mbd.pre.y_stride;
1648   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1649   int in_what_stride = pre_stride;
1650   unsigned char *what = (*(b->base_src) + b->src);
1651   unsigned char *best_address =
1652       (unsigned char *)(base_pre + d->offset +
1653                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1654   unsigned char *check_here;
1655   int_mv this_mv;
1656   unsigned int bestsad;
1657   unsigned int thissad;
1658 
1659   int *mvsadcost[2];
1660   int_mv fcenter_mv;
1661 
1662   mvsadcost[0] = x->mvsadcost[0];
1663   mvsadcost[1] = x->mvsadcost[1];
1664   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1665   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1666 
1667   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1668             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1669 
1670   for (i = 0; i < search_range; ++i) {
1671     int best_site = -1;
1672 
1673     for (j = 0; j < 4; ++j) {
1674       this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1675       this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1676 
1677       if ((this_col_offset > x->mv_col_min) &&
1678           (this_col_offset < x->mv_col_max) &&
1679           (this_row_offset > x->mv_row_min) &&
1680           (this_row_offset < x->mv_row_max)) {
1681         check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1682                      best_address;
1683         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1684 
1685         if (thissad < bestsad) {
1686           this_mv.as_mv.row = this_row_offset;
1687           this_mv.as_mv.col = this_col_offset;
1688           thissad +=
1689               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1690 
1691           if (thissad < bestsad) {
1692             bestsad = thissad;
1693             best_site = j;
1694           }
1695         }
1696       }
1697     }
1698 
1699     if (best_site == -1) {
1700       break;
1701     } else {
1702       ref_mv->as_mv.row += neighbors[best_site].row;
1703       ref_mv->as_mv.col += neighbors[best_site].col;
1704       best_address += (neighbors[best_site].row) * in_what_stride +
1705                       neighbors[best_site].col;
1706     }
1707   }
1708 
1709   this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1710   this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1711 
1712   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1713          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1714 }
1715 
1716 #if HAVE_SSE2 || HAVE_MSA
/* Refines the motion vector in *ref_mv by repeatedly stepping to whichever of
 * the four directly adjacent positions (up, left, right, down) gives the
 * lowest SAD plus motion vector cost, for at most search_range steps. The
 * search stops early as soon as no neighbour improves on the current best.
 *
 * When all four neighbours lie strictly inside the limits x->mv_row_min /
 * x->mv_row_max / x->mv_col_min / x->mv_col_max, their SADs are obtained with
 * a single fn_ptr->sdx4df() call; otherwise every in-range neighbour is
 * measured on its own with fn_ptr->sdf().
 *
 * *ref_mv is updated in place and holds the refined vector on return. The
 * return value is fn_ptr->vf() at the final position plus mv_err_cost() of
 * the final vector (multiplied by 8) measured against center_mv. Note that
 * the SAD costs during the search use the error_per_bit argument, whereas
 * the final cost uses x->errorperbit.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  /* Candidate steps, in the order they are evaluated: up, left, right, down.
   * The sdx4df() address table below must follow the same order. */
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };

  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const src = (*(b->base_src) + b->src);
  /* Address in the prediction buffer matching the current *ref_mv. */
  unsigned char *best_ptr = x->e_mbd.pre.y_buffer + d->offset +
                            (ref_mv->as_mv.row * ref_stride) +
                            ref_mv->as_mv.col;

  int *sad_cost[2];
  int_mv search_center;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int sse;
  int step;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];

  /* center_mv is reduced by 3 bits so that it is expressed in the same units
   * as ref_mv for the SAD cost computations. */
  search_center.as_mv.row = center_mv->as_mv.row >> 3;
  search_center.as_mv.col = center_mv->as_mv.col >> 3;

  /* Score of the starting position. */
  best_sad = fn_ptr->sdf(src, src_stride, best_ptr, ref_stride) +
             mvsad_err_cost(ref_mv, &search_center, sad_cost, error_per_bit);

  for (step = 0; step < search_range; ++step) {
    const int row = ref_mv->as_mv.row;
    const int col = ref_mv->as_mv.col;
    /* Non-zero when every neighbour is strictly inside the search limits. */
    const int all_in = (row - 1 > x->mv_row_min) &&
                       (row + 1 < x->mv_row_max) &&
                       (col - 1 > x->mv_col_min) && (col + 1 < x->mv_col_max);
    int winner = -1;
    int k;

    if (all_in) {
      unsigned int sad_array[4];
      const unsigned char *block_offset[4];

      block_offset[0] = best_ptr - ref_stride;
      block_offset[1] = best_ptr - 1;
      block_offset[2] = best_ptr + 1;
      block_offset[3] = best_ptr + ref_stride;

      /* One call yields the SAD of all four neighbours. */
      fn_ptr->sdx4df(src, src_stride, block_offset, ref_stride, sad_array);

      for (k = 0; k < 4; ++k) {
        /* Only pay for the vector cost if the raw SAD already beats the
         * current best. */
        if (sad_array[k] >= best_sad) continue;

        cand_mv.as_mv.row = row + neighbors[k].row;
        cand_mv.as_mv.col = col + neighbors[k].col;
        sad_array[k] +=
            mvsad_err_cost(&cand_mv, &search_center, sad_cost, error_per_bit);

        if (sad_array[k] < best_sad) {
          best_sad = sad_array[k];
          winner = k;
        }
      }
    } else {
      for (k = 0; k < 4; ++k) {
        const short cand_row = (short)(row + neighbors[k].row);
        const short cand_col = (short)(col + neighbors[k].col);
        const int delta = neighbors[k].row * ref_stride + neighbors[k].col;
        unsigned int sad;

        /* Skip candidates on or outside the search limits. */
        if (cand_col <= x->mv_col_min || cand_col >= x->mv_col_max ||
            cand_row <= x->mv_row_min || cand_row >= x->mv_row_max) {
          continue;
        }

        sad = fn_ptr->sdf(src, src_stride, best_ptr + delta, ref_stride);
        if (sad >= best_sad) continue;

        cand_mv.as_mv.row = cand_row;
        cand_mv.as_mv.col = cand_col;
        sad +=
            mvsad_err_cost(&cand_mv, &search_center, sad_cost, error_per_bit);

        if (sad < best_sad) {
          best_sad = sad;
          winner = k;
        }
      }
    }

    /* No neighbour improved on the current position: the search is done. */
    if (winner == -1) break;

    /* Move to the winning neighbour and keep the buffer address in sync. */
    ref_mv->as_mv.row += neighbors[winner].row;
    ref_mv->as_mv.col += neighbors[winner].col;
    best_ptr += neighbors[winner].row * ref_stride + neighbors[winner].col;
  }

  /* Scale the result by 8 so it can be costed against center_mv directly. */
  cand_mv.as_mv.row = ref_mv->as_mv.row * 8;
  cand_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(src, src_stride, best_ptr, ref_stride, &sse) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1826 #endif  // HAVE_SSE2 || HAVE_MSA
1827