1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "onyx_int.h"
13 #include "mcomp.h"
14 #include "vpx_mem/vpx_mem.h"
15 #include "vpx_config.h"
16 #include <stdio.h>
17 #include <limits.h>
18 #include <math.h>
19 #include "vp8/common/findnearmv.h"
20 #include "vp8/common/common.h"
21 
22 #ifdef VP8_ENTROPY_STATS
23 static int mv_ref_ct [31] [4] [2];
24 static int mv_mode_cts [4] [2];
25 #endif
26 
/* Weighted cost of coding "mv" relative to "ref".
 *
 * mvcost[0] is looked up with half the row difference and mvcost[1] with
 * half the column difference; the sum is multiplied by Weight and shifted
 * right by 7.  The differences may be negative, so the tables are
 * presumably pointers into the middle of larger arrays - confirm with the
 * code that sets them up.
 *
 * The cost tables are based on the distribution of vectors in the previous
 * frame and therefore tend to overstate the cost of a vector.  Coding a
 * new vector also influences the cost of later vectors and how well
 * NEAR / NEAREST predict later blocks.  "Weight" lets the caller take some
 * limited account of these effects.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    const int row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
    const int col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;
    const int table_cost = mvcost[0][row_idx] + mvcost[1][col_idx];

    return (table_cost * Weight) >> 7;
}
38 
/* Motion vector cost expressed on the error scale.
 *
 * Looks up mvcost[0] / mvcost[1] with half the row / column difference
 * between "mv" and "ref", multiplies the sum by error_per_bit, adds 128
 * and shifts right by 8.  Returns 0 when mvcost is NULL, i.e. when the
 * caller does not want the vector cost to be counted.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    int row_idx;
    int col_idx;

    /* No tables supplied: ignore mv costing. */
    if (!mvcost)
        return 0;

    row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
    col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;

    return ((mvcost[0][row_idx] + mvcost[1][col_idx]) * error_per_bit + 128) >> 8;
}
48 
/* SAD-domain motion vector cost, computed on a full pixel basis.
 *
 * Unlike mv_err_cost() the row / column differences index the tables
 * directly (no halving).  The summed table entries are multiplied by
 * error_per_bit, 128 is added and the result is shifted right by 8.
 * Returns 0 when mvsadcost is NULL.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    int row_idx;
    int col_idx;

    /* No tables supplied: ignore mv costing. */
    if (!mvsadcost)
        return 0;

    row_idx = mv->as_mv.row - ref->as_mv.row;
    col_idx = mv->as_mv.col - ref->as_mv.col;

    return ((mvsadcost[0][row_idx] + mvsadcost[1][col_idx]) * error_per_bit + 128) >> 8;
}
59 
/* Fill x->ss with the search sites of a 4-point (up, down, left, right)
 * step search.
 *
 * Site 0 is the centre (zero vector, zero offset).  After it come four
 * sites per step length, starting at MAX_FIRST_STEP and halving until the
 * length reaches 0.  Each site stores its vector and the matching linear
 * offset row * stride + col.
 *
 * x      - macroblock whose ss[], ss_count and searches_per_step are set.
 * stride - row pitch used to turn (row, col) into a linear offset.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    /* {row, col} unit steps in the order the sites are stored. */
    static const int unit_step[4][2] =
    {
        { -1,  0 },     /* up    */
        {  1,  0 },     /* down  */
        {  0, -1 },     /* left  */
        {  0,  1 }      /* right */
    };
    int step_len;
    int dir;
    int site = 0;

    /* The centre position comes first. */
    x->ss[site].mv.col = 0;
    x->ss[site].mv.row = 0;
    x->ss[site].offset = 0;
    site++;

    /* Four sites per step; the step length contracts by half each time. */
    for (step_len = MAX_FIRST_STEP; step_len > 0; step_len /= 2)
    {
        for (dir = 0; dir < 4; dir++)
        {
            const int d_row = unit_step[dir][0] * step_len;
            const int d_col = unit_step[dir][1] * step_len;

            x->ss[site].mv.col = d_col;
            x->ss[site].mv.row = d_row;
            x->ss[site].offset = d_row * stride + d_col;
            site++;
        }
    }

    x->ss_count = site;
    x->searches_per_step = 4;
}
107 
/* Fill x->ss with the search sites of an 8-point step search (the four
 * axis neighbours followed by the four diagonal neighbours).
 *
 * Site 0 is the centre (zero vector, zero offset).  After it come eight
 * sites per step length, starting at MAX_FIRST_STEP and halving until the
 * length reaches 0.  Each site stores its vector and the matching linear
 * offset row * stride + col.
 *
 * x      - macroblock whose ss[], ss_count and searches_per_step are set.
 * stride - row pitch used to turn (row, col) into a linear offset.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    /* {row, col} unit steps in the order the sites are stored. */
    static const int unit_step[8][2] =
    {
        { -1,  0 },     /* up         */
        {  1,  0 },     /* down       */
        {  0, -1 },     /* left       */
        {  0,  1 },     /* right      */
        { -1, -1 },     /* up-left    */
        { -1,  1 },     /* up-right   */
        {  1, -1 },     /* down-left  */
        {  1,  1 }      /* down-right */
    };
    int step_len;
    int dir;
    int site = 0;

    /* The centre position comes first. */
    x->ss[site].mv.col = 0;
    x->ss[site].mv.row = 0;
    x->ss[site].offset = 0;
    site++;

    /* Eight sites per step; the step length contracts by half each time. */
    for (step_len = MAX_FIRST_STEP; step_len > 0; step_len /= 2)
    {
        for (dir = 0; dir < 8; dir++)
        {
            const int d_row = unit_step[dir][0] * step_len;
            const int d_col = unit_step[dir][1] * step_len;

            x->ss[site].mv.col = d_col;
            x->ss[site].mv.row = d_row;
            x->ss[site].offset = d_row * stride + d_col;
            site++;
        }
    }

    x->ss_count = site;
    x->searches_per_step = 8;
}
179 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is first copied into a small aligned buffer, so that no read from this
 * buffer crosses a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a
 * 22 rows x 32 cols area is copied, which is enough for a 16x16 macroblock.
 * Later, for SPLITMV, the area could be reduced.
 */
189 
190 /* estimated cost of a motion vector (r,c) */
191 #define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
192 /* pointer to predictor base of a motionvector */
193 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
194 /* convert motion vector component to offset for svf calc */
195 #define SP(x) (((x)&3)<<1)
196 /* returns subpixel variance error function. */
197 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
198 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
199 /* returns distortion + motion vector cost */
200 #define ERR(r,c) (MVC(r,c)+DIST(r,c))
201 /* checks if (r,c) has better score than previous best */
202 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
203 
vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * bestmv,int_mv * ref_mv,int error_per_bit,const vp8_variance_fn_ptr_t * vfp,int * mvcost[2],int * distortion,unsigned int * sse1)204 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
205                                              int_mv *bestmv, int_mv *ref_mv,
206                                              int error_per_bit,
207                                              const vp8_variance_fn_ptr_t *vfp,
208                                              int *mvcost[2], int *distortion,
209                                              unsigned int *sse1)
210 {
211     unsigned char *z = (*(b->base_src) + b->src);
212 
213     int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
214     int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
215     int tr = br, tc = bc;
216     unsigned int besterr;
217     unsigned int left, right, up, down, diag;
218     unsigned int sse;
219     unsigned int whichdir;
220     unsigned int halfiters = 4;
221     unsigned int quarteriters = 4;
222     int thismse;
223 
224     int minc = MAX(x->mv_col_min * 4,
225                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
226     int maxc = MIN(x->mv_col_max * 4,
227                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
228     int minr = MAX(x->mv_row_min * 4,
229                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
230     int maxr = MIN(x->mv_row_max * 4,
231                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
232 
233     int y_stride;
234     int offset;
235     int pre_stride = x->e_mbd.pre.y_stride;
236     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
237 
238 
239 #if ARCH_X86 || ARCH_X86_64
240     MACROBLOCKD *xd = &x->e_mbd;
241     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
242     unsigned char *y;
243     int buf_r1, buf_r2, buf_c1;
244 
245     /* Clamping to avoid out-of-range data access */
246     buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
247     buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
248     buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
249     y_stride = 32;
250 
251     /* Copy to intermediate buffer before searching. */
252     vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
253     y = xd->y_buf + y_stride*buf_r1 +buf_c1;
254 #else
255     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
256     y_stride = pre_stride;
257 #endif
258 
259     offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
260 
261     /* central mv */
262     bestmv->as_mv.row *= 8;
263     bestmv->as_mv.col *= 8;
264 
265     /* calculate central point error */
266     besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
267     *distortion = besterr;
268     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
269 
270     /* TODO: Each subsequent iteration checks at least one point in common
271      * with the last iteration could be 2 ( if diag selected)
272      */
273     while (--halfiters)
274     {
275         /* 1/2 pel */
276         CHECK_BETTER(left, tr, tc - 2);
277         CHECK_BETTER(right, tr, tc + 2);
278         CHECK_BETTER(up, tr - 2, tc);
279         CHECK_BETTER(down, tr + 2, tc);
280 
281         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
282 
283         switch (whichdir)
284         {
285         case 0:
286             CHECK_BETTER(diag, tr - 2, tc - 2);
287             break;
288         case 1:
289             CHECK_BETTER(diag, tr - 2, tc + 2);
290             break;
291         case 2:
292             CHECK_BETTER(diag, tr + 2, tc - 2);
293             break;
294         case 3:
295             CHECK_BETTER(diag, tr + 2, tc + 2);
296             break;
297         }
298 
299         /* no reason to check the same one again. */
300         if (tr == br && tc == bc)
301             break;
302 
303         tr = br;
304         tc = bc;
305     }
306 
307     /* TODO: Each subsequent iteration checks at least one point in common
308      * with the last iteration could be 2 ( if diag selected)
309      */
310 
311     /* 1/4 pel */
312     while (--quarteriters)
313     {
314         CHECK_BETTER(left, tr, tc - 1);
315         CHECK_BETTER(right, tr, tc + 1);
316         CHECK_BETTER(up, tr - 1, tc);
317         CHECK_BETTER(down, tr + 1, tc);
318 
319         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
320 
321         switch (whichdir)
322         {
323         case 0:
324             CHECK_BETTER(diag, tr - 1, tc - 1);
325             break;
326         case 1:
327             CHECK_BETTER(diag, tr - 1, tc + 1);
328             break;
329         case 2:
330             CHECK_BETTER(diag, tr + 1, tc - 1);
331             break;
332         case 3:
333             CHECK_BETTER(diag, tr + 1, tc + 1);
334             break;
335         }
336 
337         /* no reason to check the same one again. */
338         if (tr == br && tc == bc)
339             break;
340 
341         tr = br;
342         tc = bc;
343     }
344 
345     bestmv->as_mv.row = br * 2;
346     bestmv->as_mv.col = bc * 2;
347 
348     if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
349         (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
350         return INT_MAX;
351 
352     return besterr;
353 }
354 #undef MVC
355 #undef PRE
356 #undef SP
357 #undef DIST
358 #undef IFMVCV
359 #undef ERR
360 #undef CHECK_BETTER
361 
/* Refine a full-pel motion vector with one half-pel and one quarter-pel
 * search step.
 *
 * The incoming *bestmv addresses the reference plane in whole pixels; it is
 * scaled by 8 before the search.  In the scaled units the half-pel step
 * tests the positions 4 to the left, right, above and below the centre plus
 * one diagonal (the one between the cheaper horizontal and the cheaper
 * vertical neighbour).  The quarter-pel step then does the same at a
 * distance of 2 around the best half-pel result.
 *
 * x             - supplies the reference plane (e_mbd.pre) and, on x86,
 *                 the scratch buffer e_mbd.y_buf.
 * b             - source block (base_src, src, src_stride).
 * d             - supplies the block's offset into the reference plane.
 * bestmv        - in: full-pel start vector; out: best vector found.
 * ref_mv        - vector against which the mv cost is measured.
 * error_per_bit - multiplier applied to the mv cost table entries.
 * vfp           - function table; vf, svf, svf_halfpix_h/v/hv and (x86
 *                 only) copymem are used.
 * mvcost        - cost tables, or NULL to ignore the mv cost.
 * distortion    - out: distortion of the best position.
 * sse1          - out: sse reported for the best position.
 *
 * Returns distortion + mv cost of the best position.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching.
     * The copy starts one row above and one column left of the block, so y
     * is placed one row and one column into the buffer.
     */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv
     * Scale by multiplication: the components may be negative and
     * left-shifting a negative value is undefined behaviour in C.
     */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    startmv = *bestmv;

    /* calculate central point error */
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal
     * bit 0 of whichdir: 0 = left was cheaper, 1 = right
     * bit 1 of whichdir: 0 = up was cheaper,   1 = down
     */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* time to check quarter pels.
     * If the half-pel winner lies above / left of the start position, step
     * y back one row / column so that it addresses the whole pixel the
     * fractional parts (mv & 7) below are relative to.
     */
    if (bestmv->as_mv.row < startmv.as_mv.row)
        y -= y_stride;

    if (bestmv->as_mv.col < startmv.as_mv.col)
        y--;

    startmv = *bestmv;


    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        /* On a whole pixel: the left neighbour belongs to the previous
         * pixel column, with fraction 6.
         */
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }

    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 4;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        /* On a whole pixel: the upper neighbour belongs to the previous
         * pixel row, with fraction 6.
         */
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    }

    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 4;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal (same whichdir encoding as above) */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 2;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
            thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 2;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
            thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }

        break;
    case 3:
    default:
        /* whichdir is always 0..3; "default" mirrors the half-pel switch
         * and guarantees thismse is assigned on every path.
         */
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return bestmse;
}
672 
/* Refine a full-pel motion vector with a single half-pel search step.
 *
 * The incoming *bestmv addresses the reference plane in whole pixels; it is
 * scaled by 8 before the search.  In the scaled units the positions 4 to
 * the left, right, above and below the centre are scored, followed by the
 * one diagonal lying between the cheaper horizontal and the cheaper
 * vertical neighbour.
 *
 * x             - supplies the reference plane (e_mbd.pre) and, on x86,
 *                 the scratch buffer e_mbd.y_buf.
 * b             - source block (base_src, src, src_stride).
 * d             - supplies the block's offset into the reference plane.
 * bestmv        - in: full-pel start vector; out: best vector found.
 * ref_mv        - vector against which the mv cost is measured.
 * error_per_bit - multiplier applied to the mv cost table entries.
 * vfp           - function table; vf, svf_halfpix_h/v/hv and (x86 only)
 *                 copymem are used.
 * mvcost        - cost tables, or NULL to ignore the mv cost.
 * distortion    - out: distortion of the best position.
 * sse1          - out: sse reported for the best position.
 *
 * Returns distortion + mv cost of the best position.
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
{
    int best_cost = INT_MAX;
    int_mv centre;
    int_mv cand;
    unsigned char *src = (*(b->base_src) + b->src);
    int cost_left, cost_right, cost_up, cost_down, cost_diag;
    unsigned int cand_sse;
    int cand_dist;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    unsigned char *diag_ref;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching.
     * The copy starts one row above and one column left of the block.
     */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* Scale the centre vector and remember it as the search origin. */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    centre = *bestmv;

    /* Score the centre itself. */
    best_cost = vfp->vf(y, y_stride, src, b->src_stride, sse1);
    *distortion = best_cost;
    best_cost += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* Left neighbour. */
    cand.as_mv.row = centre.as_mv.row;
    cand.as_mv.col = ((centre.as_mv.col - 8) | 4);
    cand_dist = vfp->svf_halfpix_h(y - 1, y_stride, src, b->src_stride, &cand_sse);
    cost_left = cand_dist + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (cost_left < best_cost)
    {
        *bestmv = cand;
        best_cost = cost_left;
        *distortion = cand_dist;
        *sse1 = cand_sse;
    }

    /* Right neighbour. */
    cand.as_mv.col += 8;
    cand_dist = vfp->svf_halfpix_h(y, y_stride, src, b->src_stride, &cand_sse);
    cost_right = cand_dist + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (cost_right < best_cost)
    {
        *bestmv = cand;
        best_cost = cost_right;
        *distortion = cand_dist;
        *sse1 = cand_sse;
    }

    /* Upper neighbour. */
    cand.as_mv.col = centre.as_mv.col;
    cand.as_mv.row = ((centre.as_mv.row - 8) | 4);
    cand_dist = vfp->svf_halfpix_v(y - y_stride, y_stride, src, b->src_stride, &cand_sse);
    cost_up = cand_dist + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (cost_up < best_cost)
    {
        *bestmv = cand;
        best_cost = cost_up;
        *distortion = cand_dist;
        *sse1 = cand_sse;
    }

    /* Lower neighbour. */
    cand.as_mv.row += 8;
    cand_dist = vfp->svf_halfpix_v(y, y_stride, src, b->src_stride, &cand_sse);
    cost_down = cand_dist + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (cost_down < best_cost)
    {
        *bestmv = cand;
        best_cost = cost_down;
        *distortion = cand_dist;
        *sse1 = cand_sse;
    }

    /* One more point: the diagonal towards the cheaper neighbour on each
     * axis.  Going left / up moves the predictor pointer back by one
     * column / row; going right / down leaves it in place.
     */
    cand = centre;
    diag_ref = y;

    if (cost_left < cost_right)
    {
        cand.as_mv.col = (cand.as_mv.col - 8) | 4;
        diag_ref -= 1;
    }
    else
    {
        cand.as_mv.col += 4;
    }

    if (cost_up < cost_down)
    {
        cand.as_mv.row = (cand.as_mv.row - 8) | 4;
        diag_ref -= y_stride;
    }
    else
    {
        cand.as_mv.row += 4;
    }

    cand_dist = vfp->svf_halfpix_hv(diag_ref, y_stride, src, b->src_stride, &cand_sse);
    cost_diag = cand_dist + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (cost_diag < best_cost)
    {
        *bestmv = cand;
        best_cost = cost_diag;
        *distortion = cand_dist;
        *sse1 = cand_sse;
    }

    return best_cost;
}
809 
/* Helper macros for the full-pel searches below.  Each one is a brace block
 * meant to be used without a trailing semicolon, and each expands in terms
 * of the caller's variables (x, br, bc, all_in, this_mv, thissad, bestsad,
 * best_site, i, fcenter_mv, mvsadcost, sad_per_bit).
 */

/* all_in = 1 when every point within "range" of (br, bc) lies inside the
 * mv limits of x, otherwise 0.
 */
#define CHECK_BOUNDS(range) \
{\
    all_in = ((br-range) >= x->mv_row_min) &&\
             ((br+range) <= x->mv_row_max) &&\
             ((bc-range) >= x->mv_col_min) &&\
             ((bc+range) <= x->mv_col_max);\
}

/* Skips to the next iteration of the enclosing loop when this_mv lies
 * outside the mv limits of x.
 */
#define CHECK_POINT \
{\
    if (this_mv.as_mv.col < x->mv_col_min ||\
        this_mv.as_mv.col > x->mv_col_max ||\
        this_mv.as_mv.row < x->mv_row_min ||\
        this_mv.as_mv.row > x->mv_row_max)\
        continue;\
}

/* When the raw SAD already beats bestsad, adds the mv cost to it and, if it
 * still beats bestsad, records it as the new best together with site i.
 */
#define CHECK_BETTER \
{\
    if (thissad < bestsad)\
    {\
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
        if (thissad < bestsad)\
        {\
            best_site = i;\
            bestsad = thissad;\
        }\
    }\
}
839 
840 static const MV next_chkpts[6][3] =
841 {
842     {{ -2, 0}, { -1, -2}, {1, -2}},
843     {{ -1, -2}, {1, -2}, {2, 0}},
844     {{1, -2}, {2, 0}, {1, 2}},
845     {{2, 0}, {1, 2}, { -1, 2}},
846     {{1, 2}, { -1, 2}, { -2, 0}},
847     {{ -1, 2}, { -2, 0}, { -1, -2}}
848 };
849 
/* Hexagon-pattern motion search on SAD plus vector cost.
 *
 *  x            encoder macroblock context: reference frame (x->e_mbd.pre)
 *               and the MV limits x->mv_{row,col}_{min,max}.
 *  b            source block (b->base_src, b->src, b->src_stride).
 *  d            block descriptor; d->offset locates the block in the
 *               reference buffer.
 *  ref_mv       starting vector; clamped IN PLACE to the MV limits.
 *  best_mv      out: best vector found, in the same units as ref_mv.
 *  search_param only read when CONFIG_MULTI_RES_ENCODING is set, where
 *               larger values shrink hex_range and values >= 6 skip the
 *               hexagon stages entirely.
 *  sad_per_bit  weight handed to mvsad_err_cost().
 *  vfp          function table; only vfp->sdf (SAD) is used.
 *  mvsadcost    cost tables handed to mvsad_err_cost().
 *  mvcost       not used by this function.
 *  center_mv    vector the cost is measured against; its components are
 *               shifted right by 3 before use.
 *
 * Stages:
 *   1. the six hexagon vertices around the start point are tested;
 *   2. while some vertex wins, the centre moves there and the three points
 *      of next_chkpts[k] are tested (at most hex_range - 1 more moves);
 *   3. the four 1-away neighbours are tested and the centre moves to the
 *      winner, up to dia_range times.
 *
 * Returns the best SAD including its mvsad_err_cost() term.
 *
 * NOTE: the local names below are referenced by the CHECK_BOUNDS,
 * CHECK_POINT and CHECK_BETTER macros and must not be renamed.
 */
int vp8_hex_search
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    const vp8_variance_fn_ptr_t *vfp,
    int *mvsadcost[2],
    int *mvcost[2],
    int_mv *center_mv
)
{
    /* Constant patterns: built once instead of on every call, matching the
     * file-scope next_chkpts table.
     */
    static const MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    int i, j;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

    int in_what_stride = pre_stride;
    int br, bc;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    unsigned char *base_offset;
    unsigned char *this_offset;
    int k = -1;
    int all_in;
    int best_site = -1;
    int hex_range = 127;
    int dia_range = 8;

    int_mv fcenter_mv;

    /* Intentionally unused (search_param only matters for multi-res). */
    (void)mvcost;
    (void)search_param;

    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* adjust ref_mv to make sure it is within MV range */
    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    br = ref_mv->as_mv.row;
    bc = ref_mv->as_mv.col;

    /* Work out the start point for the search */
    base_offset = (unsigned char *)(base_pre + d->offset);
    this_offset = base_offset + (br * (pre_stride)) + bc;
    this_mv.as_mv.row = br;
    this_mv.as_mv.col = bc;
    bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
            + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
    /* Lower search range based on prediction info */
    if (search_param >= 6) goto cal_neighbors;
    else if (search_param >= 5) hex_range = 4;
    else if (search_param >= 4) hex_range = 6;
    else if (search_param >= 3) hex_range = 15;
    else if (search_param >= 2) hex_range = 31;
    else if (search_param >= 1) hex_range = 63;
#endif

    /* hex search: stage 1, all six vertices.
     * CHECK_BOUNDS is defined earlier in the file; it presumably sets all_in
     * when every point within the given range of (br, bc) is legal, so the
     * per-point CHECK_POINT test can be skipped.
     */
    CHECK_BOUNDS(2)

    if(all_in)
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
            CHECK_BETTER
        }
    }else
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            /* skips (continue) candidates outside the MV limits */
            CHECK_POINT
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
            CHECK_BETTER
        }
    }

    if (best_site == -1)
        goto cal_neighbors;
    else
    {
        br += hex[best_site].row;
        bc += hex[best_site].col;
        k = best_site;
    }

    /* stage 2: follow the winning direction, three new points per move */
    for (j = 1; j < hex_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(2)

        if(all_in)
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }else
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;
        else
        {
            br += next_chkpts[k][best_site].row;
            bc += next_chkpts[k][best_site].col;
            /* new direction: k = (k + 5 + best_site) modulo 6 */
            k += 5 + best_site;
            if (k >= 12) k -= 12;
            else if (k >= 6) k -= 6;
        }
    }

    /* check 4 1-away neighbors */
cal_neighbors:
    for (j = 0; j < dia_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(1)

        if(all_in)
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }else
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;
        else
        {
            br += neighbors[best_site].row;
            bc += neighbors[best_site].col;
        }
    }

    best_mv->as_mv.row = br;
    best_mv->as_mv.col = bc;

    return bestsad;
}
1035 #undef CHECK_BOUNDS
1036 #undef CHECK_POINT
1037 #undef CHECK_BETTER
1038 
/* Diamond-pattern motion search driven by the search-site table x->ss
 * (plain C version, one SAD per candidate).
 *
 *  x            encoder macroblock context: reference frame (x->e_mbd.pre),
 *               search sites (x->ss, x->ss_count, x->searches_per_step),
 *               MV limits, SAD cost tables and x->errorperbit.
 *  b            source block (b->base_src, b->src, b->src_stride).
 *  d            block descriptor; d->offset locates the block in the
 *               reference buffer.
 *  ref_mv       starting vector; clamped IN PLACE to the MV limits.
 *  best_mv      out: best vector found, in the same units as ref_mv.
 *  search_param number of leading steps of the site table to skip (a larger
 *               value means a smaller first step and fewer steps).
 *  sad_per_bit  weight handed to mvsad_err_cost().
 *  num00        out: number of steps that found no better site while the
 *               best position was still the starting position.
 *  fn_ptr       function table; sdf (SAD) and vf (variance) are used.
 *  mvcost       cost tables for the final mv_err_cost(); NULL disables it.
 *  center_mv    vector the costs are measured against; its components are
 *               shifted right by 3 for the SAD-stage costing.
 *
 * Returns the variance at the best position plus mv_err_cost() of the best
 * vector scaled by 8.
 */
int vp8_diamond_search_sad_c
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *best_address;

    int tot_steps;
    int_mv this_mv;

    unsigned int bestsad;
    unsigned int thissad;
    int best_site = 0;
    int last_site = 0;

    int ref_row;
    int ref_col;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    ref_row = ref_mv->as_mv.row;
    ref_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Work out the start point for the search */
    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
    best_address = in_what;

    /* Check the starting position */
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param determines the length of the initial step and hence
     * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
     * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    /* i indexes ss continuously across all steps; it is never reset, so a
     * new winner always has an index different from last_site.
     */
    i = 1;

    for (step = 0; step < tot_steps ; step++)
    {
        for (j = 0 ; j < x->searches_per_step ; j++)
        {
            /* Trap illegal vectors: only candidates strictly inside the
             * MV limits are evaluated.
             */
            this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
            this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))

            {
                check_here = ss[i].offset + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

                /* Add the vector cost only if the raw SAD already wins. */
                if (thissad < bestsad)
                {
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = i;
                    }
                }
            }

            i++;
        }

        if (best_site != last_site)
        {
            /* A better site was found in this step: re-centre on it. */
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++;
    }

    /* Scale by multiplication, not "<< 3": left-shifting a negative value
     * is undefined behaviour in C and vector components may be negative.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1162 
/* Diamond-pattern motion search over the site table x->ss that evaluates
 * four candidates per call (fn_ptr->sdx4df) whenever a whole step lies
 * inside the MV limits, and falls back to one SAD per candidate otherwise.
 *
 * ref_mv is clamped in place and used as the start point; best_mv receives
 * the best vector found.  *num00 counts the steps that found no better
 * site while the best position was still the start position.
 *
 * Returns the variance at the best position plus mv_err_cost() of the best
 * vector scaled by 8 (mv_err_cost() yields 0 when mvcost is NULL).
 */
int vp8_diamond_search_sadx4
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int site, n, step;

    unsigned char *src = (*(b->base_src) + b->src);
    int src_stride = b->src_stride;
    int ref_stride = x->e_mbd.pre.y_stride;
    unsigned char *ref_base = x->e_mbd.pre.y_buffer;
    unsigned char *origin;
    unsigned char *best_ptr;
    unsigned char *probe;

    search_site *sites;
    int step_count;

    int_mv cand_mv;
    int_mv full_center;
    int *sad_cost[2];

    unsigned int best_sad;
    unsigned int cand_sad;
    int winner = 0;
    int prev_winner = 0;

    int start_row, start_col;
    int cand_row, cand_col;

    sad_cost[0] = x->mvsadcost[0];
    sad_cost[1] = x->mvsadcost[1];
    full_center.as_mv.row = center_mv->as_mv.row >> 3;
    full_center.as_mv.col = center_mv->as_mv.col >> 3;

    /* Pull the start vector inside the legal range before using it. */
    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    start_row = ref_mv->as_mv.row;
    start_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = start_row;
    best_mv->as_mv.col = start_col;

    /* Address of the start position in the reference frame. */
    origin = (unsigned char *)(ref_base + d->offset + (start_row * ref_stride) + start_col);
    best_ptr = origin;

    /* Cost of staying at the start position. */
    best_sad = fn_ptr->sdf(src, src_stride, origin, ref_stride, UINT_MAX)
             + mvsad_err_cost(best_mv, &full_center, sad_cost, sad_per_bit);

    /* Skip the first search_param steps of the site table: each skipped
     * step halves the initial step size and removes one iteration.
     */
    sites = &x->ss[search_param * x->searches_per_step];
    step_count = (x->ss_count / x->searches_per_step) - search_param;

    /* The site index keeps counting across steps and is never reset. */
    site = 1;
    step = 0;

    while (step < step_count)
    {
        /* Testing one bound against each of the first four sites of the
         * step is enough to know whether every site of the step is legal.
         */
        const int inside =
            ((best_mv->as_mv.row + sites[site].mv.row) > x->mv_row_min) &&
            ((best_mv->as_mv.row + sites[site + 1].mv.row) < x->mv_row_max) &&
            ((best_mv->as_mv.col + sites[site + 2].mv.col) > x->mv_col_min) &&
            ((best_mv->as_mv.col + sites[site + 3].mv.col) < x->mv_col_max);

        if (!inside)
        {
            /* One candidate at a time, dropping the illegal ones. */
            for (n = 0; n < x->searches_per_step; n++, site++)
            {
                cand_row = best_mv->as_mv.row + sites[site].mv.row;
                cand_col = best_mv->as_mv.col + sites[site].mv.col;

                if ((cand_col <= x->mv_col_min) || (cand_col >= x->mv_col_max) ||
                    (cand_row <= x->mv_row_min) || (cand_row >= x->mv_row_max))
                    continue;

                probe = sites[site].offset + best_ptr;
                cand_sad = fn_ptr->sdf(src, src_stride, probe, ref_stride, best_sad);

                if (cand_sad >= best_sad)
                    continue;

                cand_mv.as_mv.row = cand_row;
                cand_mv.as_mv.col = cand_col;
                cand_sad += mvsad_err_cost(&cand_mv, &full_center,
                                           sad_cost, sad_per_bit);

                if (cand_sad < best_sad)
                {
                    best_sad = cand_sad;
                    winner = site;
                }
            }
        }
        else
        {
            unsigned int quad_sad[4];
            int q;

            /* Whole step is legal: four SADs per call. */
            for (n = 0; n < x->searches_per_step; n += 4)
            {
                const unsigned char *quad_ptr[4];

                for (q = 0; q < 4; q++)
                    quad_ptr[q] = sites[site + q].offset + best_ptr;

                fn_ptr->sdx4df(src, src_stride, quad_ptr, ref_stride, quad_sad);

                for (q = 0; q < 4; q++, site++)
                {
                    if (quad_sad[q] >= best_sad)
                        continue;

                    cand_mv.as_mv.row = best_mv->as_mv.row + sites[site].mv.row;
                    cand_mv.as_mv.col = best_mv->as_mv.col + sites[site].mv.col;
                    quad_sad[q] += mvsad_err_cost(&cand_mv, &full_center,
                                                  sad_cost, sad_per_bit);

                    if (quad_sad[q] < best_sad)
                    {
                        best_sad = quad_sad[q];
                        winner = site;
                    }
                }
            }
        }

        if (winner != prev_winner)
        {
            /* This step produced a better site: re-centre on it. */
            best_mv->as_mv.row += sites[winner].mv.row;
            best_mv->as_mv.col += sites[winner].mv.col;
            best_ptr += sites[winner].offset;
            prev_winner = winner;
        }
        else if (best_ptr == origin)
        {
            (*num00)++;
        }

        step++;
    }

    cand_mv.as_mv.row = best_mv->as_mv.row * 8;
    cand_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(src, src_stride, best_ptr, ref_stride, &cand_sad)
           + mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1329 
/* Exhaustive motion search around ref_mv (plain C version).
 *
 *  x            encoder macroblock context: reference frame (x->e_mbd.pre),
 *               MV limits, SAD cost tables and x->errorperbit.
 *  b            source block (b->base_src, b->src, b->src_stride).
 *  d            block descriptor; d->offset locates the block in the
 *               reference buffer and d->bmi.mv RECEIVES the best vector.
 *  ref_mv       centre of the search window (not clamped here).
 *  sad_per_bit  weight handed to mvsad_err_cost().
 *  distance     half-size of the window: rows and columns run from
 *               ref - distance up to, but not including, ref + distance,
 *               further limited by the MV limits of x.
 *  fn_ptr       function table; sdf (SAD) and vf (variance) are used.
 *  mvcost       cost tables for the final mv_err_cost(); NULL disables it.
 *  center_mv    vector the costs are measured against; its components are
 *               shifted right by 3 for the SAD-stage costing.
 *
 * Returns the variance at the best position plus mv_err_cost() of the best
 * vector scaled by 8.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                        int sad_per_bit, int distance,
                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                        int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
                          in_what_stride, UINT_MAX)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that
     * stretch beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        /* First candidate of this row; advanced one pixel per column. */
        check_here = r * mv_stride + in_what + col_min;

        for (c = col_min; c < col_max; c++)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

            this_mv.as_mv.col = c;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                      mvsadcost, sad_per_bit);

            if (thissad < bestsad)
            {
                bestsad = thissad;
                best_mv->as_mv.row = r;
                best_mv->as_mv.col = c;
                bestaddress = check_here;
            }

            check_here++;
        }
    }

    /* Scale by multiplication, not "<< 3": left-shifting a negative value
     * is undefined behaviour in C and vector components may be negative.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1425 
/* Exhaustive motion search around ref_mv that evaluates three horizontally
 * adjacent candidates per call through fn_ptr->sdx3f, with a one-at-a-time
 * tail loop for the columns left over at the end of each row.
 *
 *  x            encoder macroblock context: reference frame (x->e_mbd.pre),
 *               MV limits, SAD cost tables and x->errorperbit.
 *  b            source block (b->base_src, b->src, b->src_stride).
 *  d            block descriptor; d->offset locates the block in the
 *               reference buffer and d->bmi.mv RECEIVES the best vector.
 *  ref_mv       centre of the search window (not clamped here).
 *  sad_per_bit  weight handed to mvsad_err_cost().
 *  distance     half-size of the window: rows and columns run from
 *               ref - distance up to, but not including, ref + distance,
 *               further limited by the MV limits of x.
 *  fn_ptr       function table; sdf, sdx3f and vf are used.
 *  mvcost       cost tables for the final mv_err_cost(); NULL disables it.
 *  center_mv    vector the costs are measured against; its components are
 *               shifted right by 3 for the SAD-stage costing.
 *
 * Returns the variance at the best position plus mv_err_cost() of the best
 * vector scaled by 8.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    unsigned int sad_array[3];

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
                          in_what_stride, UINT_MAX)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that stretch
     * beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        /* Groups of three columns while at least three remain. */
        while ((c + 2) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                /* Add the vector cost only if the raw SAD already wins. */
                if (thissad < bestsad)
                {
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Remaining columns of the row, one at a time. */
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

            if (thissad < bestsad)
            {
                this_mv.as_mv.col = c;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = r;
                    best_mv->as_mv.col = c;
                    bestaddress = check_here;
                }
            }

            check_here ++;
            c ++;
        }

    }

    /* Scale by multiplication, not "<< 3": left-shifting a negative value
     * is undefined behaviour in C and vector components may be negative.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1559 
/* Exhaustive motion search around ref_mv.  Each row of the window is
 * covered in three passes: groups of eight columns (fn_ptr->sdx8f), then
 * groups of three (fn_ptr->sdx3f), then single columns (fn_ptr->sdf).
 *
 * The window spans ref - distance up to, but not including, ref + distance
 * in both directions, limited by the MV limits of x.  The best vector is
 * written to d->bmi.mv; ref_mv itself is not clamped here.
 *
 * Returns the variance at the best position plus mv_err_cost() of the best
 * vector scaled by 8 (mv_err_cost() yields 0 when mvcost is NULL).
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *src = (*(b->base_src) + b->src);
    int src_stride = b->src_stride;
    int ref_stride = x->e_mbd.pre.y_stride;
    unsigned char *ref_base = x->e_mbd.pre.y_buffer;
    unsigned char *window;
    unsigned char *best_ptr;
    unsigned char *probe;
    int_mv *result = &d->bmi.mv;
    int_mv cand_mv;
    unsigned int best_sad;
    unsigned int cand_sad;
    int r, c, k;

    int centre_row = ref_mv->as_mv.row;
    int centre_col = ref_mv->as_mv.col;

    int first_row = centre_row - distance;
    int end_row = centre_row + distance;
    int first_col = centre_col - distance;
    int end_col = centre_col + distance;

    DECLARE_ALIGNED_ARRAY(16, unsigned short, sad8, 8);
    unsigned int sad3[3];

    int *sad_cost[2];
    int_mv full_center;

    sad_cost[0] = x->mvsadcost[0];
    sad_cost[1] = x->mvsadcost[1];
    full_center.as_mv.row = center_mv->as_mv.row >> 3;
    full_center.as_mv.col = center_mv->as_mv.col >> 3;

    /* Block position in the reference frame and the window centre. */
    window = ref_base + d->offset;
    best_ptr = window + (centre_row * ref_stride) + centre_col;

    result->as_mv.row = centre_row;
    result->as_mv.col = centre_col;

    /* Cost of the centre position is the value to beat. */
    best_sad = fn_ptr->sdf(src, src_stride,
                           best_ptr, ref_stride, UINT_MAX)
             + mvsad_err_cost(result, &full_center, sad_cost, sad_per_bit);

    /* Keep the window inside the legal vector range (UMV border). */
    first_col = (first_col < x->mv_col_min) ? x->mv_col_min : first_col;
    end_col = (end_col > x->mv_col_max) ? x->mv_col_max : end_col;
    first_row = (first_row < x->mv_row_min) ? x->mv_row_min : first_row;
    end_row = (end_row > x->mv_row_max) ? x->mv_row_max : end_row;

    for (r = first_row; r < end_row; r++)
    {
        cand_mv.as_mv.row = r;
        probe = r * ref_stride + window + first_col;
        c = first_col;

        /* Pass 1: eight columns per call. */
        while ((c + 7) < end_col)
        {
            fn_ptr->sdx8f(src, src_stride, probe, ref_stride, sad8);

            for (k = 0; k < 8; k++, probe++, c++)
            {
                cand_sad = sad8[k];

                if (cand_sad >= best_sad)
                    continue;

                cand_mv.as_mv.col = c;
                cand_sad += mvsad_err_cost(&cand_mv, &full_center,
                                           sad_cost, sad_per_bit);

                if (cand_sad < best_sad)
                {
                    best_sad = cand_sad;
                    result->as_mv.row = r;
                    result->as_mv.col = c;
                    best_ptr = probe;
                }
            }
        }

        /* Pass 2: three columns per call. */
        while ((c + 2) < end_col)
        {
            fn_ptr->sdx3f(src, src_stride, probe, ref_stride, sad3);

            for (k = 0; k < 3; k++, probe++, c++)
            {
                cand_sad = sad3[k];

                if (cand_sad >= best_sad)
                    continue;

                cand_mv.as_mv.col = c;
                cand_sad += mvsad_err_cost(&cand_mv, &full_center,
                                           sad_cost, sad_per_bit);

                if (cand_sad < best_sad)
                {
                    best_sad = cand_sad;
                    result->as_mv.row = r;
                    result->as_mv.col = c;
                    best_ptr = probe;
                }
            }
        }

        /* Pass 3: whatever is left of the row, one column at a time. */
        for (; c < end_col; probe++, c++)
        {
            cand_sad = fn_ptr->sdf(src, src_stride, probe, ref_stride, best_sad);

            if (cand_sad >= best_sad)
                continue;

            cand_mv.as_mv.col = c;
            cand_sad += mvsad_err_cost(&cand_mv, &full_center,
                                       sad_cost, sad_per_bit);

            if (cand_sad < best_sad)
            {
                best_sad = cand_sad;
                result->as_mv.row = r;
                result->as_mv.col = c;
                best_ptr = probe;
            }
        }
    }

    cand_mv.as_mv.row = result->as_mv.row * 8;
    cand_mv.as_mv.col = result->as_mv.col * 8;

    return fn_ptr->vf(src, src_stride, best_ptr, ref_stride, &cand_sad)
           + mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1723 
/* Refine a motion vector with a greedy four-neighbour SAD search.
 *
 * Starting at *ref_mv, each of up to search_range iterations evaluates the
 * positions one step up, left, right and down of the current best position.
 * A neighbour is only tested when it lies strictly inside the limits
 * x->mv_row_min/max and x->mv_col_min/max. The neighbour with the lowest
 * SAD + mvsad_err_cost() that also beats the current best becomes the new
 * centre; the search stops early when no neighbour improves on it.
 *
 *   x              reference frame buffer and stride, MV limits, mvsadcost
 *                  tables and errorperbit are read from here.
 *   b              source block (base_src, src, src_stride).
 *   d              d->offset locates the block inside the reference buffer.
 *   ref_mv         in: starting vector; out: refined vector (updated in place).
 *   error_per_bit  weight handed to mvsad_err_cost() during the search.
 *   search_range   maximum number of refinement iterations.
 *   fn_ptr         supplies sdf (SAD) and vf (variance).
 *   mvcost         cost tables handed to mv_err_cost() for the final score.
 *   center_mv      vector against which the motion vector cost is measured.
 *
 * ref_mv is multiplied by 8 and center_mv is shifted right by 3 before the
 * two are compared, so they are presumably in whole-pixel and 1/8-pixel
 * units respectively -- TODO confirm against callers.
 *
 * Returns the variance at the refined position plus mv_err_cost() of the
 * refined vector.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                            int error_per_bit, int search_range,
                            vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                            int_mv *center_mv)
{
    /* Candidate steps as {row, col}: up, left, right, down. */
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
    short this_row_offset, this_col_offset;

    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    unsigned char *check_here;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Score of the starting position. */
    bestsad = fn_ptr->sdf(what, what_stride, best_address,
                          in_what_stride, UINT_MAX)
            + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (i = 0; i < search_range; i++)
    {
        int best_site = -1;

        for (j = 0; j < 4; j++)
        {
            this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
            this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);

                /* Only pay for the MV cost lookup when the raw SAD already wins. */
                if (thissad < bestsad)
                {
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = j;
                    }
                }
            }
        }

        /* No neighbour improved on the current position: converged. */
        if (best_site == -1)
            break;

        ref_mv->as_mv.row += neighbors[best_site].row;
        ref_mv->as_mv.col += neighbors[best_site].col;
        best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
    }

    /* Multiply rather than shift: left-shifting a negative value is
     * undefined behaviour in C, and the components may be negative.
     */
    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1803 
/* Four-neighbour refinement search that batches SAD evaluation.
 *
 * Behaves like a greedy one-step search around *ref_mv: for at most
 * search_range iterations the positions up, left, right and down of the
 * current best are scored with SAD + mvsad_err_cost(), and the best
 * improving one becomes the new centre. When all four neighbours lie
 * strictly inside the x->mv_{row,col}_{min,max} limits their SADs are
 * computed with a single fn_ptr->sdx4df() call; otherwise each in-range
 * neighbour is scored individually with fn_ptr->sdf().
 *
 * ref_mv is updated in place with the refined vector. The return value is
 * the variance (fn_ptr->vf) at the refined position plus mv_err_cost() of
 * the refined vector, scaled by 8, measured against center_mv.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv)
{
    /* Candidate steps as {row, col}: up, left, right, down. */
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int step, n;
    short cand_row, cand_col;

    int what_stride = b->src_stride;
    int in_what_stride = x->e_mbd.pre.y_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *best_address = (unsigned char *)(x->e_mbd.pre.y_buffer + d->offset +
        (ref_mv->as_mv.row * in_what_stride) + ref_mv->as_mv.col);
    unsigned char *candidate;
    int_mv this_mv;
    int_mv fcenter_mv;
    int *mvsadcost[2];
    unsigned int bestsad;
    unsigned int thissad;
    int variance;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Score of the starting position. */
    bestsad = fn_ptr->sdf(what, what_stride, best_address,
                          in_what_stride, UINT_MAX)
            + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (step = 0; step < search_range; step++)
    {
        int best_site = -1;
        int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &&
                     ((ref_mv->as_mv.row + 1) < x->mv_row_max) &&
                     ((ref_mv->as_mv.col - 1) > x->mv_col_min) &&
                     ((ref_mv->as_mv.col + 1) < x->mv_col_max);

        if (all_in)
        {
            /* Every neighbour is in range: score all four in one call. */
            unsigned int sad_array[4];
            const unsigned char *block_offset[4];

            block_offset[0] = best_address - in_what_stride;
            block_offset[1] = best_address - 1;
            block_offset[2] = best_address + 1;
            block_offset[3] = best_address + in_what_stride;

            fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

            for (n = 0; n < 4; n++)
            {
                if (sad_array[n] >= bestsad)
                    continue;

                this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[n].row;
                this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[n].col;
                sad_array[n] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                if (sad_array[n] < bestsad)
                {
                    bestsad = sad_array[n];
                    best_site = n;
                }
            }
        }
        else
        {
            /* Near a limit: score only the neighbours that are in range. */
            for (n = 0; n < 4; n++)
            {
                cand_row = ref_mv->as_mv.row + neighbors[n].row;
                cand_col = ref_mv->as_mv.col + neighbors[n].col;

                if ((cand_col <= x->mv_col_min) || (cand_col >= x->mv_col_max) ||
                    (cand_row <= x->mv_row_min) || (cand_row >= x->mv_row_max))
                    continue;

                candidate = (neighbors[n].row) * in_what_stride + neighbors[n].col + best_address;
                thissad = fn_ptr->sdf(what, what_stride, candidate, in_what_stride, bestsad);

                if (thissad >= bestsad)
                    continue;

                this_mv.as_mv.row = cand_row;
                this_mv.as_mv.col = cand_col;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_site = n;
                }
            }
        }

        /* No neighbour improved on the current position: converged. */
        if (best_site == -1)
            break;

        ref_mv->as_mv.row += neighbors[best_site].row;
        ref_mv->as_mv.col += neighbors[best_site].col;
        best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
    }

    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    variance = fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad);

    return variance + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1919 
1920 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1921 void print_mode_context(void)
1922 {
1923     FILE *f = fopen("modecont.c", "w");
1924     int i, j;
1925 
1926     fprintf(f, "#include \"entropy.h\"\n");
1927     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1928     fprintf(f, "{\n");
1929 
1930     for (j = 0; j < 6; j++)
1931     {
1932         fprintf(f, "  { /* %d */\n", j);
1933         fprintf(f, "    ");
1934 
1935         for (i = 0; i < 4; i++)
1936         {
1937             int overal_prob;
1938             int this_prob;
1939             int count;
1940 
1941             /* Overall probs */
1942             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1943 
1944             if (count)
1945                 overal_prob = 256 * mv_mode_cts[i][0] / count;
1946             else
1947                 overal_prob = 128;
1948 
1949             if (overal_prob == 0)
1950                 overal_prob = 1;
1951 
1952             /* context probs */
1953             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1954 
1955             if (count)
1956                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1957             else
1958                 this_prob = 128;
1959 
1960             if (this_prob == 0)
1961                 this_prob = 1;
1962 
1963             fprintf(f, "%5d, ", this_prob);
1964         }
1965 
1966         fprintf(f, "  },\n");
1967     }
1968 
1969     fprintf(f, "};\n");
1970     fclose(f);
1971 }
1972 
1973 /* MV ref count VP8_ENTROPY_STATS stats code */
1974 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1975 void init_mv_ref_counts()
1976 {
1977     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1978     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1979 }
1980 
/* Record one mode decision in the statistics tables.
 *
 * The mode m is compared against ZEROMV, NEARESTMV, NEARMV and NEWMV in that
 * order. At each level the counter in bucket 0 is incremented when m matches
 * that level's mode and accumulation stops; otherwise bucket 1 is
 * incremented and the next level is examined. ct[level] selects the row of
 * mv_ref_ct used at that level and is only read for levels that are reached.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
{
    static const MB_PREDICTION_MODE decision_order[4] =
    {
        ZEROMV, NEARESTMV, NEARMV, NEWMV
    };
    int level;

    for (level = 0; level < 4; level++)
    {
        /* 0 when m is this level's mode, 1 when the decision falls through. */
        const int bucket = (m != decision_order[level]);

        ++mv_ref_ct [ct[level]] [level] [bucket];
        ++mv_mode_cts[level][bucket];

        if (bucket == 0)
            break;
    }
}
2027 
2028 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
2029 
2030 #endif
2031