1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "./vp8_rtcd.h"
13 #include "./vpx_dsp_rtcd.h"
14 #include "onyx_int.h"
15 #include "mcomp.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_config.h"
18 #include <stdio.h>
19 #include <limits.h>
20 #include <math.h>
21 #include "vp8/common/findnearmv.h"
22 #include "vp8/common/common.h"
23 #include "vpx_dsp/vpx_dsp_common.h"
24 
#ifdef VP8_ENTROPY_STATS
/* Counter tables that are only compiled in for VP8_ENTROPY_STATS builds.
 * NOTE(review): nothing in the visible part of this file reads or writes
 * them; the meaning of each dimension should be confirmed at the use sites.
 */
static int mv_ref_ct[31][4][2];
static int mv_mode_cts[4][2];
#endif
29 
/* Weighted bit cost of coding "mv" relative to "ref".
 *
 * The row and column differences are halved (>> 1) and used to index
 * mvcost[0] and mvcost[1] respectively; the summed table entries are scaled
 * by Weight and shifted right by 7.
 *
 * MV costing is based on the distribution of vectors in the previous frame
 * and therefore tends to overstate the cost of vectors. Coding a new vector
 * can also have a knock-on effect on the cost of subsequent vectors and on
 * the quality of prediction from NEAR and NEAREST for subsequent blocks.
 * The "Weight" parameter allows these factors to be taken into account to a
 * limited extent.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    const int row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
    const int col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;
    const int table_cost = mvcost[0][row_idx] + mvcost[1][col_idx];

    return (table_cost * Weight) >> 7;
}
41 
/* Rate term for a candidate vector "mv" relative to "ref".
 *
 * Returns ((mvcost[0][drow >> 1] + mvcost[1][dcol >> 1]) * error_per_bit
 * + 128) >> 8, or 0 when no cost tables are supplied (mvcost == NULL).
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    int row_idx;
    int col_idx;

    /* No tables: motion vector costing is disabled. */
    if (!mvcost)
        return 0;

    row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
    col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;

    return ((mvcost[0][row_idx] + mvcost[1][col_idx]) * error_per_bit + 128) >> 8;
}
51 
/* SAD-domain rate term, computed on a full pixel basis.
 *
 * The row and column differences between "mv" and "ref" index mvsadcost[0]
 * and mvsadcost[1] directly (no halving). Returns 0 when mvsadcost is NULL.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    int row_idx;
    int col_idx;

    /* No tables: motion vector costing is disabled. */
    if (!mvsadcost)
        return 0;

    row_idx = mv->as_mv.row - ref->as_mv.row;
    col_idx = mv->as_mv.col - ref->as_mv.col;

    return ((mvsadcost[0][row_idx] + mvsadcost[1][col_idx]) * error_per_bit + 128) >> 8;
}
62 
/* Fill x->ss with the search-site table for a 4-point-per-step pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). For every step length,
 * starting at MAX_FIRST_STEP and halving until it reaches zero, four sites
 * are appended in the order up, down, left, right. Each site stores its
 * vector and the matching buffer offset (row * stride + col).
 * On return x->ss_count holds the number of entries written and
 * x->searches_per_step is 4.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    /* Unit directions as {row, col}: up, down, left, right. */
    static const int unit_dir[4][2] = {
        { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
    };
    int site = 0;
    int step;
    int i;

    /* Centre point. */
    x->ss[site].mv.col = 0;
    x->ss[site].mv.row = 0;
    x->ss[site].offset = 0;
    site++;

    /* Contract the step length by half on every pass. */
    for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
        for (i = 0; i < 4; i++) {
            const int d_row = unit_dir[i][0] * step;
            const int d_col = unit_dir[i][1] * step;

            x->ss[site].mv.col = d_col;
            x->ss[site].mv.row = d_row;
            x->ss[site].offset = d_row * stride + d_col;
            site++;
        }
    }

    x->ss_count = site;
    x->searches_per_step = 4;
}
110 
/* Fill x->ss with the search-site table for an 8-point-per-step pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). For every step length,
 * starting at MAX_FIRST_STEP and halving until it reaches zero, eight sites
 * are appended: up, down, left, right, then the four diagonals (up-left,
 * up-right, down-left, down-right). Each site stores its vector and the
 * matching buffer offset (row * stride + col).
 * On return x->ss_count holds the number of entries written and
 * x->searches_per_step is 8.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    /* Unit directions as {row, col}, in the order the sites are stored. */
    static const int unit_dir[8][2] = {
        { -1,  0 }, {  1,  0 }, {  0, -1 }, {  0,  1 },
        { -1, -1 }, { -1,  1 }, {  1, -1 }, {  1,  1 }
    };
    int site = 0;
    int step;
    int i;

    /* Centre point. */
    x->ss[site].mv.col = 0;
    x->ss[site].mv.row = 0;
    x->ss[site].offset = 0;
    site++;

    /* Contract the step length by half on every pass. */
    for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
        for (i = 0; i < 8; i++) {
            const int d_row = unit_dir[i][0] * step;
            const int d_col = unit_dir[i][1] * step;

            x->ss[site].mv.col = d_col;
            x->ss[site].mv.row = d_row;
            x->ss[site].offset = d_row * stride + d_col;
            site++;
        }
    }

    x->ss_count = site;
    x->searches_per_step = 8;
}
182 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is first copied into a small aligned buffer, so that reads from this buffer
 * never cross a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a
 * 22 rows x 32 cols area is copied, which is enough for a 16x16 macroblock.
 * Later, for SPLITMV, the area could be reduced.
 */
192 
193 /* estimated cost of a motion vector (r,c) */
194 #define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
195 /* pointer to predictor base of a motionvector */
196 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
197 /* convert motion vector component to offset for svf calc */
198 #define SP(x) (((x)&3)<<1)
199 /* returns subpixel variance error function. */
200 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
201 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
202 /* returns distortion + motion vector cost */
203 #define ERR(r,c) (MVC(r,c)+DIST(r,c))
204 /* checks if (r,c) has better score than previous best */
205 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
206 
vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * bestmv,int_mv * ref_mv,int error_per_bit,const vp8_variance_fn_ptr_t * vfp,int * mvcost[2],int * distortion,unsigned int * sse1)207 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
208                                              int_mv *bestmv, int_mv *ref_mv,
209                                              int error_per_bit,
210                                              const vp8_variance_fn_ptr_t *vfp,
211                                              int *mvcost[2], int *distortion,
212                                              unsigned int *sse1)
213 {
214     unsigned char *z = (*(b->base_src) + b->src);
215 
216     int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
217     int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
218     int tr = br, tc = bc;
219     unsigned int besterr;
220     unsigned int left, right, up, down, diag;
221     unsigned int sse;
222     unsigned int whichdir;
223     unsigned int halfiters = 4;
224     unsigned int quarteriters = 4;
225     int thismse;
226 
227     int minc = VPXMAX(x->mv_col_min * 4,
228                       (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
229     int maxc = VPXMIN(x->mv_col_max * 4,
230                       (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
231     int minr = VPXMAX(x->mv_row_min * 4,
232                       (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
233     int maxr = VPXMIN(x->mv_row_max * 4,
234                       (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
235 
236     int y_stride;
237     int offset;
238     int pre_stride = x->e_mbd.pre.y_stride;
239     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
240 
241 
242 #if ARCH_X86 || ARCH_X86_64
243     MACROBLOCKD *xd = &x->e_mbd;
244     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
245     unsigned char *y;
246     int buf_r1, buf_r2, buf_c1;
247 
248     /* Clamping to avoid out-of-range data access */
249     buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
250     buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
251     buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
252     y_stride = 32;
253 
254     /* Copy to intermediate buffer before searching. */
255     vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
256     y = xd->y_buf + y_stride*buf_r1 +buf_c1;
257 #else
258     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
259     y_stride = pre_stride;
260 #endif
261 
262     offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
263 
264     /* central mv */
265     bestmv->as_mv.row *= 8;
266     bestmv->as_mv.col *= 8;
267 
268     /* calculate central point error */
269     besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
270     *distortion = besterr;
271     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
272 
273     /* TODO: Each subsequent iteration checks at least one point in common
274      * with the last iteration could be 2 ( if diag selected)
275      */
276     while (--halfiters)
277     {
278         /* 1/2 pel */
279         CHECK_BETTER(left, tr, tc - 2);
280         CHECK_BETTER(right, tr, tc + 2);
281         CHECK_BETTER(up, tr - 2, tc);
282         CHECK_BETTER(down, tr + 2, tc);
283 
284         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
285 
286         switch (whichdir)
287         {
288         case 0:
289             CHECK_BETTER(diag, tr - 2, tc - 2);
290             break;
291         case 1:
292             CHECK_BETTER(diag, tr - 2, tc + 2);
293             break;
294         case 2:
295             CHECK_BETTER(diag, tr + 2, tc - 2);
296             break;
297         case 3:
298             CHECK_BETTER(diag, tr + 2, tc + 2);
299             break;
300         }
301 
302         /* no reason to check the same one again. */
303         if (tr == br && tc == bc)
304             break;
305 
306         tr = br;
307         tc = bc;
308     }
309 
310     /* TODO: Each subsequent iteration checks at least one point in common
311      * with the last iteration could be 2 ( if diag selected)
312      */
313 
314     /* 1/4 pel */
315     while (--quarteriters)
316     {
317         CHECK_BETTER(left, tr, tc - 1);
318         CHECK_BETTER(right, tr, tc + 1);
319         CHECK_BETTER(up, tr - 1, tc);
320         CHECK_BETTER(down, tr + 1, tc);
321 
322         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
323 
324         switch (whichdir)
325         {
326         case 0:
327             CHECK_BETTER(diag, tr - 1, tc - 1);
328             break;
329         case 1:
330             CHECK_BETTER(diag, tr - 1, tc + 1);
331             break;
332         case 2:
333             CHECK_BETTER(diag, tr + 1, tc - 1);
334             break;
335         case 3:
336             CHECK_BETTER(diag, tr + 1, tc + 1);
337             break;
338         }
339 
340         /* no reason to check the same one again. */
341         if (tr == br && tc == bc)
342             break;
343 
344         tr = br;
345         tc = bc;
346     }
347 
348     bestmv->as_mv.row = br * 2;
349     bestmv->as_mv.col = bc * 2;
350 
351     if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
352         (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
353         return INT_MAX;
354 
355     return besterr;
356 }
357 #undef MVC
358 #undef PRE
359 #undef SP
360 #undef DIST
361 #undef IFMVCV
362 #undef ERR
363 #undef CHECK_BETTER
364 
/* Adopt "cand" as the running best when its total cost beats *best_cost,
 * recording its distortion and SSE alongside.
 */
static void subpel_adopt_if_better(int cost, int mse, unsigned int sse,
                                   const int_mv *cand, int_mv *bestmv,
                                   int *best_cost, int *distortion,
                                   unsigned int *sse1)
{
    if (cost < *best_cost) {
        *bestmv = *cand;
        *best_cost = cost;
        *distortion = mse;
        *sse1 = sse;
    }
}

/* Single-round half-pel then quarter-pel refinement of the full-pel vector
 * in *bestmv.
 *
 * *bestmv is first scaled by 8. The half-pel stage tests the left, right, up
 * and down neighbours (+/-4) and one diagonal chosen from the better
 * horizontal and vertical neighbour. The quarter-pel stage repeats the same
 * pattern with a step of 2 around the half-pel winner. Every candidate's cost
 * is its variance plus mv_err_cost().
 *
 * On return *bestmv is the winning vector, *distortion its variance and
 * *sse1 its SSE. Returns the winning total cost.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int best_cost = INT_MAX;
    int_mv origin;
    int_mv cand;
    unsigned char *z = (*(b->base_src) + b->src);
    unsigned char *src;
    int left, right, up, down, diag;
    unsigned int sse;
    int go_right, go_down;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    unsigned char *y;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset +
                         (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
        bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    origin = *bestmv;

    /* calculate central point error */
    best_cost = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = best_cost;
    best_cost += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* ---- half-pel stage ---- */

    /* left */
    cand.as_mv.row = origin.as_mv.row;
    cand.as_mv.col = ((origin.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(left, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* right */
    cand.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(right, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* up */
    cand.as_mv.col = origin.as_mv.col;
    cand.as_mv.row = ((origin.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(up, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* down */
    cand.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(down, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* one diagonal, towards the better side on each axis (ties go
     * right / down)
     */
    go_right = !(left < right);
    go_down = !(up < down);
    cand = origin;
    src = y;

    if (go_right) {
        cand.as_mv.col += 4;
    } else {
        cand.as_mv.col = (cand.as_mv.col - 8) | 4;
        src -= 1;
    }

    if (go_down) {
        cand.as_mv.row += 4;
    } else {
        cand.as_mv.row = (cand.as_mv.row - 8) | 4;
        src -= y_stride;
    }

    thismse = vfp->svf_halfpix_hv(src, y_stride, z, b->src_stride, &sse);
    diag = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(diag, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* ---- quarter-pel stage ---- */

    /* Re-base y when the half-pel winner lies above / left of the start. */
    if (bestmv->as_mv.row < origin.as_mv.row)
        y -= y_stride;

    if (bestmv->as_mv.col < origin.as_mv.col)
        y--;

    origin = *bestmv;

    /* left: a full-pel column wraps into the previous pixel (y - 1) */
    cand.as_mv.row = origin.as_mv.row;
    src = y;

    if (origin.as_mv.col & 7) {
        cand.as_mv.col = origin.as_mv.col - 2;
    } else {
        cand.as_mv.col = (origin.as_mv.col - 8) | 6;
        src = y - 1;
    }

    thismse = vfp->svf(src, y_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                       z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(left, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* right */
    cand.as_mv.col += 4;
    thismse = vfp->svf(y, y_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                       z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(right, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* up: a full-pel row wraps into the previous line (y - y_stride) */
    cand.as_mv.col = origin.as_mv.col;
    src = y;

    if (origin.as_mv.row & 7) {
        cand.as_mv.row = origin.as_mv.row - 2;
    } else {
        cand.as_mv.row = (origin.as_mv.row - 8) | 6;
        src = y - y_stride;
    }

    thismse = vfp->svf(src, y_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                       z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(up, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* down */
    cand.as_mv.row += 4;
    thismse = vfp->svf(y, y_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                       z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(down, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    /* one diagonal; each axis is resolved independently, wrapping into the
     * previous line / pixel when moving up / left from a full-pel position
     */
    go_right = !(left < right);
    go_down = !(up < down);
    cand = origin;
    src = y;

    if (go_down) {
        cand.as_mv.row += 2;
    } else if (origin.as_mv.row & 7) {
        cand.as_mv.row -= 2;
    } else {
        cand.as_mv.row = (origin.as_mv.row - 8) | 6;
        src -= y_stride;
    }

    if (go_right) {
        cand.as_mv.col += 2;
    } else if (origin.as_mv.col & 7) {
        cand.as_mv.col -= 2;
    } else {
        cand.as_mv.col = (origin.as_mv.col - 8) | 6;
        src -= 1;
    }

    thismse = vfp->svf(src, y_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                       z, b->src_stride, &sse);
    diag = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);
    subpel_adopt_if_better(diag, thismse, sse, &cand, bestmv, &best_cost,
                           distortion, sse1);

    return best_cost;
}
675 
/* Single-round half-pel refinement of the full-pel vector in *bestmv.
 *
 * *bestmv is first scaled by 8. The left, right, up and down half-pel
 * neighbours (+/-4) are tested, followed by one diagonal chosen from the
 * better horizontal and the better vertical neighbour. Every candidate's
 * cost is its variance plus mv_err_cost().
 *
 * On return *bestmv is the winning vector, *distortion its variance and
 * *sse1 its SSE. Returns the winning total cost.
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
{
    int best_cost = INT_MAX;
    int_mv origin;
    int_mv cand;
    unsigned char *z = (*(b->base_src) + b->src);
    unsigned char *diag_src;
    int left, right, up, down, diag;
    unsigned int sse;
    int go_right, go_down;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    unsigned char *y;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset +
                         (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
        bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* central mv */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;
    origin = *bestmv;

    /* calculate central point error */
    best_cost = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = best_cost;
    best_cost += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* left */
    cand.as_mv.row = origin.as_mv.row;
    cand.as_mv.col = ((origin.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (left < best_cost) {
        *bestmv = cand;
        best_cost = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* right */
    cand.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (right < best_cost) {
        *bestmv = cand;
        best_cost = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* up */
    cand.as_mv.col = origin.as_mv.col;
    cand.as_mv.row = ((origin.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (up < best_cost) {
        *bestmv = cand;
        best_cost = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* down */
    cand.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (down < best_cost) {
        *bestmv = cand;
        best_cost = down;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* one diagonal, towards the better side on each axis (ties go
     * right / down)
     */
    go_right = !(left < right);
    go_down = !(up < down);
    cand = origin;
    diag_src = y;

    if (go_right) {
        cand.as_mv.col += 4;
    } else {
        cand.as_mv.col = (cand.as_mv.col - 8) | 4;
        diag_src -= 1;
    }

    if (go_down) {
        cand.as_mv.row += 4;
    } else {
        cand.as_mv.row = (cand.as_mv.row - 8) | 4;
        diag_src -= y_stride;
    }

    thismse = vfp->svf_halfpix_hv(diag_src, y_stride, z, b->src_stride, &sse);
    diag = thismse + mv_err_cost(&cand, ref_mv, mvcost, error_per_bit);

    if (diag < best_cost) {
        *bestmv = cand;
        best_cost = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return best_cost;
}
812 
/* The three macros below expand to bare brace blocks and capture locals of
 * the calling search function (x, br, bc, all_in, this_mv, thissad, bestsad,
 * best_site, i, fcenter_mv, mvsadcost, sad_per_bit). They are invoked
 * without a trailing semicolon, so the brace-block shape must be kept.
 */

/* all_in = 1 when (br, bc) +/- range stays inside the mv limits, else 0. */
#define CHECK_BOUNDS(range) \
{\
    all_in = (((br) - (range)) >= x->mv_row_min) &\
             (((br) + (range)) <= x->mv_row_max) &\
             (((bc) - (range)) >= x->mv_col_min) &\
             (((bc) + (range)) <= x->mv_col_max);\
}

/* Skip the current loop iteration when this_mv is outside the mv limits. */
#define CHECK_POINT \
{\
    if (this_mv.as_mv.col < x->mv_col_min ||\
        this_mv.as_mv.col > x->mv_col_max ||\
        this_mv.as_mv.row < x->mv_row_min ||\
        this_mv.as_mv.row > x->mv_row_max)\
        continue;\
}

/* Record site i as the best one when thissad plus its vector cost beats
 * bestsad. The vector cost is only added once the raw SAD already wins.
 */
#define CHECK_BETTER \
{\
    if (thissad < bestsad)\
    {\
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost,\
                                  sad_per_bit);\
        if (thissad < bestsad)\
        {\
            bestsad = thissad;\
            best_site = i;\
        }\
    }\
}
842 
843 static const MV next_chkpts[6][3] =
844 {
845     {{ -2, 0}, { -1, -2}, {1, -2}},
846     {{ -1, -2}, {1, -2}, {2, 0}},
847     {{1, -2}, {2, 0}, {1, 2}},
848     {{2, 0}, {1, 2}, { -1, 2}},
849     {{1, 2}, { -1, 2}, { -2, 0}},
850     {{ -1, 2}, { -2, 0}, { -1, -2}}
851 };
852 
/* Hexagon-pattern integer motion search.
 *
 * Starting from ref_mv (clamped in place to the limits held in x), the
 * search first walks a six-point hexagon for as long as the cost keeps
 * improving (at most hex_range rounds), then refines the result with up
 * to dia_range passes over the four 1-away neighbours.
 *
 * The winning position is written to best_mv.  The return value is the
 * best cost found: the block SAD from vfp->sdf plus mvsad_err_cost()
 * measured against center_mv >> 3.
 *
 * search_param is only consulted when CONFIG_MULTI_RES_ENCODING is set,
 * where it shortens (or skips) the hexagon stage.  mvcost is unused.
 *
 * NOTE: CHECK_POINT and CHECK_BETTER access locals of this function by
 * name (x, i, this_mv, thissad, bestsad, best_site, fcenter_mv, mvsadcost,
 * sad_per_bit).  CHECK_BOUNDS is defined earlier in the file; it
 * presumably derives all_in from br/bc and the limits in x -- confirm
 * before renaming any local here.
 */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv)
{
    /* Displacements of the six hexagon vertices and of the four
     * neighbours used by the final refinement.
     */
    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} };
    MV neighbors[4] = { {0, -1}, { -1, 0}, {1, 0}, {0, 1} };

    unsigned char *what = (*(b->base_src) + b->src);
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    unsigned char *base_offset;
    unsigned char *this_offset;
    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    int in_what_stride = pre_stride;

    int_mv this_mv;
    int_mv fcenter_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int br, bc;
    int i, j;
    int all_in;
    int k = -1;             /* direction of the last hexagon move */
    int best_site = -1;     /* -1: no candidate improved on bestsad */
    int hex_range = 127;
    int dia_range = 8;

    (void)mvcost;

    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Make sure the starting vector is within the MV range */
    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    br = ref_mv->as_mv.row;
    bc = ref_mv->as_mv.col;

    /* Cost of the starting position */
    base_offset = base_pre + d->offset;
    this_offset = base_offset + (br * pre_stride) + bc;
    this_mv.as_mv.row = br;
    this_mv.as_mv.col = bc;
    bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
            + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
    /* Lower search range based on prediction info */
    if (search_param >= 6) goto cal_neighbors;
    else if (search_param >= 5) hex_range = 4;
    else if (search_param >= 4) hex_range = 6;
    else if (search_param >= 3) hex_range = 15;
    else if (search_param >= 2) hex_range = 31;
    else if (search_param >= 1) hex_range = 63;

    dia_range = 8;
#else
    (void)search_param;
#endif

    /* First hexagon: all six vertices */
    CHECK_BOUNDS(2)

    if (!all_in)
    {
        /* Some vertices may be out of range: test each one */
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            CHECK_POINT
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
            CHECK_BETTER
        }
    }
    else
    {
        for (i = 0; i < 6; i++)
        {
            this_mv.as_mv.row = br + hex[i].row;
            this_mv.as_mv.col = bc + hex[i].col;
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
            CHECK_BETTER
        }
    }

    if (best_site == -1)
        goto cal_neighbors;

    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;

    /* Subsequent hexagons: only the three new vertices need checking */
    for (j = 1; j < hex_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(2)

        if (!all_in)
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }
        else
        {
            for (i = 0; i < 3; i++)
            {
                this_mv.as_mv.row = br + next_chkpts[k][i].row;
                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
                this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;

        br += next_chkpts[k][best_site].row;
        bc += next_chkpts[k][best_site].col;

        /* New direction, wrapped back into 0..5 */
        k = (k + 5 + best_site) % 6;
    }

    /* check 4 1-away neighbors */
cal_neighbors:
    for (j = 0; j < dia_range; j++)
    {
        best_site = -1;
        CHECK_BOUNDS(1)

        if (!all_in)
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                CHECK_POINT
                this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }
        else
        {
            for (i = 0; i < 4; i++)
            {
                this_mv.as_mv.row = br + neighbors[i].row;
                this_mv.as_mv.col = bc + neighbors[i].col;
                this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
                CHECK_BETTER
            }
        }

        if (best_site == -1)
            break;

        br += neighbors[best_site].row;
        bc += neighbors[best_site].col;
    }

    best_mv->as_mv.row = br;
    best_mv->as_mv.col = bc;

    return bestsad;
}
/* The CHECK_* helpers are private to vp8_hex_search(); retire them here. */
#undef CHECK_BETTER
#undef CHECK_POINT
#undef CHECK_BOUNDS
1045 
/* Diamond-pattern integer motion search (plain C version).
 *
 * ref_mv is clamped in place to the limits held in x and used as the
 * starting point.  For each of the remaining steps of the search-site
 * table x->ss (search_param skips that many leading steps), every site of
 * the step is evaluated relative to the current best position; sites
 * whose vector is not strictly inside the MV limits are ignored.  A site
 * replaces the current best when its SAD plus mvsad_err_cost() (measured
 * against center_mv >> 3) is lower.
 *
 * Outputs:
 *   best_mv - best position found.
 *   num00   - number of steps that produced no new best site while the
 *             best address was still the starting address.
 *
 * Returns fn_ptr->vf() for the best position plus mv_err_cost() of the
 * best vector scaled by 8, measured against center_mv.
 */
int vp8_diamond_search_sad_c
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    int_mv *ref_mv,
    int_mv *best_mv,
    int search_param,
    int sad_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvcost[2],
    int_mv *center_mv
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *best_address;

    int tot_steps;
    int_mv this_mv;

    unsigned int bestsad;
    unsigned int thissad;
    int best_site = 0;
    int last_site = 0;

    int ref_row;
    int ref_col;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    ref_row = ref_mv->as_mv.row;
    ref_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Work out the start point for the search */
    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
    best_address = in_what;

    /* Check the starting position */
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param determines the length of the initial step and hence
     * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
     * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    /* i runs over the whole site table across all steps; entry 0 is
     * never evaluated.
     */
    i = 1;

    for (step = 0; step < tot_steps ; step++)
    {
        for (j = 0 ; j < x->searches_per_step ; j++)
        {
            /* Trap illegal vectors */
            this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
            this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                check_here = ss[i].offset + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

                /* Only pay for the MV cost when the raw SAD already wins */
                if (thissad < bestsad)
                {
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = i;
                    }
                }
            }

            i++;
        }

        if (best_site != last_site)
        {
            /* Re-centre on the best site found in this step */
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++;
    }

    /* Scale by multiplication: left-shifting a negative value is
     * undefined behaviour in C, and vector components can be negative.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1169 
/* Diamond-pattern integer motion search that evaluates four candidates
 * per call through fn_ptr->sdx4df whenever a whole step is known to be in
 * range.
 *
 * ref_mv is clamped in place and used as the start.  Each step of the
 * site table x->ss (search_param skips that many leading steps) is
 * evaluated around the current best position.  If the first four sites
 * of the step pass the four boundary tests, the step is processed in
 * groups of four without per-site checks; otherwise every site is
 * range-checked and evaluated individually with fn_ptr->sdf.
 *
 * Outputs:
 *   best_mv - best position found.
 *   num00   - number of steps that produced no new best site while the
 *             best address was still the starting address.
 *
 * Returns fn_ptr->vf() for the best position plus mv_err_cost() of the
 * best vector scaled by 8, measured against center_mv.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                             int_mv *ref_mv, int_mv *best_mv,
                             int search_param, int sad_per_bit, int *num00,
                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                             int_mv *center_mv)
{
    unsigned char *src = (*(b->base_src) + b->src);
    unsigned char *ref_frame = x->e_mbd.pre.y_buffer;
    unsigned char *start_address;
    unsigned char *best_address;
    int src_stride = b->src_stride;
    int ref_stride = x->e_mbd.pre.y_stride;

    search_site *ss;
    int tot_steps;
    int step;
    int site = 1;           /* running index into ss; entry 0 is never visited */
    int best_site = 0;
    int last_site = 0;
    int start_row;
    int start_col;

    unsigned int bestsad;
    unsigned int thissad;
    int_mv this_mv;
    int_mv fcenter_mv;
    int *mvsadcost[2];

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    start_row = ref_mv->as_mv.row;
    start_col = ref_mv->as_mv.col;
    *num00 = 0;
    best_mv->as_mv.row = start_row;
    best_mv->as_mv.col = start_col;

    /* Address of the starting position in the reference frame */
    start_address = ref_frame + d->offset + (start_row * ref_stride) + start_col;
    best_address = start_address;

    /* Cost of the starting position */
    bestsad = fn_ptr->sdf(src, src_stride, start_address, ref_stride)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* search_param selects the first step: 0 = initial step
     * (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel, 2 =
     * (MAX_FIRST_STEP/4) pel... etc.
     */
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    for (step = 0; step < tot_steps; step++)
    {
        /* Four boundary tests on the first four sites of the step stand
         * in for checking all four limits on every site.
         */
        const int all_in =
            ((best_mv->as_mv.row + ss[site].mv.row) > x->mv_row_min) &&
            ((best_mv->as_mv.row + ss[site + 1].mv.row) < x->mv_row_max) &&
            ((best_mv->as_mv.col + ss[site + 2].mv.col) > x->mv_col_min) &&
            ((best_mv->as_mv.col + ss[site + 3].mv.col) < x->mv_col_max);
        int n;

        if (all_in)
        {
            for (n = 0; n < x->searches_per_step; n += 4)
            {
                const unsigned char *block_offset[4];
                unsigned int sad_array[4];
                int t;

                for (t = 0; t < 4; t++)
                    block_offset[t] = ss[site + t].offset + best_address;

                fn_ptr->sdx4df(src, src_stride, block_offset, ref_stride, sad_array);

                for (t = 0; t < 4; t++, site++)
                {
                    unsigned int sad = sad_array[t];

                    if (sad >= bestsad)
                        continue;

                    this_mv.as_mv.row = best_mv->as_mv.row + ss[site].mv.row;
                    this_mv.as_mv.col = best_mv->as_mv.col + ss[site].mv.col;
                    sad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                    if (sad < bestsad)
                    {
                        bestsad = sad;
                        best_site = site;
                    }
                }
            }
        }
        else
        {
            for (n = 0; n < x->searches_per_step; n++, site++)
            {
                const int row = best_mv->as_mv.row + ss[site].mv.row;
                const int col = best_mv->as_mv.col + ss[site].mv.col;

                /* Trap illegal vectors */
                if (col <= x->mv_col_min || col >= x->mv_col_max ||
                    row <= x->mv_row_min || row >= x->mv_row_max)
                    continue;

                thissad = fn_ptr->sdf(src, src_stride,
                                      ss[site].offset + best_address, ref_stride);

                if (thissad >= bestsad)
                    continue;

                this_mv.as_mv.row = row;
                this_mv.as_mv.col = col;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_site = site;
                }
            }
        }

        if (best_site == last_site)
        {
            if (best_address == start_address)
                (*num00)++;
        }
        else
        {
            /* Re-centre on the best site found in this step */
            best_mv->as_mv.row += ss[best_site].mv.row;
            best_mv->as_mv.col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
    }

    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(src, src_stride, best_address, ref_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1336 
/* Exhaustive integer motion search (plain C version).
 *
 * Evaluates every position with row in [row_min, row_max) and column in
 * [col_min, col_max), where the window is ref_mv +/- distance clipped to
 * the MV limits held in x (the upper bounds are exclusive, as coded in the
 * loops below).  The position at ref_mv itself provides the baseline.
 * Each candidate is scored as SAD plus mvsad_err_cost() measured against
 * center_mv >> 3.
 *
 * The best vector is written to d->bmi.mv.  ref_mv is only read.
 *
 * Returns fn_ptr->vf() for the best position plus mv_err_cost() of the
 * best vector scaled by 8, measured against center_mv.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                        int sad_per_bit, int distance,
                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                        int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that
     * stretch beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;

        for (c = col_min; c < col_max; c++)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

            this_mv.as_mv.col = c;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                      mvsadcost, sad_per_bit);

            if (thissad < bestsad)
            {
                bestsad = thissad;
                best_mv->as_mv.row = r;
                best_mv->as_mv.col = c;
                bestaddress = check_here;
            }

            check_here++;
        }
    }

    /* Scale by multiplication: left-shifting a negative value is
     * undefined behaviour in C, and vector components can be negative.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1431 
/* Exhaustive integer motion search that evaluates three horizontally
 * adjacent candidates per call through fn_ptr->sdx3f.
 *
 * The window is ref_mv +/- distance clipped to the MV limits held in x;
 * rows [row_min, row_max) and columns [col_min, col_max) are visited (the
 * upper bounds are exclusive, as coded in the loops below).  Within a row,
 * columns are consumed three at a time while at least three remain, and
 * the leftovers are handled one by one with fn_ptr->sdf.  The MV cost
 * (mvsad_err_cost() against center_mv >> 3) is only added when the raw
 * SAD is already below the current best.
 *
 * The best vector is written to d->bmi.mv.  ref_mv is only read.
 *
 * Returns fn_ptr->vf() for the best position plus mv_err_cost() of the
 * best vector scaled by 8, measured against center_mv.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    int mv_stride = pre_stride;
    unsigned char *bestaddress;
    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int r, c;

    unsigned char *check_here;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    unsigned int sad_array[3];

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Work out the mid point for the search */
    in_what = base_pre + d->offset;
    bestaddress = in_what + (ref_row * pre_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Apply further limits to prevent us looking using vectors that stretch
     * beyond the UMV border
     */
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.as_mv.row = r;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        /* Groups of three columns */
        while ((c + 2) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                if (thissad < bestsad)
                {
                    this_mv.as_mv.col = c;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                              mvsadcost, sad_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->as_mv.row = r;
                        best_mv->as_mv.col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        /* Remaining columns, one at a time */
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

            if (thissad < bestsad)
            {
                this_mv.as_mv.col = c;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = r;
                    best_mv->as_mv.col = c;
                    bestaddress = check_here;
                }
            }

            check_here ++;
            c ++;
        }

    }

    /* Scale by multiplication: left-shifting a negative value is
     * undefined behaviour in C, and vector components can be negative.
     */
    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1564 
/* Exhaustive integer motion search that evaluates eight horizontally
 * adjacent candidates per call through fn_ptr->sdx8f, falling back to
 * groups of three (fn_ptr->sdx3f) and then single positions
 * (fn_ptr->sdf) for the tail of each row.
 *
 * The window is ref_mv +/- distance clipped to the MV limits held in x;
 * rows [row_min, row_max) and columns [col_min, col_max) are visited.
 * The MV cost (mvsad_err_cost() against center_mv >> 3) is only added
 * when the raw SAD is already below the current best.
 *
 * The best vector is written to d->bmi.mv.  ref_mv is only read.
 *
 * Returns fn_ptr->vf() for the best position plus mv_err_cost() of the
 * best vector scaled by 8, measured against center_mv.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv)
{
    unsigned char *src = (*(b->base_src) + b->src);
    unsigned char *ref_frame = x->e_mbd.pre.y_buffer;
    unsigned char *search_origin;
    unsigned char *best_ptr;
    unsigned char *cand;
    int src_stride = b->src_stride;
    int ref_stride = x->e_mbd.pre.y_stride;

    int_mv *best_mv = &d->bmi.mv;
    int_mv this_mv;
    int_mv fcenter_mv;
    int *mvsadcost[2];
    unsigned int bestsad;
    unsigned int thissad;
    int row, col, i;

    int ref_row = ref_mv->as_mv.row;
    int ref_col = ref_mv->as_mv.col;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    /* TODO(johannkoenig): check if this alignment is necessary. */
    DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
    unsigned int sad_array[3];

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Centre of the search window */
    search_origin = ref_frame + d->offset;
    best_ptr = search_origin + (ref_row * ref_stride) + ref_col;

    best_mv->as_mv.row = ref_row;
    best_mv->as_mv.col = ref_col;

    /* Baseline value at the centre */
    bestsad = fn_ptr->sdf(src, src_stride, best_ptr, ref_stride)
            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

    /* Clip the window so no vector stretches beyond the UMV border */
    col_min = (col_min < x->mv_col_min) ? x->mv_col_min : col_min;
    col_max = (col_max > x->mv_col_max) ? x->mv_col_max : col_max;
    row_min = (row_min < x->mv_row_min) ? x->mv_row_min : row_min;
    row_max = (row_max > x->mv_row_max) ? x->mv_row_max : row_max;

    for (row = row_min; row < row_max; row++)
    {
        this_mv.as_mv.row = row;
        cand = search_origin + row * ref_stride + col_min;
        col = col_min;

        /* Groups of eight columns */
        while ((col + 7) < col_max)
        {
            fn_ptr->sdx8f(src, src_stride, cand, ref_stride, sad_array8);

            for (i = 0; i < 8; i++, cand++, col++)
            {
                thissad = sad_array8[i];

                if (thissad >= bestsad)
                    continue;

                this_mv.as_mv.col = col;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = row;
                    best_mv->as_mv.col = col;
                    best_ptr = cand;
                }
            }
        }

        /* Groups of three columns */
        while ((col + 2) < col_max)
        {
            fn_ptr->sdx3f(src, src_stride, cand, ref_stride, sad_array);

            for (i = 0; i < 3; i++, cand++, col++)
            {
                thissad = sad_array[i];

                if (thissad >= bestsad)
                    continue;

                this_mv.as_mv.col = col;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                          mvsadcost, sad_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->as_mv.row = row;
                    best_mv->as_mv.col = col;
                    best_ptr = cand;
                }
            }
        }

        /* Remaining columns, one at a time */
        for (; col < col_max; cand++, col++)
        {
            thissad = fn_ptr->sdf(src, src_stride, cand, ref_stride);

            if (thissad >= bestsad)
                continue;

            this_mv.as_mv.col = col;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                      mvsadcost, sad_per_bit);

            if (thissad < bestsad)
            {
                bestsad = thissad;
                best_mv->as_mv.row = row;
                best_mv->as_mv.col = col;
                best_ptr = cand;
            }
        }
    }

    this_mv.as_mv.row = best_mv->as_mv.row * 8;
    this_mv.as_mv.col = best_mv->as_mv.col * 8;

    return fn_ptr->vf(src, src_stride, best_ptr, ref_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1728 
/* Refine a motion vector by repeated one-step neighbour checks.
 *
 * Starting at *ref_mv, each of at most search_range passes evaluates the
 * four positions listed in neighbors[] around the current best position.
 * A candidate is scored with fn_ptr->sdf() plus mvsad_err_cost() measured
 * against center_mv >> 3 and weighted by error_per_bit.  Candidates are
 * only tried when they lie strictly inside the x->mv_row_min/max and
 * x->mv_col_min/max window.  The best improving candidate becomes the new
 * position; the search stops early once no candidate improves the score.
 *
 *   x, b, d       - supply the source block, the reference buffer, its
 *                   stride and the block offset inside it.
 *   ref_mv        - starting vector; updated in place to the refined one.
 *   error_per_bit - weight handed to mvsad_err_cost() during the search.
 *   search_range  - maximum number of refinement passes.
 *   fn_ptr        - provides sdf() for the search and vf() for the result.
 *   mvcost        - cost tables for the final mv_err_cost() call.
 *   center_mv     - vector the costs are measured against.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the
 * final vector (scaled by 8) relative to center_mv, using x->errorperbit.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                            int error_per_bit, int search_range,
                            vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                            int_mv *center_mv)
{
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
    short this_row_offset, this_col_offset;

    int what_stride = b->src_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int in_what_stride = pre_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    /* Address in the reference buffer that corresponds to the start vector. */
    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    unsigned char *check_here;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;

    int *mvsadcost[2];
    int_mv fcenter_mv;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    /* Bring center_mv down to the scale used by ref_mv during the search. */
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Score of the starting position. */
    bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
            + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (i=0; i<search_range; i++)
    {
        int best_site = -1;

        for (j = 0 ; j < 4 ; j++)
        {
            this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
            this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

            /* Only candidates strictly inside the allowed window are tried. */
            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
            {
                check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);

                /* The vector cost is only added when the raw SAD already
                 * beats the current best. */
                if (thissad < bestsad)
                {
                    this_mv.as_mv.row = this_row_offset;
                    this_mv.as_mv.col = this_col_offset;
                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = j;
                    }
                }
            }
        }

        /* No neighbour improved on the current position: search is done. */
        if (best_site == -1)
            break;
        else
        {
            ref_mv->as_mv.row += neighbors[best_site].row;
            ref_mv->as_mv.col += neighbors[best_site].col;
            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
        }
    }

    /* Multiply instead of shifting: row/col may be negative and
     * left-shifting a negative signed value is undefined behaviour in C.
     * vp8_refining_search_sadx4() scales the same way. */
    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    /* thissad only serves as the output slot required by vf() here. */
    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1807 
/* One-step refinement search that batches the four neighbour SADs.
 *
 * Behaves like a plain four-neighbour refinement around *ref_mv, repeated
 * for at most search_range passes.  When all four neighbours of the
 * current position are strictly inside the x->mv_row_min/max and
 * x->mv_col_min/max window, their SADs are obtained with a single
 * fn_ptr->sdx4df() call; otherwise each in-range neighbour is measured
 * individually with fn_ptr->sdf().  In both cases mvsad_err_cost()
 * (against center_mv >> 3, weighted by error_per_bit) is added before a
 * candidate may replace the current best.  *ref_mv is updated in place.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the
 * final vector (scaled by 8) relative to center_mv, using x->errorperbit.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv)
{
    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int *mvsadcost[2];
    int_mv fcenter_mv;
    int_mv this_mv;
    unsigned int bestsad;
    unsigned int thissad;
    int pass;

    int what_stride = b->src_stride;
    int in_what_stride = x->e_mbd.pre.y_stride;
    unsigned char *what = (*(b->base_src) + b->src);
    /* Reference-buffer address matching the starting vector. */
    unsigned char *best_address = x->e_mbd.pre.y_buffer + d->offset +
        (ref_mv->as_mv.row * in_what_stride) + ref_mv->as_mv.col;

    mvsadcost[0] = x->mvsadcost[0];
    mvsadcost[1] = x->mvsadcost[1];
    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

    /* Score of the starting position. */
    bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
            + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

    for (pass = 0; pass < search_range; pass++)
    {
        int best_site = -1;
        int k;
        /* True only when every neighbour is strictly inside the window. */
        int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &&
                     ((ref_mv->as_mv.row + 1) < x->mv_row_max) &&
                     ((ref_mv->as_mv.col - 1) > x->mv_col_min) &&
                     ((ref_mv->as_mv.col + 1) < x->mv_col_max);

        if (all_in)
        {
            unsigned int sad_array[4];
            const unsigned char *block_offset[4];

            block_offset[0] = best_address - in_what_stride;
            block_offset[1] = best_address - 1;
            block_offset[2] = best_address + 1;
            block_offset[3] = best_address + in_what_stride;

            /* All four candidate SADs in one call. */
            fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

            for (k = 0; k < 4; k++)
            {
                /* Skip the vector costing unless the raw SAD already wins. */
                if (sad_array[k] >= bestsad)
                    continue;

                this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[k].row;
                this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[k].col;
                sad_array[k] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                if (sad_array[k] < bestsad)
                {
                    bestsad = sad_array[k];
                    best_site = k;
                }
            }
        }
        else
        {
            for (k = 0; k < 4; k++)
            {
                unsigned char *candidate;
                short cand_row = ref_mv->as_mv.row + neighbors[k].row;
                short cand_col = ref_mv->as_mv.col + neighbors[k].col;

                /* Candidates on or outside the window edge are not tried. */
                if ((cand_col <= x->mv_col_min) || (cand_col >= x->mv_col_max) ||
                    (cand_row <= x->mv_row_min) || (cand_row >= x->mv_row_max))
                    continue;

                candidate = (neighbors[k].row) * in_what_stride + neighbors[k].col + best_address;
                thissad = fn_ptr->sdf(what, what_stride, candidate, in_what_stride);

                if (thissad >= bestsad)
                    continue;

                this_mv.as_mv.row = cand_row;
                this_mv.as_mv.col = cand_col;
                thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_site = k;
                }
            }
        }

        /* No neighbour improved on the current position: stop refining. */
        if (best_site == -1)
            break;

        ref_mv->as_mv.row += neighbors[best_site].row;
        ref_mv->as_mv.col += neighbors[best_site].col;
        best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
    }

    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    this_mv.as_mv.col = ref_mv->as_mv.col * 8;

    /* thissad only serves as the output slot required by vf() here. */
    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1922 
1923 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1924 void print_mode_context(void)
1925 {
1926     FILE *f = fopen("modecont.c", "w");
1927     int i, j;
1928 
1929     fprintf(f, "#include \"entropy.h\"\n");
1930     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1931     fprintf(f, "{\n");
1932 
1933     for (j = 0; j < 6; j++)
1934     {
1935         fprintf(f, "  { /* %d */\n", j);
1936         fprintf(f, "    ");
1937 
1938         for (i = 0; i < 4; i++)
1939         {
1940             int overal_prob;
1941             int this_prob;
1942             int count;
1943 
1944             /* Overall probs */
1945             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1946 
1947             if (count)
1948                 overal_prob = 256 * mv_mode_cts[i][0] / count;
1949             else
1950                 overal_prob = 128;
1951 
1952             if (overal_prob == 0)
1953                 overal_prob = 1;
1954 
1955             /* context probs */
1956             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1957 
1958             if (count)
1959                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1960             else
1961                 this_prob = 128;
1962 
1963             if (this_prob == 0)
1964                 this_prob = 1;
1965 
1966             fprintf(f, "%5d, ", this_prob);
1967         }
1968 
1969         fprintf(f, "  },\n");
1970     }
1971 
1972     fprintf(f, "};\n");
1973     fclose(f);
1974 }
1975 
1976 /* MV ref count VP8_ENTROPY_STATS stats code */
1977 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1978 void init_mv_ref_counts()
1979 {
1980     memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1981     memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1982 }
1983 
/* Record one coded mode in the statistics tables.
 *
 * The mode is compared against ZEROMV, NEARESTMV, NEARMV and NEWMV in that
 * order.  For every stage n that is reached, slot [0] of
 * mv_ref_ct[ct[n]][n] and of mv_mode_cts[n] is incremented when m equals
 * that stage's mode (and the walk stops); otherwise slot [1] is
 * incremented and the next stage is examined.
 *
 * ct[] values index the first dimension of mv_ref_ct directly and are not
 * range-checked here.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
{
    static const MB_PREDICTION_MODE stage_mode[4] =
    {
        ZEROMV, NEARESTMV, NEARMV, NEWMV
    };
    int stage;

    for (stage = 0; stage < 4; stage++)
    {
        /* 0 when m is this stage's mode, 1 when the decision falls through. */
        const int miss = (m != stage_mode[stage]);

        ++mv_ref_ct [ct[stage]] [stage] [miss];
        ++mv_mode_cts[stage][miss];

        if (!miss)
            break;
    }
}
2030 
2031 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
2032 
2033 #endif
2034