1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /* contains
19 int AVCHalfPel1_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh)
20 int AVCHalfPel2_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width)
21 int AVCHalfPel1_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh)
22 int AVCHalfPel2_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width)
23 
24 int AVCSAD_MB_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
25 int AVCSAD_MB_HP_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
26 int AVCSAD_MB_HP_HTFM(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
27 int AVCSAD_Blk_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
28 */
29 
30 #include "avcenc_lib.h"
31 #include "sad_halfpel_inline.h"
32 
/*
 * Optional SAD profiling. When _SAD_STAT is defined, four global counters are
 * compiled in and the NUM_SAD_HP_* hooks increment them; otherwise the hooks
 * expand to nothing so the call sites cost nothing.
 */
#if defined(_SAD_STAT)
uint32 num_sad_HP_MB = 0;        /* incremented by NUM_SAD_HP_MB() */
uint32 num_sad_HP_Blk = 0;       /* incremented by NUM_SAD_HP_BLK() */
uint32 num_sad_HP_MB_call = 0;   /* incremented by NUM_SAD_HP_MB_CALL() */
uint32 num_sad_HP_Blk_call = 0;  /* incremented by NUM_SAD_HP_BLK_CALL() */
#define NUM_SAD_HP_MB_CALL()    (num_sad_HP_MB_call++)
#define NUM_SAD_HP_MB()         (num_sad_HP_MB++)
#define NUM_SAD_HP_BLK_CALL()   (num_sad_HP_Blk_call++)
#define NUM_SAD_HP_BLK()        (num_sad_HP_Blk++)
#else
/* Statistics disabled: every hook is an empty expansion. */
#define NUM_SAD_HP_MB_CALL()
#define NUM_SAD_HP_MB()
#define NUM_SAD_HP_BLK_CALL()
#define NUM_SAD_HP_BLK()
#endif
48 
49 
50 
51 /*===============================================================
52     Function:   SAD_MB_HalfPel
53     Date:       09/17/2000
54     Purpose:    Compute the SAD on the half-pel resolution
55     Input/Output:   hmem is assumed to be a pointer to the starting
56                 point of the search in the 33x33 matrix search region
57     Changes:
58     11/7/00:    implemented MMX
59   ===============================================================*/
60 /*==================================================================
61     Function:   AVCSAD_MB_HalfPel_C
62     Date:       04/30/2001
63     Purpose:    Compute SAD 16x16 between blk and ref in halfpel
64                 resolution,
65     Changes:
66   ==================================================================*/
/* Both components (x and y) are half-pel: each predicted sample is the rounded average of a 2x2 neighbourhood */
/*
 * 16x16 SAD between `blk` and the reference interpolated at a half-pel
 * position in both x and y.
 *
 * ref         top-left full-pel sample of the candidate. Rows are `rx` bytes
 *             apart; 17 columns by 17 rows are read.
 * blk         current block, read as 256 consecutive bytes (16 per row).
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the early-exit SAD threshold.
 * extra_info  unused.
 *
 * Returns the accumulated SAD. Accumulation stops after the first row at
 * which the running total exceeds the threshold, so the value can be partial.
 */
int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    const int stride = dmin_rx & 0xFFFF;
    const int sad_limit = (int)((uint32)dmin_rx >> 16);
    const uint8 *top = ref;              /* current full-pel row */
    const uint8 *bottom = ref + stride;  /* full-pel row below it */
    const uint8 *cur = blk;
    int total = 0;
    int row, col;

    (void)extra_info;

    NUM_SAD_HP_MB_CALL();

    for (row = 0; row < 16; row++, top += stride, bottom += stride, cur += 16)
    {
        for (col = 0; col < 16; col++)
        {
            /* rounded average of the 2x2 neighbourhood */
            int pred = (top[col] + top[col + 1] + bottom[col] + bottom[col + 1] + 2) >> 2;
            int diff = pred - cur[col];

            total += AVC_ABS(diff);
        }

        NUM_SAD_HP_MB();

        /* this candidate is already worse than the threshold: give up early */
        if (total > sad_limit)
        {
            return total;
        }
    }

    return total;
}
108 
/*
 * 16x16 SAD between `blk` and the reference interpolated at a half-pel
 * position in y only: each predicted sample is the rounded average of a
 * reference sample and the one directly below it.
 *
 * ref         top-left full-pel sample of the candidate. Rows are `rx` bytes
 *             apart; 16 columns by 17 rows are read.
 * blk         current block, read as 256 consecutive bytes (16 per row).
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the early-exit SAD threshold.
 * extra_info  unused.
 *
 * Returns the accumulated SAD. Accumulation stops after the first row at
 * which the running total exceeds the threshold, so the value can be partial.
 */
int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    const int stride = dmin_rx & 0xFFFF;
    const int sad_limit = (int)((uint32)dmin_rx >> 16);
    const uint8 *upper = ref;           /* current full-pel row */
    const uint8 *lower = ref + stride;  /* full-pel row below it */
    const uint8 *cur = blk;
    int total = 0;
    int row, col;

    (void)extra_info;

    NUM_SAD_HP_MB_CALL();

    for (row = 0; row < 16; row++, upper += stride, lower += stride, cur += 16)
    {
        for (col = 0; col < 16; col++)
        {
            /* rounded average of the vertical pair */
            int pred = (upper[col] + lower[col] + 1) >> 1;
            int diff = pred - cur[col];

            total += AVC_ABS(diff);
        }

        NUM_SAD_HP_MB();

        /* this candidate is already worse than the threshold: give up early */
        if (total > sad_limit)
        {
            return total;
        }
    }

    return total;
}
144 
/*
 * 16x16 SAD between `blk` and the reference interpolated at a half-pel
 * position in x only: each predicted sample is the rounded average of a
 * reference sample and its right-hand neighbour.
 *
 * ref         top-left full-pel sample of the candidate. Rows are `rx` bytes
 *             apart; 17 columns by 16 rows are read.
 * blk         current block, read as 256 consecutive bytes (16 per row).
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the early-exit SAD threshold.
 * extra_info  unused.
 *
 * Returns the accumulated SAD. Accumulation stops after the first row at
 * which the running total exceeds the threshold, so the value can be partial.
 */
int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    const int stride = dmin_rx & 0xFFFF;
    const int sad_limit = (int)((uint32)dmin_rx >> 16);
    const uint8 *line = ref;
    const uint8 *cur = blk;
    int total = 0;
    int row, col;

    (void)extra_info;

    NUM_SAD_HP_MB_CALL();

    for (row = 0; row < 16; row++, line += stride, cur += 16)
    {
        for (col = 0; col < 16; col++)
        {
            /* rounded average of the horizontal pair */
            int pred = (line[col] + line[col + 1] + 1) >> 1;
            int diff = pred - cur[col];

            total += AVC_ABS(diff);
        }

        NUM_SAD_HP_MB();

        /* this candidate is already worse than the threshold: give up early */
        if (total > sad_limit)
        {
            return total;
        }
    }

    return total;
}
177 
178 #ifdef HTFM  /* HTFM with uniform subsampling implementation,  2/28/01 */
179 
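// Check here. NOTE(review): the three Collect functions below are named with a doubled "AVCAVC" prefix -- confirm this is intended.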
/*
 * Staged, subsampled 16x16 SAD at a half-pel position in both x and y that
 * also accumulates HTFM statistics.
 *
 * The SAD is built in 16 stages. Stage i starts at ref + offsetRef[i] and
 * samples columns 0, 4, 8 and 12 on four lines spaced 4*rx bytes apart. Each
 * sample is interpolated from the 2x2 neighbourhood at that column.
 *
 * ref         reference frame pointer for the candidate position.
 * blk         current block, read as 64 consecutive uint32 words (four per
 *             stage). Within a word, bits 0-7 pair with column 0, bits 8-15
 *             with column 4, bits 16-23 with column 8 and bits 24-31 with
 *             column 12.
 *             NOTE(review): assumes blk is suitably aligned for uint32
 *             access -- confirm against the code that fills it.
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the early-exit SAD threshold.
 * extra_info  pointer to an HTFM_Stat. offsetRef is read; abs_dif_mad_avg
 *             and countbreak are updated once per call.
 *
 * Returns the accumulated SAD, which is partial if the threshold was
 * exceeded after any stage other than the first.
 *
 * Changes from the previous revision: the dead `madstar` computation is
 * gone, `blk` is no longer decremented to point before the buffer, the
 * statistics update exists once instead of twice, and only the two stage
 * totals that are actually read are stored.
 */
int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *offsetRef = htfm_stat->offsetRef;
    const uint32 *cur_words = (const uint32 *)blk;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int stage_sad[2] = {0, 0};  /* running SAD after stage 0 and stage 1 */
    int sad = 0;
    int difmad;
    int interp_sum, cur_pix;
    int i, j, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (j = 0; j < 4; j++) /* 4 lines */
        {
            cur_word = *cur_words++;

            /* columns 12, 8, 4, 0 pair with bytes 3, 2, 1, 0 of the word */
            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                interp_sum = p1[col] + p2[col] + p1[col + 1] + p2[col + 1] + 2;
                cur_pix = (int)((cur_word >> shift) & 0xFF);
                /* INTERP2_SUB_SAD comes from sad_halfpel_inline.h; presumably it
                   adds |(interp_sum >> 2) - cur_pix| to sad -- confirm there */
                sad = INTERP2_SUB_SAD(sad, interp_sum, cur_pix);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        if (i < 2)
        {
            stage_sad[i] = sad;
        }

        /* never stop after stage 0: the statistics below need stage 1 too.
           The cast keeps the original unsigned comparison explicit. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    /* statistics are collected on both the early-exit and the full path */
    difmad = stage_sad[0] - ((stage_sad[1] + 1) >> 1);
    htfm_stat->abs_dif_mad_avg += (difmad > 0) ? difmad : -difmad;
    htfm_stat->countbreak++;

    return sad;
}
262 
/*
 * Staged, subsampled 16x16 SAD at a half-pel position in y only that also
 * accumulates HTFM statistics.
 *
 * The SAD is built in 16 stages. Stage i starts at ref + offsetRef[i] and
 * samples columns 0, 4, 8 and 12 on four lines spaced 4*rx bytes apart. Each
 * sample is interpolated from the reference sample and the one rx bytes
 * below it.
 *
 * ref         reference frame pointer for the candidate position.
 * blk         current block, read as 64 consecutive uint32 words (four per
 *             stage). Within a word, bits 0-7 pair with column 0, bits 8-15
 *             with column 4, bits 16-23 with column 8 and bits 24-31 with
 *             column 12.
 *             NOTE(review): assumes blk is suitably aligned for uint32
 *             access -- confirm against the code that fills it.
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the early-exit SAD threshold.
 * extra_info  pointer to an HTFM_Stat. offsetRef is read; abs_dif_mad_avg
 *             and countbreak are updated once per call.
 *
 * Returns the accumulated SAD, which is partial if the threshold was
 * exceeded after any stage other than the first.
 *
 * Changes from the previous revision: the dead `madstar` computation is
 * gone, `blk` is no longer decremented to point before the buffer, the
 * statistics update exists once instead of twice, and only the two stage
 * totals that are actually read are stored.
 */
int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *offsetRef = htfm_stat->offsetRef;
    const uint32 *cur_words = (const uint32 *)blk;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int stage_sad[2] = {0, 0};  /* running SAD after stage 0 and stage 1 */
    int sad = 0;
    int difmad;
    int cur_pix, interp_sum;
    int i, j, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (j = 0; j < 4; j++) /* 4 lines */
        {
            cur_word = *cur_words++;

            /* columns 12, 8, 4, 0 pair with bytes 3, 2, 1, 0 of the word */
            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                interp_sum = p1[col] + p2[col] + 1;
                cur_pix = (int)((cur_word >> shift) & 0xFF);
                /* INTERP1_SUB_SAD comes from sad_halfpel_inline.h; note the
                   argument order (current pixel first, then the sum).
                   Presumably it adds |(interp_sum >> 1) - cur_pix| -- confirm */
                sad = INTERP1_SUB_SAD(sad, cur_pix, interp_sum);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        if (i < 2)
        {
            stage_sad[i] = sad;
        }

        /* never stop after stage 0: the statistics below need stage 1 too.
           The cast keeps the original unsigned comparison explicit. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    /* statistics are collected on both the early-exit and the full path */
    difmad = stage_sad[0] - ((stage_sad[1] + 1) >> 1);
    htfm_stat->abs_dif_mad_avg += (difmad > 0) ? difmad : -difmad;
    htfm_stat->countbreak++;

    return sad;
}
343 
/*
 * Staged, subsampled 16x16 SAD at a half-pel position in x only that also
 * accumulates HTFM statistics.
 *
 * The SAD is built in 16 stages. Stage i starts at ref + offsetRef[i] and
 * samples columns 0, 4, 8 and 12 on four lines spaced 4*rx bytes apart. Each
 * sample is interpolated from the reference sample and its right-hand
 * neighbour.
 *
 * ref         reference frame pointer for the candidate position.
 * blk         current block, read as 64 consecutive uint32 words (four per
 *             stage). Within a word, bits 0-7 pair with column 0, bits 8-15
 *             with column 4, bits 16-23 with column 8 and bits 24-31 with
 *             column 12.
 *             NOTE(review): assumes blk is suitably aligned for uint32
 *             access -- confirm against the code that fills it.
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the early-exit SAD threshold.
 * extra_info  pointer to an HTFM_Stat. offsetRef is read; abs_dif_mad_avg
 *             and countbreak are updated once per call.
 *
 * Returns the accumulated SAD, which is partial if the threshold was
 * exceeded after any stage other than the first.
 *
 * Changes from the previous revision: the dead `madstar` computation is
 * gone, `blk` is no longer decremented to point before the buffer, the
 * statistics update exists once instead of twice, and only the two stage
 * totals that are actually read are stored.
 */
int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *offsetRef = htfm_stat->offsetRef;
    const uint32 *cur_words = (const uint32 *)blk;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int stage_sad[2] = {0, 0};  /* running SAD after stage 0 and stage 1 */
    int sad = 0;
    int difmad;
    int cur_pix, interp_sum;
    int i, j, col, shift;
    uint8 *p1;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];

        for (j = 0; j < 4; j++) /* 4 lines */
        {
            cur_word = *cur_words++;

            /* columns 12, 8, 4, 0 pair with bytes 3, 2, 1, 0 of the word */
            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                interp_sum = p1[col] + p1[col + 1] + 1;
                cur_pix = (int)((cur_word >> shift) & 0xFF);
                /* INTERP1_SUB_SAD comes from sad_halfpel_inline.h; note the
                   argument order (current pixel first, then the sum).
                   Presumably it adds |(interp_sum >> 1) - cur_pix| -- confirm */
                sad = INTERP1_SUB_SAD(sad, cur_pix, interp_sum);
            }

            p1 += refwx4;
        }

        NUM_SAD_HP_MB();

        if (i < 2)
        {
            stage_sad[i] = sad;
        }

        /* never stop after stage 0: the statistics below need stage 1 too.
           The cast keeps the original unsigned comparison explicit. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    /* statistics are collected on both the early-exit and the full path */
    difmad = stage_sad[0] - ((stage_sad[1] + 1) >> 1);
    htfm_stat->abs_dif_mad_avg += (difmad > 0) ? difmad : -difmad;
    htfm_stat->countbreak++;

    return sad;
}
423 
/*
 * Staged, subsampled 16x16 SAD at a half-pel position in both x and y with
 * HTFM early rejection.
 *
 * The SAD is built in 16 stages. Stage i starts at ref + offsetRef[i] and
 * samples columns 0, 4, 8 and 12 on four lines spaced 4*rx bytes apart. Each
 * sample is interpolated from the 2x2 neighbourhood at that column. After
 * every stage the candidate is rejected if the running SAD exceeds either
 * the overall threshold or the per-stage budget (i+1)*madstar - nrmlz_th[i].
 *
 * ref         reference frame pointer for the candidate position.
 * blk         current block, read as 64 consecutive uint32 words (four per
 *             stage). Within a word, bits 0-7 pair with column 0, bits 8-15
 *             with column 4, bits 16-23 with column 8 and bits 24-31 with
 *             column 12.
 *             NOTE(review): assumes blk is suitably aligned for uint32
 *             access -- confirm against the code that fills it.
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the overall SAD threshold. madstar is that
 *             threshold divided by 16 (dmin_rx >> 20).
 * extra_info  treated as an int array: entries [0..15] are the per-stage
 *             thresholds nrmlz_th[], and the 16 entries starting at index 32
 *             are the per-stage reference offsets.
 *
 * Returns the full SAD if no stage rejected the candidate, 65536 otherwise.
 *
 * Change from the previous revision: `blk` is no longer decremented to point
 * before the buffer, because forming that pointer is undefined pointer
 * arithmetic. The same words are read in the same order.
 */
int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = nrmlz_th + 32;
    const uint32 *cur_words = (const uint32 *)blk;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    const int madstar = (int)((uint32)dmin_rx >> 20);
    int sadstar = 0;
    int sad = 0;
    int interp_sum, cur_pix;
    int i, j, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (j = 0; j < 4; j++) /* 4 lines */
        {
            cur_word = *cur_words++;

            /* columns 12, 8, 4, 0 pair with bytes 3, 2, 1, 0 of the word */
            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                interp_sum = p1[col] + p2[col] + p1[col + 1] + p2[col + 1] + 2;
                cur_pix = (int)((cur_word >> shift) & 0xFF);
                /* INTERP2_SUB_SAD comes from sad_halfpel_inline.h; presumably it
                   adds |(interp_sum >> 2) - cur_pix| to sad -- confirm there */
                sad = INTERP2_SUB_SAD(sad, interp_sum, cur_pix);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        sadstar += madstar;
        /* the first test is signed; the second keeps the original unsigned
           comparison, now spelled out with an explicit cast */
        if (sad > sadstar - nrmlz_th[i] || (uint32)sad > dmin)
        {
            return 65536;
        }
    }

    return sad;
}
491 
/*
 * Staged, subsampled 16x16 SAD at a half-pel position in y only with HTFM
 * early rejection.
 *
 * The SAD is built in 16 stages. Stage i starts at ref + offsetRef[i] and
 * samples columns 0, 4, 8 and 12 on four lines spaced 4*rx bytes apart. Each
 * sample is interpolated from the reference sample and the one rx bytes
 * below it. After every stage the candidate is rejected if the running SAD
 * exceeds either the overall threshold or the per-stage budget
 * (i+1)*madstar - nrmlz_th[i].
 *
 * ref         reference frame pointer for the candidate position.
 * blk         current block, read as 64 consecutive uint32 words (four per
 *             stage). Within a word, bits 0-7 pair with column 0, bits 8-15
 *             with column 4, bits 16-23 with column 8 and bits 24-31 with
 *             column 12.
 *             NOTE(review): assumes blk is suitably aligned for uint32
 *             access -- confirm against the code that fills it.
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the overall SAD threshold. madstar is that
 *             threshold divided by 16 (dmin_rx >> 20).
 * extra_info  treated as an int array: entries [0..15] are the per-stage
 *             thresholds nrmlz_th[], and the 16 entries starting at index 32
 *             are the per-stage reference offsets.
 *
 * Returns the full SAD if no stage rejected the candidate, 65536 otherwise.
 *
 * Change from the previous revision: `blk` is no longer decremented to point
 * before the buffer, because forming that pointer is undefined pointer
 * arithmetic. The same words are read in the same order.
 */
int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = nrmlz_th + 32;
    const uint32 *cur_words = (const uint32 *)blk;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    const int madstar = (int)((uint32)dmin_rx >> 20);
    int sadstar = 0;
    int sad = 0;
    int cur_pix, interp_sum;
    int i, j, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (j = 0; j < 4; j++) /* 4 lines */
        {
            cur_word = *cur_words++;

            /* columns 12, 8, 4, 0 pair with bytes 3, 2, 1, 0 of the word */
            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                interp_sum = p1[col] + p2[col] + 1;
                cur_pix = (int)((cur_word >> shift) & 0xFF);
                /* INTERP1_SUB_SAD comes from sad_halfpel_inline.h; note the
                   argument order (current pixel first, then the sum).
                   Presumably it adds |(interp_sum >> 1) - cur_pix| -- confirm */
                sad = INTERP1_SUB_SAD(sad, cur_pix, interp_sum);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        sadstar += madstar;
        /* the first test is signed; the second keeps the original unsigned
           comparison, now spelled out with an explicit cast */
        if (sad > sadstar - nrmlz_th[i] || (uint32)sad > dmin)
        {
            return 65536;
        }
    }

    return sad;
}
557 
/*
 * Staged, subsampled 16x16 SAD at a half-pel position in x only with HTFM
 * early rejection.
 *
 * The SAD is built in 16 stages. Stage i starts at ref + offsetRef[i] and
 * samples columns 0, 4, 8 and 12 on four lines spaced 4*rx bytes apart. Each
 * sample is interpolated from the reference sample and its right-hand
 * neighbour. After every stage the candidate is rejected if the running SAD
 * exceeds either the overall threshold or the per-stage budget
 * (i+1)*madstar - nrmlz_th[i].
 *
 * ref         reference frame pointer for the candidate position.
 * blk         current block, read as 64 consecutive uint32 words (four per
 *             stage). Within a word, bits 0-7 pair with column 0, bits 8-15
 *             with column 4, bits 16-23 with column 8 and bits 24-31 with
 *             column 12.
 *             NOTE(review): assumes blk is suitably aligned for uint32
 *             access -- confirm against the code that fills it.
 * dmin_rx     packed argument: low 16 bits hold the reference stride rx,
 *             high 16 bits hold the overall SAD threshold. madstar is that
 *             threshold divided by 16 (dmin_rx >> 20).
 * extra_info  treated as an int array: entries [0..15] are the per-stage
 *             thresholds nrmlz_th[], and the 16 entries starting at index 32
 *             are the per-stage reference offsets.
 *
 * Returns the full SAD if no stage rejected the candidate, 65536 otherwise.
 *
 * Change from the previous revision: `blk` is no longer decremented to point
 * before the buffer, because forming that pointer is undefined pointer
 * arithmetic. The same words are read in the same order.
 */
int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = nrmlz_th + 32;
    const uint32 *cur_words = (const uint32 *)blk;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    const int madstar = (int)((uint32)dmin_rx >> 20);
    int sadstar = 0;
    int sad = 0;
    int cur_pix, interp_sum;
    int i, j, col, shift;
    uint8 *p1;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];

        for (j = 0; j < 4; j++) /* 4 lines */
        {
            cur_word = *cur_words++;

            /* columns 12, 8, 4, 0 pair with bytes 3, 2, 1, 0 of the word */
            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                interp_sum = p1[col] + p1[col + 1] + 1;
                cur_pix = (int)((cur_word >> shift) & 0xFF);
                /* INTERP1_SUB_SAD comes from sad_halfpel_inline.h; note the
                   argument order (current pixel first, then the sum).
                   Presumably it adds |(interp_sum >> 1) - cur_pix| -- confirm */
                sad = INTERP1_SUB_SAD(sad, cur_pix, interp_sum);
            }

            p1 += refwx4;
        }

        NUM_SAD_HP_MB();

        sadstar += madstar;
        /* the first test is signed; the second keeps the original unsigned
           comparison, now spelled out with an explicit cast */
        if (sad > sadstar - nrmlz_th[i] || (uint32)sad > dmin)
        {
            return 65536;
        }
    }

    return sad;
}
623 
624 #endif /* HTFM */
625 
626 
627 
628 
629 
630