1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /* contains
19 int AVCHalfPel1_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh)
20 int AVCHalfPel2_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width)
21 int AVCHalfPel1_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh)
22 int AVCHalfPel2_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width)
23
24 int AVCSAD_MB_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
25 int AVCSAD_MB_HP_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
26 int AVCSAD_MB_HP_HTFM(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
27 int AVCSAD_Blk_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info)
28 */
29
30 #include "avcenc_lib.h"
31 #include "sad_halfpel_inline.h"
32
/*
 * Optional SAD instrumentation.
 *
 * The SAD routines in this file invoke NUM_SAD_HP_MB_CALL() once per call
 * and NUM_SAD_HP_MB() once per outer-loop pass.  The *_BLK hooks are not
 * used in the visible part of this file.
 *
 * Without _SAD_STAT every hook expands to nothing; with _SAD_STAT each hook
 * post-increments the matching global counter defined below.
 */
#ifndef _SAD_STAT

/* Statistics disabled: hooks compile away. */
#define NUM_SAD_HP_MB_CALL()
#define NUM_SAD_HP_MB()
#define NUM_SAD_HP_BLK_CALL()
#define NUM_SAD_HP_BLK()

#else /* _SAD_STAT */

/* Statistics enabled: global counters, all starting at zero. */
uint32 num_sad_HP_MB = 0;
uint32 num_sad_HP_Blk = 0;
uint32 num_sad_HP_MB_call = 0;
uint32 num_sad_HP_Blk_call = 0;

#define NUM_SAD_HP_MB_CALL() num_sad_HP_MB_call++
#define NUM_SAD_HP_MB() num_sad_HP_MB++
#define NUM_SAD_HP_BLK_CALL() num_sad_HP_Blk_call++
#define NUM_SAD_HP_BLK() num_sad_HP_Blk++

#endif /* _SAD_STAT */
48
49
50
51 /*===============================================================
52 Function: SAD_MB_HalfPel
53 Date: 09/17/2000
54 Purpose: Compute the SAD on the half-pel resolution
55 Input/Output: hmem is assumed to be a pointer to the starting
56 point of the search in the 33x33 matrix search region
57 Changes:
58 11/7/00: implemented MMX
59 ===============================================================*/
60 /*==================================================================
61 Function: AVCSAD_MB_HalfPel_C
62 Date: 04/30/2001
63 Purpose: Compute SAD 16x16 between blk and ref in halfpel
64 resolution,
65 Changes:
66 ==================================================================*/
/* Both components are half-pel: each predicted sample averages four neighbouring reference pixels */
/*
 * 16x16 SAD between blk and the reference interpolated from four
 * neighbours: each predicted sample is
 *     (ref[x] + ref[x+1] + ref[x+stride] + ref[x+stride+1] + 2) >> 2.
 *
 * ref         top-left reference pixel of the candidate
 * blk         current block, read as 256 consecutive bytes
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold for early termination
 * extra_info  unused
 *
 * Returns the accumulated SAD.  The running sum is compared with the
 * threshold after every completed row; once it is exceeded the partial
 * sum is returned immediately.
 */
int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    const int stride = dmin_rx & 0xFFFF;
    const int limit = (int)((uint32)dmin_rx >> 16);
    uint8 *top = ref;             /* current reference row */
    uint8 *bottom = ref + stride; /* reference row just below it */
    uint8 *cur = blk;
    int total = 0;
    int row, col;

    (void)extra_info;

    NUM_SAD_HP_MB_CALL();

    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
        {
            int diff = ((top[col] + top[col + 1] + bottom[col] + bottom[col + 1] + 2) >> 2) - cur[col];

            total += AVC_ABS(diff);
        }
        cur += 16;

        NUM_SAD_HP_MB();

        if (total > limit)
        {
            break; /* already worse than the best candidate so far */
        }

        top += stride;
        bottom += stride;
    }

    return total;
}
108
/*
 * 16x16 SAD between blk and the reference interpolated from two rows:
 * each predicted sample is (ref[x] + ref[x+stride] + 1) >> 1.
 *
 * ref         top-left reference pixel of the candidate
 * blk         current block, read as 256 consecutive bytes
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold for early termination
 * extra_info  unused
 *
 * Returns the accumulated SAD.  The running sum is compared with the
 * threshold after every completed row; once it is exceeded the partial
 * sum is returned immediately.
 */
int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    const int stride = dmin_rx & 0xFFFF;
    const int limit = (int)((uint32)dmin_rx >> 16);
    uint8 *upper = ref;          /* current reference row */
    uint8 *lower = ref + stride; /* reference row just below it */
    uint8 *cur = blk;
    int total = 0;
    int row, col;

    (void)extra_info;

    NUM_SAD_HP_MB_CALL();

    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
        {
            int diff = ((upper[col] + lower[col] + 1) >> 1) - cur[col];

            total += AVC_ABS(diff);
        }
        cur += 16;

        NUM_SAD_HP_MB();

        if (total > limit)
        {
            break; /* already worse than the best candidate so far */
        }

        upper += stride;
        lower += stride;
    }

    return total;
}
144
/*
 * 16x16 SAD between blk and the reference interpolated from two
 * horizontally adjacent pixels: each predicted sample is
 * (ref[x] + ref[x+1] + 1) >> 1.
 *
 * ref         top-left reference pixel of the candidate
 * blk         current block, read as 256 consecutive bytes
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold for early termination
 * extra_info  unused
 *
 * Returns the accumulated SAD.  The running sum is compared with the
 * threshold after every completed row; once it is exceeded the partial
 * sum is returned immediately.
 */
int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    const int stride = dmin_rx & 0xFFFF;
    const int limit = (int)((uint32)dmin_rx >> 16);
    uint8 *line = ref;
    uint8 *cur = blk;
    int total = 0;
    int row, col;

    (void)extra_info;

    NUM_SAD_HP_MB_CALL();

    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
        {
            int diff = ((line[col] + line[col + 1] + 1) >> 1) - cur[col];

            total += AVC_ABS(diff);
        }
        cur += 16;

        NUM_SAD_HP_MB();

        if (total > limit)
        {
            break; /* already worse than the best candidate so far */
        }

        line += stride;
    }

    return total;
}
177
178 #ifdef HTFM /* HTFM with uniform subsampling implementation, 2/28/01 */
179
// Check here
/*
 * HTFM statistics-collecting SAD, four-neighbour interpolation.
 *
 * The SAD is built in 16 stages.  Stage i starts at ref + offsetRef[i] and
 * visits four lines spaced 4 * stride apart.  On each line, reference
 * columns 12, 8, 4 and 0 are interpolated as
 *     ref[c] + ref[c+1] + ref[c+stride] + ref[c+stride+1] + 2
 * and handed to INTERP2_SUB_SAD together with one byte of the current
 * block.  INTERP2_SUB_SAD comes from sad_halfpel_inline.h (not shown here);
 * it is presumably |(sum >> 2) - pixel| added to sad -- confirm there.
 *
 * ref         reference pixels of the candidate
 * blk         current block, consumed as 64 consecutive 32-bit words; the
 *             bytes of each word, most significant first, pair with
 *             reference columns 12, 8, 4, 0.
 *             NOTE(review): assumes blk is aligned for uint32 loads and was
 *             filled as 32-bit words by the caller -- confirm.
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold for early termination
 * extra_info  HTFM_Stat*; offsetRef is read, abs_dif_mad_avg and
 *             countbreak are updated
 *
 * Returns the accumulated SAD.  From the second stage on, the loop stops as
 * soon as the SAD exceeds the threshold.  On every exit path
 * |saddata[0] - ((saddata[1] + 1) >> 1)| is added to abs_dif_mad_avg and
 * countbreak is incremented.
 */
int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *offsetRef = htfm_stat->offsetRef;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int saddata[16]; /* SAD after each stage; only [0] and [1] feed the statistics */
    int sad = 0;
    int difmad, tmp, tmp2;
    int i, line, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (line = 0; line < 4; line++) /* 4 lines */
        {
            /* Read first, then advance, so no pointer below blk is ever formed. */
            cur_word = *((uint32*)blk);
            blk += 4;

            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                tmp = p1[col] + p2[col] + p1[col + 1] + p2[col + 1] + 2;
                tmp2 = (cur_word >> shift) & 0xFF;
                sad = INTERP2_SUB_SAD(sad, tmp, tmp2);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        saddata[i] = sad;

        /* The first stage never terminates, so saddata[1] is always valid below. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
    htfm_stat->abs_dif_mad_avg += ((difmad > 0) ? difmad : -difmad);
    htfm_stat->countbreak++;

    return sad;
}
262
/*
 * HTFM statistics-collecting SAD, two-row interpolation.
 *
 * The SAD is built in 16 stages.  Stage i starts at ref + offsetRef[i] and
 * visits four lines spaced 4 * stride apart.  On each line, reference
 * columns 12, 8, 4 and 0 are interpolated as
 *     ref[c] + ref[c+stride] + 1
 * and handed to INTERP1_SUB_SAD together with one byte of the current
 * block.  INTERP1_SUB_SAD comes from sad_halfpel_inline.h (not shown here);
 * it is presumably |(sum >> 1) - pixel| added to sad -- confirm there.
 *
 * ref         reference pixels of the candidate
 * blk         current block, consumed as 64 consecutive 32-bit words; the
 *             bytes of each word, most significant first, pair with
 *             reference columns 12, 8, 4, 0.
 *             NOTE(review): assumes blk is aligned for uint32 loads and was
 *             filled as 32-bit words by the caller -- confirm.
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold for early termination
 * extra_info  HTFM_Stat*; offsetRef is read, abs_dif_mad_avg and
 *             countbreak are updated
 *
 * Returns the accumulated SAD.  From the second stage on, the loop stops as
 * soon as the SAD exceeds the threshold.  On every exit path
 * |saddata[0] - ((saddata[1] + 1) >> 1)| is added to abs_dif_mad_avg and
 * countbreak is incremented.
 */
int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *offsetRef = htfm_stat->offsetRef;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int saddata[16]; /* SAD after each stage; only [0] and [1] feed the statistics */
    int sad = 0;
    int difmad, tmp, tmp2;
    int i, line, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (line = 0; line < 4; line++) /* 4 lines */
        {
            /* Read first, then advance, so no pointer below blk is ever formed. */
            cur_word = *((uint32*)blk);
            blk += 4;

            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                tmp2 = p1[col] + p2[col] + 1;
                tmp = (cur_word >> shift) & 0xFF;
                sad = INTERP1_SUB_SAD(sad, tmp, tmp2);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        saddata[i] = sad;

        /* The first stage never terminates, so saddata[1] is always valid below. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
    htfm_stat->abs_dif_mad_avg += ((difmad > 0) ? difmad : -difmad);
    htfm_stat->countbreak++;

    return sad;
}
343
/*
 * HTFM statistics-collecting SAD, two-column interpolation.
 *
 * The SAD is built in 16 stages.  Stage i starts at ref + offsetRef[i] and
 * visits four lines spaced 4 * stride apart.  On each line, reference
 * columns 12, 8, 4 and 0 are interpolated as
 *     ref[c] + ref[c+1] + 1
 * and handed to INTERP1_SUB_SAD together with one byte of the current
 * block.  INTERP1_SUB_SAD comes from sad_halfpel_inline.h (not shown here);
 * it is presumably |(sum >> 1) - pixel| added to sad -- confirm there.
 *
 * ref         reference pixels of the candidate
 * blk         current block, consumed as 64 consecutive 32-bit words; the
 *             bytes of each word, most significant first, pair with
 *             reference columns 12, 8, 4, 0.
 *             NOTE(review): assumes blk is aligned for uint32 loads and was
 *             filled as 32-bit words by the caller -- confirm.
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold for early termination
 * extra_info  HTFM_Stat*; offsetRef is read, abs_dif_mad_avg and
 *             countbreak are updated
 *
 * Returns the accumulated SAD.  From the second stage on, the loop stops as
 * soon as the SAD exceeds the threshold.  On every exit path
 * |saddata[0] - ((saddata[1] + 1) >> 1)| is added to abs_dif_mad_avg and
 * countbreak is incremented.
 */
int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *offsetRef = htfm_stat->offsetRef;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int saddata[16]; /* SAD after each stage; only [0] and [1] feed the statistics */
    int sad = 0;
    int difmad, tmp, tmp2;
    int i, line, col, shift;
    uint8 *p1;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];

        for (line = 0; line < 4; line++) /* 4 lines */
        {
            /* Read first, then advance, so no pointer below blk is ever formed. */
            cur_word = *((uint32*)blk);
            blk += 4;

            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                tmp2 = p1[col] + p1[col + 1] + 1;
                tmp = (cur_word >> shift) & 0xFF;
                sad = INTERP1_SUB_SAD(sad, tmp, tmp2);
            }

            p1 += refwx4;
        }

        NUM_SAD_HP_MB();

        saddata[i] = sad;

        /* The first stage never terminates, so saddata[1] is always valid below. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
    htfm_stat->abs_dif_mad_avg += ((difmad > 0) ? difmad : -difmad);
    htfm_stat->countbreak++;

    return sad;
}
423
/*
 * HTFM SAD with staged early rejection, four-neighbour interpolation.
 *
 * The SAD is built in 16 stages.  Stage i starts at ref + offsetRef[i] and
 * visits four lines spaced 4 * stride apart.  On each line, reference
 * columns 12, 8, 4 and 0 are interpolated as
 *     ref[c] + ref[c+1] + ref[c+stride] + ref[c+stride+1] + 2
 * and handed to INTERP2_SUB_SAD together with one byte of the current
 * block.  INTERP2_SUB_SAD comes from sad_halfpel_inline.h (not shown here);
 * it is presumably |(sum >> 2) - pixel| added to sad -- confirm there.
 *
 * ref         reference pixels of the candidate
 * blk         current block, consumed as 64 consecutive 32-bit words; the
 *             bytes of each word, most significant first, pair with
 *             reference columns 12, 8, 4, 0.
 *             NOTE(review): assumes blk is aligned for uint32 loads and was
 *             filled as 32-bit words by the caller -- confirm.
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold (dmin); dmin_rx >> 20 is the
 *             per-stage budget (madstar)
 * extra_info  int array: elements [0..15] are the per-stage thresholds
 *             nrmlz_th[i]; the stage offsets are read from element 32 on
 *
 * After each stage the candidate is rejected, returning 65536, when
 *     sad > (i + 1) * madstar - nrmlz_th[i]   or   sad > dmin.
 * Otherwise the full SAD is returned after the last stage.
 */
int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = nrmlz_th + 32;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const int madstar = (int)((uint32)dmin_rx >> 20);
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int sadstar = 0; /* running budget: madstar added once per stage */
    int sad = 0;
    int tmp, tmp2;
    int i, line, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (line = 0; line < 4; line++) /* 4 lines */
        {
            /* Read first, then advance, so no pointer below blk is ever formed. */
            cur_word = *((uint32*)blk);
            blk += 4;

            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                tmp = p1[col] + p2[col] + p1[col + 1] + p2[col + 1] + 2;
                tmp2 = (cur_word >> shift) & 0xFF;
                sad = INTERP2_SUB_SAD(sad, tmp, tmp2);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        sadstar += madstar;
        if (sad > sadstar - nrmlz_th[i] || (uint32)sad > dmin)
        {
            return 65536; /* rejected: larger than any 16-bit threshold */
        }
    }

    return sad;
}
491
/*
 * HTFM SAD with staged early rejection, two-row interpolation.
 *
 * The SAD is built in 16 stages.  Stage i starts at ref + offsetRef[i] and
 * visits four lines spaced 4 * stride apart.  On each line, reference
 * columns 12, 8, 4 and 0 are interpolated as
 *     ref[c] + ref[c+stride] + 1
 * and handed to INTERP1_SUB_SAD together with one byte of the current
 * block.  INTERP1_SUB_SAD comes from sad_halfpel_inline.h (not shown here);
 * it is presumably |(sum >> 1) - pixel| added to sad -- confirm there.
 *
 * ref         reference pixels of the candidate
 * blk         current block, consumed as 64 consecutive 32-bit words; the
 *             bytes of each word, most significant first, pair with
 *             reference columns 12, 8, 4, 0.
 *             NOTE(review): assumes blk is aligned for uint32 loads and was
 *             filled as 32-bit words by the caller -- confirm.
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold (dmin); dmin_rx >> 20 is the
 *             per-stage budget (madstar)
 * extra_info  int array: elements [0..15] are the per-stage thresholds
 *             nrmlz_th[i]; the stage offsets are read from element 32 on
 *
 * After each stage the candidate is rejected, returning 65536, when
 *     sad > (i + 1) * madstar - nrmlz_th[i]   or   sad > dmin.
 * Otherwise the full SAD is returned after the last stage.
 */
int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = nrmlz_th + 32;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const int madstar = (int)((uint32)dmin_rx >> 20);
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int sadstar = 0; /* running budget: madstar added once per stage */
    int sad = 0;
    int tmp, tmp2;
    int i, line, col, shift;
    uint8 *p1, *p2;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];
        p2 = p1 + rx;

        for (line = 0; line < 4; line++) /* 4 lines */
        {
            /* Read first, then advance, so no pointer below blk is ever formed. */
            cur_word = *((uint32*)blk);
            blk += 4;

            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                tmp2 = p1[col] + p2[col] + 1;
                tmp = (cur_word >> shift) & 0xFF;
                sad = INTERP1_SUB_SAD(sad, tmp, tmp2);
            }

            p1 += refwx4;
            p2 += refwx4;
        }

        NUM_SAD_HP_MB();

        sadstar += madstar;
        if (sad > sadstar - nrmlz_th[i] || (uint32)sad > dmin)
        {
            return 65536; /* rejected: larger than any 16-bit threshold */
        }
    }

    return sad;
}
557
/*
 * HTFM SAD with staged early rejection, two-column interpolation.
 *
 * The SAD is built in 16 stages.  Stage i starts at ref + offsetRef[i] and
 * visits four lines spaced 4 * stride apart.  On each line, reference
 * columns 12, 8, 4 and 0 are interpolated as
 *     ref[c] + ref[c+1] + 1
 * and handed to INTERP1_SUB_SAD together with one byte of the current
 * block.  INTERP1_SUB_SAD comes from sad_halfpel_inline.h (not shown here);
 * it is presumably |(sum >> 1) - pixel| added to sad -- confirm there.
 *
 * ref         reference pixels of the candidate
 * blk         current block, consumed as 64 consecutive 32-bit words; the
 *             bytes of each word, most significant first, pair with
 *             reference columns 12, 8, 4, 0.
 *             NOTE(review): assumes blk is aligned for uint32 loads and was
 *             filled as 32-bit words by the caller -- confirm.
 * dmin_rx     packed value: low 16 bits = reference row stride,
 *             high 16 bits = SAD threshold (dmin); dmin_rx >> 20 is the
 *             per-stage budget (madstar)
 * extra_info  int array: elements [0..15] are the per-stage thresholds
 *             nrmlz_th[i]; the stage offsets are read from element 32 on
 *
 * After each stage the candidate is rejected, returning 65536, when
 *     sad > (i + 1) * madstar - nrmlz_th[i]   or   sad > dmin.
 * Otherwise the full SAD is returned after the last stage.
 */
int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
{
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = nrmlz_th + 32;
    const int rx = dmin_rx & 0xFFFF;
    const int refwx4 = rx << 2;
    const int madstar = (int)((uint32)dmin_rx >> 20);
    const uint32 dmin = (uint32)dmin_rx >> 16;
    int sadstar = 0; /* running budget: madstar added once per stage */
    int sad = 0;
    int tmp, tmp2;
    int i, line, col, shift;
    uint8 *p1;
    uint32 cur_word;

    NUM_SAD_HP_MB_CALL();

    for (i = 0; i < 16; i++) /* 16 stages */
    {
        p1 = ref + offsetRef[i];

        for (line = 0; line < 4; line++) /* 4 lines */
        {
            /* Read first, then advance, so no pointer below blk is ever formed. */
            cur_word = *((uint32*)blk);
            blk += 4;

            for (col = 12, shift = 24; col >= 0; col -= 4, shift -= 8)
            {
                tmp2 = p1[col] + p1[col + 1] + 1;
                tmp = (cur_word >> shift) & 0xFF;
                sad = INTERP1_SUB_SAD(sad, tmp, tmp2);
            }

            p1 += refwx4;
        }

        NUM_SAD_HP_MB();

        sadstar += madstar;
        if (sad > sadstar - nrmlz_th[i] || (uint32)sad > dmin)
        {
            return 65536; /* rejected: larger than any 16-bit threshold */
        }
    }

    return sad;
}
623
624 #endif /* HTFM */
625
626
627
628
629
630