1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "avcenc_lib.h"
19 #include "sad_inline.h"
20 
/* NOTE(review): Cached_lx is not referenced in the visible part of this file;
   presumably a cached line width -- confirm against its users. */
#define Cached_lx 176

/* Optional SAD instrumentation.
   With _SAD_STAT defined, the four counters below are defined here and each
   NUM_SAD_* macro post-increments the counter of the matching name.
   Without _SAD_STAT, the macros expand to nothing, so a call site such as
   "NUM_SAD_MB();" is reduced to an empty statement. */
#ifdef _SAD_STAT
uint32 num_sad_MB = 0;
uint32 num_sad_Blk = 0;
uint32 num_sad_MB_call = 0;
uint32 num_sad_Blk_call = 0;

#define NUM_SAD_MB_CALL()       num_sad_MB_call++
#define NUM_SAD_MB()            num_sad_MB++
#define NUM_SAD_BLK_CALL()      num_sad_Blk_call++
#define NUM_SAD_BLK()           num_sad_Blk++

#else

/* Instrumentation disabled: every counter macro is a no-op. */
#define NUM_SAD_MB_CALL()
#define NUM_SAD_MB()
#define NUM_SAD_BLK_CALL()
#define NUM_SAD_BLK()

#endif
42 
43 
/* This file consists of:
int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
int AVCAVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
*/
49 
50 
51 /*==================================================================
    Function:   AVCSAD_Macroblock_C
53     Date:       09/07/2000
54     Purpose:    Compute SAD 16x16 between blk and ref.
55     To do:      Uniform subsampling will be inserted later!
56                 Hypothesis Testing Fast Matching to be used later!
57     Changes:
58     11/7/00:    implemented MMX
59     1/24/01:    implemented SSE
60 ==================================================================*/
61 /********** C ************/
/* Unpacks the combined dmin/lx argument and forwards to simd_sad_mb().
 *
 *  ref, blk    passed through unchanged to simd_sad_mb().
 *  dmin_lx     packed argument: bits 16..31 hold dmin, bits 0..15 hold lx.
 *  extra_info  ignored by this variant.
 *
 *  Returns whatever simd_sad_mb() returns.
 */
int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
{
    const int line_pitch = dmin_lx & 0xFFFF;        /* low half-word: lx    */
    const int sad_limit = (uint32)dmin_lx >> 16;    /* high half-word: dmin */
    int32 mb_sad;

    (void)extra_info;

    NUM_SAD_MB_CALL();

    mb_sad = simd_sad_mb(ref, blk, sad_limit, line_pitch);
    return mb_sad;
}
76 
77 #ifdef HTFM   /* HTFM with uniform subsampling implementation 2/28/01 */
78 /*===============================================================
79     Function:   AVCAVCSAD_MB_HTFM_Collect and AVCSAD_MB_HTFM
80     Date:       3/2/1
    Purpose:    Compute the SAD on a 16x16 block using
                uniform subsampling and hypothesis testing fast matching
                for early dropout. AVCAVCSAD_MB_HTFM_Collect collects
                the statistics used to compute the thresholds that are
                then used in AVCSAD_MB_HTFM.
86     Input/Output:
87     Changes:
88   ===============================================================*/
89 
/* Statistics-collecting variant of the subsampled macroblock SAD.
 *
 *  ref         reference pixels; pass i starts reading at ref + offsetRef[i].
 *  blk         current-block samples, consumed as 64 consecutive 32-bit
 *              words (four words per pass).
 *              NOTE(review): the word load assumes blk is suitably aligned
 *              for uint32 access and that byte k of each word pairs with the
 *              reference pixel at column offset 4*k on the target's byte
 *              order -- confirm against the code that fills blk.
 *  dmin_lx     packed argument: bits 16..31 hold dmin, bits 0..15 hold lx.
 *  extra_info  points to an HTFM_Stat: offsetRef[] is read, and
 *              abs_dif_mad_avg and countbreak are updated once per call.
 *
 *  Each of the 16 passes accumulates 16 pixel differences: 4 rows that are
 *  4*lx bytes apart, with columns 0, 4, 8 and 12 on each row.
 *
 *  Returns the running SAD at the point the loop stopped: after all 16
 *  passes, or after the first pass past pass 0 whose running SAD exceeds
 *  dmin.
 */
int AVCAVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
{
    int i, row;
    int sad = 0;
    uint8 *p1;
    /* Mask lx out before shifting.  dmin lives in the top bits of dmin_lx, so
       shifting the whole signed value left could overflow or shift a negative
       number, both of which are undefined behaviour. */
    int lx4 = (dmin_lx & 0xFFFF) << 2;
    uint32 dmin = (uint32)dmin_lx >> 16;
    uint32 *blk_word = (uint32*) blk;   /* next word of blk to consume */
    uint32 cur_word;
    int saddata[16], tmp, tmp2;    /* running SAD after each pass */
    int difmad;
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
    uint *countbreak = &(htfm_stat->countbreak);
    int *offsetRef = htfm_stat->offsetRef;

    NUM_SAD_MB_CALL();

    for (i = 0; i < 16; i++)
    {
        p1 = ref + offsetRef[i];

        for (row = 0; row < 4; row++)
        {
            cur_word = *blk_word++;

            /* SUB_SAD comes from sad_inline.h; presumably it adds
               |tmp - tmp2| to sad -- confirm there. */
            tmp = p1[12];
            tmp2 = (cur_word >> 24) & 0xFF;
            sad = SUB_SAD(sad, tmp, tmp2);
            tmp = p1[8];
            tmp2 = (cur_word >> 16) & 0xFF;
            sad = SUB_SAD(sad, tmp, tmp2);
            tmp = p1[4];
            tmp2 = (cur_word >> 8) & 0xFF;
            sad = SUB_SAD(sad, tmp, tmp2);
            tmp = p1[0];
            tmp2 = (cur_word & 0xFF);
            sad = SUB_SAD(sad, tmp, tmp2);

            p1 += lx4;
        }

        NUM_SAD_MB();

        saddata[i] = sad;

        /* Never stop on pass 0: the statistics below need saddata[1]. */
        if (i > 0 && (uint32)sad > dmin)
        {
            break;
        }
    }

    /* Both the early-out and the full run record the same statistic. */
    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
    (*countbreak)++;
    return sad;
}
194 
/* Subsampled macroblock SAD with per-pass early termination.
 *
 *  ref         reference pixels; pass i starts reading at ref + offsetRef[i].
 *  blk         current-block samples, consumed as 64 consecutive 32-bit
 *              words (four words per pass).
 *              NOTE(review): the word load assumes blk is suitably aligned
 *              for uint32 access and that byte k of each word pairs with the
 *              reference pixel at column offset 4*k on the target's byte
 *              order -- confirm against the code that fills blk.
 *  dmin_lx     packed argument: bits 16..31 hold dmin, bits 0..15 hold lx.
 *  extra_info  treated as an int array: entries from index 0 are the
 *              per-pass thresholds, and entries from index 32 are the
 *              per-pass reference offsets.
 *
 *  Each of the 16 passes accumulates 16 pixel differences: 4 rows that are
 *  4*lx bytes apart, with columns 0, 4, 8 and 12 on each row.  After pass i
 *  the running SAD must not exceed dmin and must not exceed
 *  (i + 1) * (dmin >> 4) - threshold[i]; otherwise the candidate is dropped.
 *
 *  Returns the full SAD when every pass passes both checks, or 65536 as soon
 *  as one check fails.
 */
int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
{
    int sad = 0;
    uint8 *p1;

    int i, row;
    int tmp, tmp2;
    /* Mask lx out before shifting.  dmin lives in the top bits of dmin_lx, so
       shifting the whole signed value left could overflow or shift a negative
       number, both of which are undefined behaviour. */
    int lx4 = (dmin_lx & 0xFFFF) << 2;
    uint32 dmin = (uint32)dmin_lx >> 16;
    int sadstar = 0;
    int madstar = (uint32)dmin_lx >> 20;    /* dmin >> 4 */
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = (int*) extra_info + 32;
    uint32 *blk_word = (uint32*) blk;       /* next word of blk to consume */
    uint32 cur_word;

    NUM_SAD_MB_CALL();

    for (i = 0; i < 16; i++)
    {
        p1 = ref + offsetRef[i];

        for (row = 0; row < 4; row++)
        {
            cur_word = *blk_word++;

            /* SUB_SAD comes from sad_inline.h; presumably it adds
               |tmp - tmp2| to sad -- confirm there. */
            tmp = p1[12];
            tmp2 = (cur_word >> 24) & 0xFF;
            sad = SUB_SAD(sad, tmp, tmp2);
            tmp = p1[8];
            tmp2 = (cur_word >> 16) & 0xFF;
            sad = SUB_SAD(sad, tmp, tmp2);
            tmp = p1[4];
            tmp2 = (cur_word >> 8) & 0xFF;
            sad = SUB_SAD(sad, tmp, tmp2);
            tmp = p1[0];
            tmp2 = (cur_word & 0xFF);
            sad = SUB_SAD(sad, tmp, tmp2);

            p1 += lx4;
        }

        NUM_SAD_MB();

        sadstar += madstar;

        /* Already worse than the limit carried in dmin_lx. */
        if ((uint32)sad > dmin)
        {
            return 65536;
        }

        /* Running SAD is above the pass-i threshold-adjusted bound. */
        if (sad > (sadstar - nrmlz_th[i]))
        {
            return 65536;
        }
    }

    return sad;
}
287 #endif /* HTFM */
288 
289 
290 
291