1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 #include "mp4def.h"
19 #include "mp4enc_lib.h"
20 #include "mp4lib_int.h"
21 #include "m4venc_oscl.h"
22
/* Offset expressions written in terms of `lx`: a variable with that name
 * must be in scope wherever these two macros are expanded. */
#define VOP_OFFSET ((lx<<4)+16) /* for offset to image area */
#define CVOP_OFFSET ((lx<<2)+8)

/* Subtracted from min_SAD in ChooseMode_C before the INTRA/INTER decision
 * threshold is derived. */
#define PREF_INTRA 512 /* bias for INTRA coding */
27
28 /*===============================================================
29 Function: ChooseMode
30 Date: 09/21/2000
31 Purpose: Choosing between INTRA or INTER
32 Input/Output: Pointer to the starting point of the macroblock.
33 Note:
34 ===============================================================*/
/*
 * Decide whether a 16x16 macroblock is coded INTRA or INTER.
 *
 *   Mode    : output; receives MODE_INTRA or MODE_INTER
 *   cur     : top-left sample of the 16x16 macroblock
 *   lx      : distance in bytes between consecutive rows of cur
 *   min_SAD : SAD value the block activity is compared against
 *             (presumably the best inter-prediction SAD -- confirm with caller)
 *
 * Only a checkerboard of 128 samples is examined: even columns on even
 * rows and odd columns on odd rows. The mean of those samples is computed
 * first, then the sum A of their absolute deviations from that mean.
 * INTRA is selected only when A stays below (min_SAD - PREF_INTRA) >> 1.
 *
 * Samples are read one byte at a time so that the result does not depend
 * on the alignment of cur or on the byte order of the host, and so that
 * both passes are guaranteed to look at the same samples.
 */
void ChooseMode_C(UChar *Mode, UChar *cur, Int lx, Int min_SAD)
{
    Int r, c;
    Int sum = 0;
    Int MB_mean;
    Int A = 0;
    Int Th = (min_SAD - PREF_INTRA) >> 1;

    /* Pass 1: mean of the 128 checkerboard samples (hence the shift by 7). */
    for (r = 0; r < 16; r++)
    {
        const UChar *line = cur + r * lx;

        for (c = (r & 1); c < 16; c += 2)
        {
            sum += line[c];
        }
    }
    MB_mean = sum >> 7;

    /* Pass 2: accumulate absolute deviations over the same samples. */
    for (r = 0; r < 16; r++)
    {
        const UChar *line = cur + r * lx;

        for (c = (r & 1); c < 16; c += 2)
        {
            Int diff = line[c] - MB_mean;

            A += (diff > 0) ? diff : -diff;
        }

        /* A never decreases, so once the threshold is reached the decision
         * is final and the remaining rows can be skipped. */
        if (A >= Th)
        {
            *Mode = MODE_INTER;
            return;
        }
    }

    /* Every row was checked without reaching the threshold, so A < Th here. */
    *Mode = MODE_INTRA;
}
97
98
99 /*===============================================================
100 Function: GetHalfPelMBRegion
101 Date: 09/17/2000
102 Purpose: Interpolate the search region for half-pel search
103 Input/Output: Center of the search, Half-pel memory, width
Note: the rounding type should be parameterized.
For now it is fixed to zero.
106
107 ===============================================================*/
108
109
/*
 * Build the half-pel interpolated region around a 16x16 candidate.
 *
 *   cand : top-left integer-pel sample of the candidate block
 *   hmem : output buffer, filled as 33 rows of 33 bytes
 *   lx   : distance in bytes between consecutive rows of cand
 *
 * Output rows alternate: even rows lie vertically half-way between two
 * source rows, odd rows lie on a source row. Within a row, even columns
 * lie horizontally half-way between two source samples and odd columns lie
 * on a source sample. Source rows -1..16 and columns -1..16 relative to
 * cand are read. Rounding is fixed: (a + b + 1) >> 1 for two samples and
 * (a + b + c + d + 2) >> 2 for four.
 */
void GetHalfPelMBRegion_C(UChar *cand, UChar *hmem, Int lx)
{
    UChar *upper = cand - lx;       /* source row above the current one     */
    UChar *lower = cand;            /* current source row                   */
    UChar *half_row = hmem;         /* output row between upper and lower   */
    UChar *full_row = hmem + 33;    /* output row aligned with lower        */
    Int row, col;

    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
        {
            half_row[2 * col] =
                (upper[col - 1] + upper[col] + lower[col - 1] + lower[col] + 2) >> 2;
            half_row[2 * col + 1] = (upper[col] + lower[col] + 1) >> 1;
            full_row[2 * col] = (lower[col - 1] + lower[col] + 1) >> 1;
            full_row[2 * col + 1] = lower[col];
        }

        /* 33rd column: only the horizontally interpolated positions remain */
        half_row[32] = (upper[15] + upper[16] + lower[15] + lower[16] + 2) >> 2;
        full_row[32] = (lower[15] + lower[16] + 1) >> 1;

        /* each pointer skips over the row owned by the other one */
        half_row += 66;
        full_row += 66;
        upper += lx;
        lower += lx;
    }

    /* 33rd output row: only the vertically interpolated positions remain */
    for (col = 0; col < 16; col++)
    {
        half_row[2 * col] =
            (upper[col - 1] + upper[col] + lower[col - 1] + lower[col] + 2) >> 2;
        half_row[2 * col + 1] = (upper[col] + lower[col] + 1) >> 1;
    }
    half_row[32] = (upper[15] + upper[16] + lower[15] + lower[16] + 2) >> 2;
}
153
154 /*===============================================================
155 Function: GetHalfPelBlkRegion
156 Date: 09/20/2000
157 Purpose: Interpolate the search region for half-pel search
158 in 4MV mode.
159 Input/Output: Center of the search, Half-pel memory, width
Note: the rounding type should be parameterized.
For now it is fixed to zero.
162
163 ===============================================================*/
164
165
/*
 * Build the half-pel interpolated region around an 8x8 candidate block
 * (4MV mode).
 *
 *   cand : top-left integer-pel sample of the candidate block
 *   hmem : output buffer, filled as 17 rows of 17 bytes
 *   lx   : distance in bytes between consecutive rows of cand
 *
 * Output rows alternate: even rows lie vertically half-way between two
 * source rows, odd rows lie on a source row. Within a row, even columns
 * lie horizontally half-way between two source samples and odd columns lie
 * on a source sample. Source rows -1..8 and columns -1..8 relative to cand
 * are read. Rounding is fixed: (a + b + 1) >> 1 for two samples and
 * (a + b + c + d + 2) >> 2 for four.
 */
void GetHalfPelBlkRegion(UChar *cand, UChar *hmem, Int lx)
{
    UChar *upper = cand - lx;       /* source row above the current one     */
    UChar *lower = cand;            /* current source row                   */
    UChar *half_row = hmem;         /* output row between upper and lower   */
    UChar *full_row = hmem + 17;    /* output row aligned with lower        */
    Int row, col;

    for (row = 0; row < 8; row++)
    {
        for (col = 0; col < 8; col++)
        {
            half_row[2 * col] =
                (upper[col - 1] + upper[col] + lower[col - 1] + lower[col] + 2) >> 2;
            half_row[2 * col + 1] = (upper[col] + lower[col] + 1) >> 1;
            full_row[2 * col] = (lower[col - 1] + lower[col] + 1) >> 1;
            full_row[2 * col + 1] = lower[col];
        }

        /* 17th column: only the horizontally interpolated positions remain */
        half_row[16] = (upper[7] + upper[8] + lower[7] + lower[8] + 2) >> 2;
        full_row[16] = (lower[7] + lower[8] + 1) >> 1;

        /* each pointer skips over the row owned by the other one */
        half_row += 34;
        full_row += 34;
        upper += lx;
        lower += lx;
    }

    /* 17th output row: only the vertically interpolated positions remain */
    for (col = 0; col < 8; col++)
    {
        half_row[2 * col] =
            (upper[col - 1] + upper[col] + lower[col - 1] + lower[col] + 2) >> 2;
        half_row[2 * col + 1] = (upper[col] + lower[col] + 1) >> 1;
    }
    half_row[16] = (upper[7] + upper[8] + lower[7] + lower[8] + 2) >> 2;
}
209
210
211 /*=====================================================================
212 Function: PaddingEdge
213 Date: 09/16/2000
214 Purpose: Pad edge of a Vop
215 Modification: 09/20/05.
216 =====================================================================*/
217
PaddingEdge(Vop * refVop)218 void PaddingEdge(Vop *refVop)
219 {
220 UChar *src, *dst;
221 Int i;
222 Int pitch, width, height;
223 ULong temp1, temp2;
224
225 width = refVop->width;
226 height = refVop->height;
227 pitch = refVop->pitch;
228
229 /* pad top */
230 src = refVop->yChan;
231
232 temp1 = *src; /* top-left corner */
233 temp2 = src[width-1]; /* top-right corner */
234 temp1 |= (temp1 << 8);
235 temp1 |= (temp1 << 16);
236 temp2 |= (temp2 << 8);
237 temp2 |= (temp2 << 16);
238
239 dst = src - (pitch << 4);
240
241 *((ULong*)(dst - 16)) = temp1;
242 *((ULong*)(dst - 12)) = temp1;
243 *((ULong*)(dst - 8)) = temp1;
244 *((ULong*)(dst - 4)) = temp1;
245
246 M4VENC_MEMCPY(dst, src, width);
247
248 *((ULong*)(dst += width)) = temp2;
249 *((ULong*)(dst + 4)) = temp2;
250 *((ULong*)(dst + 8)) = temp2;
251 *((ULong*)(dst + 12)) = temp2;
252
253 dst = dst - width - 16;
254
255 i = 15;
256 while (i--)
257 {
258 M4VENC_MEMCPY(dst + pitch, dst, pitch);
259 dst += pitch;
260 }
261
262 /* pad sides */
263 dst += (pitch + 16);
264 src = dst;
265 i = height;
266 while (i--)
267 {
268 temp1 = *src;
269 temp2 = src[width-1];
270 temp1 |= (temp1 << 8);
271 temp1 |= (temp1 << 16);
272 temp2 |= (temp2 << 8);
273 temp2 |= (temp2 << 16);
274
275 *((ULong*)(dst - 16)) = temp1;
276 *((ULong*)(dst - 12)) = temp1;
277 *((ULong*)(dst - 8)) = temp1;
278 *((ULong*)(dst - 4)) = temp1;
279
280 *((ULong*)(dst += width)) = temp2;
281 *((ULong*)(dst + 4)) = temp2;
282 *((ULong*)(dst + 8)) = temp2;
283 *((ULong*)(dst + 12)) = temp2;
284
285 src += pitch;
286 dst = src;
287 }
288
289 /* pad bottom */
290 dst -= 16;
291 i = 16;
292 while (i--)
293 {
294 M4VENC_MEMCPY(dst, dst - pitch, pitch);
295 dst += pitch;
296 }
297
298
299 return ;
300 }
301
302 /*===================================================================
303 Function: ComputeMBSum
304 Date: 10/28/2000
Purpose: Compute the sum of absolute values (SAV) of the blocks in a
macroblock in INTRA mode, as needed for rate control. Instead of
computing the SAV, we can compute the first-order moment or the
variance.
309
310 11/28/00: add MMX
311 9/3/01: do parallel comp for C function.
312 ===================================================================*/
ComputeMBSum_C(UChar * cur,Int lx,MOT * mot_mb)313 void ComputeMBSum_C(UChar *cur, Int lx, MOT *mot_mb)
314 {
315 Int j;
316 Int *cInt, *cInt2;
317 Int sad1 = 0, sad2 = 0, sad3 = 0, sad4 = 0;
318 Int tmp, tmp2, mask = 0x00FF00FF;
319
320 cInt = (Int*)cur; /* make sure this is word-align */
321 cInt2 = (Int*)(cur + (lx << 3));
322 j = 8;
323 while (j--)
324 {
325 tmp = cInt[3]; /* load 4 pixels at a time */
326 tmp2 = tmp & mask;
327 tmp = (tmp >> 8) & mask;
328 tmp += tmp2;
329 sad2 += tmp;
330 tmp = cInt[2];
331 tmp2 = tmp & mask;
332 tmp = (tmp >> 8) & mask;
333 tmp += tmp2;
334 sad2 += tmp;
335 tmp = cInt[1];
336 tmp2 = tmp & mask;
337 tmp = (tmp >> 8) & mask;
338 tmp += tmp2;
339 sad1 += tmp;
340 tmp = *cInt;
341 cInt += (lx >> 2);
342 tmp2 = tmp & mask;
343 tmp = (tmp >> 8) & mask;
344 tmp += tmp2;
345 sad1 += tmp;
346
347 tmp = cInt2[3];
348 tmp2 = tmp & mask;
349 tmp = (tmp >> 8) & mask;
350 tmp += tmp2;
351 sad4 += tmp;
352 tmp = cInt2[2];
353 tmp2 = tmp & mask;
354 tmp = (tmp >> 8) & mask;
355 tmp += tmp2;
356 sad4 += tmp;
357 tmp = cInt2[1];
358 tmp2 = tmp & mask;
359 tmp = (tmp >> 8) & mask;
360 tmp += tmp2;
361 sad3 += tmp;
362 tmp = *cInt2;
363 cInt2 += (lx >> 2);
364 tmp2 = tmp & mask;
365 tmp = (tmp >> 8) & mask;
366 tmp += tmp2;
367 sad3 += tmp;
368 }
369 sad1 += (sad1 << 16);
370 sad2 += (sad2 << 16);
371 sad3 += (sad3 << 16);
372 sad4 += (sad4 << 16);
373 sad1 >>= 16;
374 sad2 >>= 16;
375 sad3 >>= 16;
376 sad4 >>= 16;
377
378 mot_mb[1].sad = sad1;
379 mot_mb[2].sad = sad2;
380 mot_mb[3].sad = sad3;
381 mot_mb[4].sad = sad4;
382 mot_mb[0].sad = sad1 + sad2 + sad3 + sad4;
383
384 return ;
385 }
386
387