1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "mp4def.h"
19 #include "mp4enc_lib.h"
20 #include "mp4lib_int.h"
21 #include "m4venc_oscl.h"
22 
/* VOP_OFFSET and CVOP_OFFSET are not self-contained: they expand to an
 * expression that reads a variable named `lx` from the scope in which the
 * macro is used. */
#define VOP_OFFSET  ((lx<<4)+16)  /* for offset to image area */
#define CVOP_OFFSET ((lx<<2)+8)   /* NOTE(review): presumably the chroma counterpart of VOP_OFFSET -- confirm */

#define PREF_INTRA  512     /* bias for INTRA coding (subtracted from min_SAD in ChooseMode_C) */
27 
28 /*===============================================================
29     Function:   ChooseMode
30     Date:       09/21/2000
31     Purpose:    Choosing between INTRA or INTER
32     Input/Output: Pointer to the starting point of the macroblock.
33     Note:
34 ===============================================================*/
ChooseMode_C(UChar * Mode,UChar * cur,Int lx,Int min_SAD)35 void ChooseMode_C(UChar *Mode, UChar *cur, Int lx, Int min_SAD)
36 {
37     Int i, j;
38     Int MB_mean, A, tmp, Th;
39     Int offset = (lx >> 2) - 4;
40     UChar *p = cur;
41     Int *pint = (Int *) cur, temp = 0;
42     MB_mean = 0;
43     A = 0;
44     Th = (min_SAD - PREF_INTRA) >> 1;
45 
46     for (j = 0; j < 8; j++)
47     {
48 
49         /* Odd Rows */
50         temp += (*pint++) & 0x00FF00FF;
51         temp += (*pint++) & 0x00FF00FF;
52         temp += (*pint++) & 0x00FF00FF;
53         temp += (*pint++) & 0x00FF00FF;
54         pint += offset;
55 
56         /* Even Rows */
57         temp += (*pint++ >> 8) & 0x00FF00FF;
58         temp += (*pint++ >> 8) & 0x00FF00FF;
59         temp += (*pint++ >> 8) & 0x00FF00FF;
60         temp += (*pint++ >> 8) & 0x00FF00FF;
61         pint += offset;
62 
63     }
64 
65     MB_mean = (((temp & 0x0000FFFF)) + ((temp & 0xFFFF0000) >> 16)) >> 7;
66 
67     p = cur;
68     offset = lx - 16;
69     for (j = 0; j < 16; j++)
70     {
71         temp = (j & 1);
72         p += temp;
73         i = 8;
74         while (i--)
75         {
76             tmp = *p - MB_mean;
77             p += 2;
78             if (tmp > 0) A += tmp;
79             else    A -= tmp;
80         }
81 
82         if (A >= Th)
83         {
84             *Mode = MODE_INTER;
85             return ;
86         }
87         p += (offset - temp);
88     }
89 
90     if (A < Th)
91         *Mode = MODE_INTRA;
92     else
93         *Mode = MODE_INTER;
94 
95     return ;
96 }
97 
98 
99 /*===============================================================
100     Function:   GetHalfPelMBRegion
101     Date:       09/17/2000
102     Purpose:    Interpolate the search region for half-pel search
103     Input/Output:   Center of the search, Half-pel memory, width
104     Note:       rounding type should be parameterized.
105                 Now fixed it to zero!!!!!!
106 
107 ===============================================================*/
108 
109 
GetHalfPelMBRegion_C(UChar * cand,UChar * hmem,Int lx)110 void GetHalfPelMBRegion_C(UChar *cand, UChar *hmem, Int lx)
111 {
112     Int i, j;
113     UChar *p1, *p2, *p3, *p4;
114     UChar *hmem1 = hmem;
115     UChar *hmem2 = hmem1 + 33;
116     Int offset = lx - 17;
117 
118     p1 = cand - lx - 1;
119     p2 = cand - lx;
120     p3 = cand - 1;
121     p4 = cand;
122 
123     for (j = 0; j < 16; j++)
124     {
125         for (i = 0; i < 16; i++)
126         {
127             *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2;
128             *hmem1++ = ((*p2++) + *p4 + 1) >> 1;
129             *hmem2++ = ((*p3++) + *p4 + 1) >> 1;
130             *hmem2++ = *p4++;
131         }
132         /*  last pixel */
133         *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2;
134         *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1;
135         hmem1 += 33;
136         hmem2 += 33;
137         p1 += offset;
138         p2 += offset;
139         p3 += offset;
140         p4 += offset;
141     }
142     /* last row */
143     for (i = 0; i < 16; i++)
144     {
145         *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2;
146         *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1;
147 
148     }
149     *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2;
150 
151     return ;
152 }
153 
154 /*===============================================================
155    Function:    GetHalfPelBlkRegion
156    Date:        09/20/2000
157    Purpose: Interpolate the search region for half-pel search
158             in 4MV mode.
159    Input/Output:    Center of the search, Half-pel memory, width
160    Note:        rounding type should be parameterized.
161             Now fixed it to zero!!!!!!
162 
163 ===============================================================*/
164 
165 
GetHalfPelBlkRegion(UChar * cand,UChar * hmem,Int lx)166 void GetHalfPelBlkRegion(UChar *cand, UChar *hmem, Int lx)
167 {
168     Int i, j;
169     UChar *p1, *p2, *p3, *p4;
170     UChar *hmem1 = hmem;
171     UChar *hmem2 = hmem1 + 17;
172     Int offset = lx - 9;
173 
174     p1 = cand - lx - 1;
175     p2 = cand - lx;
176     p3 = cand - 1;
177     p4 = cand;
178 
179     for (j = 0; j < 8; j++)
180     {
181         for (i = 0; i < 8; i++)
182         {
183             *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2;
184             *hmem1++ = ((*p2++) + *p4 + 1) >> 1;
185             *hmem2++ = ((*p3++) + *p4 + 1) >> 1;
186             *hmem2++ = *p4++;
187         }
188         /*  last pixel */
189         *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2;
190         *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1;
191         hmem1 += 17;
192         hmem2 += 17;
193         p1 += offset;
194         p2 += offset;
195         p3 += offset;
196         p4 += offset;
197     }
198     /* last row */
199     for (i = 0; i < 8; i++)
200     {
201         *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2;
202         *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1;
203 
204     }
205     *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2;
206 
207     return ;
208 }
209 
210 
211 /*=====================================================================
212     Function:   PaddingEdge
213     Date:       09/16/2000
214     Purpose:    Pad edge of a Vop
215     Modification: 09/20/05.
216 =====================================================================*/
217 
PaddingEdge(Vop * refVop)218 void  PaddingEdge(Vop *refVop)
219 {
220     UChar *src, *dst;
221     Int i;
222     Int pitch, width, height;
223     ULong temp1, temp2;
224 
225     width = refVop->width;
226     height = refVop->height;
227     pitch = refVop->pitch;
228 
229     /* pad top */
230     src = refVop->yChan;
231 
232     temp1 = *src; /* top-left corner */
233     temp2 = src[width-1]; /* top-right corner */
234     temp1 |= (temp1 << 8);
235     temp1 |= (temp1 << 16);
236     temp2 |= (temp2 << 8);
237     temp2 |= (temp2 << 16);
238 
239     dst = src - (pitch << 4);
240 
241     *((ULong*)(dst - 16)) = temp1;
242     *((ULong*)(dst - 12)) = temp1;
243     *((ULong*)(dst - 8)) = temp1;
244     *((ULong*)(dst - 4)) = temp1;
245 
246     M4VENC_MEMCPY(dst, src, width);
247 
248     *((ULong*)(dst += width)) = temp2;
249     *((ULong*)(dst + 4)) = temp2;
250     *((ULong*)(dst + 8)) = temp2;
251     *((ULong*)(dst + 12)) = temp2;
252 
253     dst = dst - width - 16;
254 
255     i = 15;
256     while (i--)
257     {
258         M4VENC_MEMCPY(dst + pitch, dst, pitch);
259         dst += pitch;
260     }
261 
262     /* pad sides */
263     dst += (pitch + 16);
264     src = dst;
265     i = height;
266     while (i--)
267     {
268         temp1 = *src;
269         temp2 = src[width-1];
270         temp1 |= (temp1 << 8);
271         temp1 |= (temp1 << 16);
272         temp2 |= (temp2 << 8);
273         temp2 |= (temp2 << 16);
274 
275         *((ULong*)(dst - 16)) = temp1;
276         *((ULong*)(dst - 12)) = temp1;
277         *((ULong*)(dst - 8)) = temp1;
278         *((ULong*)(dst - 4)) = temp1;
279 
280         *((ULong*)(dst += width)) = temp2;
281         *((ULong*)(dst + 4)) = temp2;
282         *((ULong*)(dst + 8)) = temp2;
283         *((ULong*)(dst + 12)) = temp2;
284 
285         src += pitch;
286         dst = src;
287     }
288 
289     /* pad bottom */
290     dst -= 16;
291     i = 16;
292     while (i--)
293     {
294         M4VENC_MEMCPY(dst, dst - pitch, pitch);
295         dst += pitch;
296     }
297 
298 
299     return ;
300 }
301 
302 /*===================================================================
303     Function:   ComputeMBSum
304     Date:       10/28/2000
305     Purpose:    Compute sum of absolute value (SAV) of blocks in a macroblock
306                 in INTRA mode needed for rate control. Thus, instead of
307                 computing the SAV, we can compute first order moment or
308                 variance .
309 
310     11/28/00:    add MMX
311     9/3/01:      do parallel comp for C function.
312 ===================================================================*/
ComputeMBSum_C(UChar * cur,Int lx,MOT * mot_mb)313 void ComputeMBSum_C(UChar *cur, Int lx, MOT *mot_mb)
314 {
315     Int j;
316     Int *cInt, *cInt2;
317     Int sad1 = 0, sad2 = 0, sad3 = 0, sad4 = 0;
318     Int tmp, tmp2, mask = 0x00FF00FF;
319 
320     cInt = (Int*)cur;   /* make sure this is word-align */
321     cInt2 = (Int*)(cur + (lx << 3));
322     j = 8;
323     while (j--)
324     {
325         tmp = cInt[3];  /* load 4 pixels at a time */
326         tmp2 = tmp & mask;
327         tmp = (tmp >> 8) & mask;
328         tmp += tmp2;
329         sad2 += tmp;
330         tmp = cInt[2];
331         tmp2 = tmp & mask;
332         tmp = (tmp >> 8) & mask;
333         tmp += tmp2;
334         sad2 += tmp;
335         tmp = cInt[1];
336         tmp2 = tmp & mask;
337         tmp = (tmp >> 8) & mask;
338         tmp += tmp2;
339         sad1 += tmp;
340         tmp = *cInt;
341         cInt += (lx >> 2);
342         tmp2 = tmp & mask;
343         tmp = (tmp >> 8) & mask;
344         tmp += tmp2;
345         sad1 += tmp;
346 
347         tmp = cInt2[3];
348         tmp2 = tmp & mask;
349         tmp = (tmp >> 8) & mask;
350         tmp += tmp2;
351         sad4 += tmp;
352         tmp = cInt2[2];
353         tmp2 = tmp & mask;
354         tmp = (tmp >> 8) & mask;
355         tmp += tmp2;
356         sad4 += tmp;
357         tmp = cInt2[1];
358         tmp2 = tmp & mask;
359         tmp = (tmp >> 8) & mask;
360         tmp += tmp2;
361         sad3 += tmp;
362         tmp = *cInt2;
363         cInt2 += (lx >> 2);
364         tmp2 = tmp & mask;
365         tmp = (tmp >> 8) & mask;
366         tmp += tmp2;
367         sad3 += tmp;
368     }
369     sad1 += (sad1 << 16);
370     sad2 += (sad2 << 16);
371     sad3 += (sad3 << 16);
372     sad4 += (sad4 << 16);
373     sad1 >>= 16;
374     sad2 >>= 16;
375     sad3 >>= 16;
376     sad4 >>= 16;
377 
378     mot_mb[1].sad = sad1;
379     mot_mb[2].sad = sad2;
380     mot_mb[3].sad = sad3;
381     mot_mb[4].sad = sad4;
382     mot_mb[0].sad = sad1 + sad2 + sad3 + sad4;
383 
384     return ;
385 }
386 
387