1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*------------------------------------------------------------------------------
18 
19     Table of contents
20 
21      1. Include headers
22      2. External compiler flags
23      3. Module defines
24      4. Local function prototypes
25      5. Functions
26           h264bsdWriteMacroblock
27           h264bsdWriteOutputBlocks
28 
29 ------------------------------------------------------------------------------*/
30 
31 /*------------------------------------------------------------------------------
32     1. Include headers
33 ------------------------------------------------------------------------------*/
34 
#include "h264bsd_image.h"

#include <stdint.h>
#include <string.h>

#include "h264bsd_util.h"
#include "h264bsd_neighbour.h"
38 
39 /*------------------------------------------------------------------------------
40     2. External compiler flags
41 --------------------------------------------------------------------------------
42 
43 --------------------------------------------------------------------------------
44     3. Module defines
45 ------------------------------------------------------------------------------*/
46 
/* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c.
 * In this file they are used as the offsets of a 4x4 block inside a
 * macroblock: entries 0..15 for the luma blocks and entries 0..3 for the
 * blocks of each chroma component. */
extern const u32 h264bsdBlockX[];
extern const u32 h264bsdBlockY[];

/* clipping table, defined in h264bsd_intra_prediction.c. Indexed in this file
 * as (h264bsdClip + 512)[prediction + residual], where the residual is
 * range-checked to [-512, 511]. */
extern const u8 h264bsdClip[];
53 
54 /*------------------------------------------------------------------------------
55     4. Local function prototypes
56 ------------------------------------------------------------------------------*/
57 
58 
59 
60 /*------------------------------------------------------------------------------
61 
62     Function: h264bsdWriteMacroblock
63 
64         Functional description:
65             Write one macroblock into the image. Both luma and chroma
66             components will be written at the same time.
67 
68         Inputs:
69             data    pointer to macroblock data to be written, 256 values for
70                     luma followed by 64 values for both chroma components
71 
72         Outputs:
73             image   pointer to the image where the macroblock will be written
74 
75         Returns:
76             none
77 
78 ------------------------------------------------------------------------------*/
79 #ifndef H264DEC_NEON
h264bsdWriteMacroblock(image_t * image,u8 * data)80 void h264bsdWriteMacroblock(image_t *image, u8 *data)
81 {
82 
83 /* Variables */
84 
85     u32 i;
86     u32 width;
87     u32 *lum, *cb, *cr;
88     u32 *ptr;
89     u32 tmp1, tmp2;
90 
91 /* Code */
92 
93     ASSERT(image);
94     ASSERT(data);
95     ASSERT(!((u32)data&0x3));
96 
97     width = image->width;
98 
99     /*lint -save -e826 lum, cb and cr used to copy 4 bytes at the time, disable
100      * "area too small" info message */
101     lum = (u32*)image->luma;
102     cb = (u32*)image->cb;
103     cr = (u32*)image->cr;
104     ASSERT(!((u32)lum&0x3));
105     ASSERT(!((u32)cb&0x3));
106     ASSERT(!((u32)cr&0x3));
107 
108     ptr = (u32*)data;
109 
110     width *= 4;
111     for (i = 16; i ; i--)
112     {
113         tmp1 = *ptr++;
114         tmp2 = *ptr++;
115         *lum++ = tmp1;
116         *lum++ = tmp2;
117         tmp1 = *ptr++;
118         tmp2 = *ptr++;
119         *lum++ = tmp1;
120         *lum++ = tmp2;
121         lum += width-4;
122     }
123 
124     width >>= 1;
125     for (i = 8; i ; i--)
126     {
127         tmp1 = *ptr++;
128         tmp2 = *ptr++;
129         *cb++ = tmp1;
130         *cb++ = tmp2;
131         cb += width-2;
132     }
133 
134     for (i = 8; i ; i--)
135     {
136         tmp1 = *ptr++;
137         tmp2 = *ptr++;
138         *cr++ = tmp1;
139         *cr++ = tmp2;
140         cr += width-2;
141     }
142 
143 }
144 #endif
145 #ifndef H264DEC_OMXDL
146 /*------------------------------------------------------------------------------
147 
148     Function: h264bsdWriteOutputBlocks
149 
150         Functional description:
151             Write one macroblock into the image. Prediction for the macroblock
152             and the residual are given separately and will be combined while
153             writing the data to the image
154 
155         Inputs:
156             data        pointer to macroblock prediction data, 256 values for
157                         luma followed by 64 values for both chroma components
158             mbNum       number of the macroblock
159             residual    pointer to residual data, 16 16-element arrays for luma
160                         followed by 4 16-element arrays for both chroma
161                         components
162 
163         Outputs:
164             image       pointer to the image where the data will be written
165 
166         Returns:
167             none
168 
169 ------------------------------------------------------------------------------*/
170 
h264bsdWriteOutputBlocks(image_t * image,u32 mbNum,u8 * data,i32 residual[][16])171 void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
172         i32 residual[][16])
173 {
174 
175 /* Variables */
176 
177     u32 i;
178     u32 picWidth, picSize;
179     u8 *lum, *cb, *cr;
180     u8 *imageBlock;
181     u8 *tmp;
182     u32 row, col;
183     u32 block;
184     u32 x, y;
185     i32 *pRes;
186     i32 tmp1, tmp2, tmp3, tmp4;
187     const u8 *clp = h264bsdClip + 512;
188 
189 /* Code */
190 
191     ASSERT(image);
192     ASSERT(data);
193     ASSERT(mbNum < image->width * image->height);
194     ASSERT(!((u32)data&0x3));
195 
196     /* Image size in macroblocks */
197     picWidth = image->width;
198     picSize = picWidth * image->height;
199     row = mbNum / picWidth;
200     col = mbNum % picWidth;
201 
202     /* Output macroblock position in output picture */
203     lum = (image->data + row * picWidth * 256 + col * 16);
204     cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8);
205     cr = (cb + picSize * 64);
206 
207     picWidth *= 16;
208 
209     for (block = 0; block < 16; block++)
210     {
211         x = h264bsdBlockX[block];
212         y = h264bsdBlockY[block];
213 
214         pRes = residual[block];
215 
216         ASSERT(pRes);
217 
218         tmp = data + y*16 + x;
219         imageBlock = lum + y*picWidth + x;
220 
221         ASSERT(!((u32)tmp&0x3));
222         ASSERT(!((u32)imageBlock&0x3));
223 
224         if (IS_RESIDUAL_EMPTY(pRes))
225         {
226             /*lint -e826 */
227             i32 *in32 = (i32*)tmp;
228             i32 *out32 = (i32*)imageBlock;
229 
230             /* Residual is zero => copy prediction block to output */
231             tmp1 = *in32;  in32 += 4;
232             tmp2 = *in32;  in32 += 4;
233             *out32 = tmp1; out32 += picWidth/4;
234             *out32 = tmp2; out32 += picWidth/4;
235             tmp1 = *in32;  in32 += 4;
236             tmp2 = *in32;
237             *out32 = tmp1; out32 += picWidth/4;
238             *out32 = tmp2;
239         }
240         else
241         {
242 
243             RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
244 
245             /* Calculate image = prediction + residual
246              * Process four pixels in a loop */
247             for (i = 4; i; i--)
248             {
249                 tmp1 = tmp[0];
250                 tmp2 = *pRes++;
251                 tmp3 = tmp[1];
252                 tmp1 = clp[tmp1 + tmp2];
253                 tmp4 = *pRes++;
254                 imageBlock[0] = (u8)tmp1;
255                 tmp3 = clp[tmp3 + tmp4];
256                 tmp1 = tmp[2];
257                 tmp2 = *pRes++;
258                 imageBlock[1] = (u8)tmp3;
259                 tmp1 = clp[tmp1 + tmp2];
260                 tmp3 = tmp[3];
261                 tmp4 = *pRes++;
262                 imageBlock[2] = (u8)tmp1;
263                 tmp3 = clp[tmp3 + tmp4];
264                 tmp += 16;
265                 imageBlock[3] = (u8)tmp3;
266                 imageBlock += picWidth;
267             }
268         }
269 
270     }
271 
272     picWidth /= 2;
273 
274     for (block = 16; block <= 23; block++)
275     {
276         x = h264bsdBlockX[block & 0x3];
277         y = h264bsdBlockY[block & 0x3];
278 
279         pRes = residual[block];
280 
281         ASSERT(pRes);
282 
283         tmp = data + 256;
284         imageBlock = cb;
285 
286         if (block >= 20)
287         {
288             imageBlock = cr;
289             tmp += 64;
290         }
291 
292         tmp += y*8 + x;
293         imageBlock += y*picWidth + x;
294 
295         ASSERT(!((u32)tmp&0x3));
296         ASSERT(!((u32)imageBlock&0x3));
297 
298         if (IS_RESIDUAL_EMPTY(pRes))
299         {
300             /*lint -e826 */
301             i32 *in32 = (i32*)tmp;
302             i32 *out32 = (i32*)imageBlock;
303 
304             /* Residual is zero => copy prediction block to output */
305             tmp1 = *in32;  in32 += 2;
306             tmp2 = *in32;  in32 += 2;
307             *out32 = tmp1; out32 += picWidth/4;
308             *out32 = tmp2; out32 += picWidth/4;
309             tmp1 = *in32;  in32 += 2;
310             tmp2 = *in32;
311             *out32 = tmp1; out32 += picWidth/4;
312             *out32 = tmp2;
313         }
314         else
315         {
316 
317             RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
318 
319             for (i = 4; i; i--)
320             {
321                 tmp1 = tmp[0];
322                 tmp2 = *pRes++;
323                 tmp3 = tmp[1];
324                 tmp1 = clp[tmp1 + tmp2];
325                 tmp4 = *pRes++;
326                 imageBlock[0] = (u8)tmp1;
327                 tmp3 = clp[tmp3 + tmp4];
328                 tmp1 = tmp[2];
329                 tmp2 = *pRes++;
330                 imageBlock[1] = (u8)tmp3;
331                 tmp1 = clp[tmp1 + tmp2];
332                 tmp3 = tmp[3];
333                 tmp4 = *pRes++;
334                 imageBlock[2] = (u8)tmp1;
335                 tmp3 = clp[tmp3 + tmp4];
336                 tmp += 8;
337                 imageBlock[3] = (u8)tmp3;
338                 imageBlock += picWidth;
339             }
340         }
341     }
342 
343 }
344 #endif /* H264DEC_OMXDL */
345 
346