1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*------------------------------------------------------------------------------
18
19 Table of contents
20
21 1. Include headers
22 2. External compiler flags
23 3. Module defines
24 4. Local function prototypes
25 5. Functions
26
27 ------------------------------------------------------------------------------*/
28
29 /*------------------------------------------------------------------------------
30 1. Include headers
31 ------------------------------------------------------------------------------*/
32
33 #include "basetype.h"
34 #include "h264bsd_reconstruct.h"
35 #include "h264bsd_macroblock_layer.h"
36 #include "h264bsd_image.h"
37 #include "h264bsd_util.h"
38
39 #ifdef H264DEC_OMXDL
40 #include "omxtypes.h"
41 #include "omxVC.h"
42 #include "armVC.h"
43 #endif /* H264DEC_OMXDL */
44
/* Evaluates x and discards the result; used to silence "unused parameter /
 * variable" warnings. The whole replacement list is parenthesized so the
 * expansion always behaves as a single expression. */
#define UNUSED(x) ((void)(x))
46
47 /*------------------------------------------------------------------------------
48 2. External compiler flags
49 --------------------------------------------------------------------------------
50
51 --------------------------------------------------------------------------------
52 3. Module defines
53 ------------------------------------------------------------------------------*/
54
55 /* Switch off the following Lint messages for this file:
56 * Info 701: Shift left of signed quantity (int)
57 * Info 702: Shift right of signed quantity (int)
58 */
59 /*lint -e701 -e702 */
60
61 /* Luma fractional-sample positions
62 *
63 * G a b c H
64 * d e f g
65 * h i j k m
66 * n p q r
67 * M s N
68 *
69 * G, H, M and N are integer sample positions
70 * a-s are fractional samples that need to be interpolated.
71 */
72 #ifndef H264DEC_OMXDL
73 static const u32 lumaFracPos[4][4] = {
74 /* G d h n a e i p b f j q c g k r */
75 {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
76 #endif /* H264DEC_OMXDL */
77
78 /* clipping table, defined in h264bsd_intra_prediction.c */
79 extern const u8 h264bsdClip[];
80
81 /*------------------------------------------------------------------------------
82 4. Local function prototypes
83 ------------------------------------------------------------------------------*/
84
85 #ifndef H264DEC_OMXDL
86
87 /*------------------------------------------------------------------------------
88
89 Function: h264bsdInterpolateChromaHor
90
91 Functional description:
92 This function performs chroma interpolation in horizontal direction.
93 Overfilling is done only if needed. Reference image (pRef) is
94 read at correct position and the predicted part is written to
95 macroblock's chrominance (predPartChroma)
96 Inputs:
97 pRef pointer to reference frame Cb top-left corner
98 x0 integer x-coordinate for prediction
99 y0 integer y-coordinate for prediction
100 width width of the reference frame chrominance in pixels
101 height height of the reference frame chrominance in pixels
102 xFrac horizontal fraction for prediction in 1/8 pixels
103 chromaPartWidth width of the predicted part in pixels
104 chromaPartHeight height of the predicted part in pixels
105 Outputs:
106 predPartChroma pointer where predicted part is written
107
108 ------------------------------------------------------------------------------*/
109 #ifndef H264DEC_ARM11
void h264bsdInterpolateChromaHor(
  u8 *pRef,
  u8 *predPartChroma,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 xFrac,
  u32 chromaPartWidth,
  u32 chromaPartHeight)
{
    /* Horizontal bilinear chroma interpolation for both chroma planes.
     *
     * Reference layout used here: the second plane starts width*height
     * bytes after the first one. Output layout: each plane is written with
     * a row stride of 8, the second plane 8*8 bytes after the first.
     *
     * Each output sample is
     *     (((8 - xFrac) * left + xFrac * right) * 8 + 32) >> 6
     * i.e. the weighted sum divided by 8 with rounding. */

    u8 fillBuf[9*8*2];
    const u8 *src;
    u8 *dst;
    u32 plane, rowPair, col;
    u32 top0, top1, bot0, bot1;
    u32 leftWeight;

    ASSERT(predPartChroma);
    ASSERT(chromaPartWidth);
    ASSERT(chromaPartHeight);
    ASSERT(xFrac < 8);
    ASSERT(pRef);

    /* The filter needs one extra column to the right. If the needed area
     * is not completely inside the reference plane, build local copies of
     * both planes with h264bsdFillBlock and interpolate from those. */
    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
        (y0 < 0) || ((u32)y0+chromaPartHeight > height))
    {
        h264bsdFillBlock(pRef, fillBuf, x0, y0, width, height,
            chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1);
        h264bsdFillBlock(pRef + width * height,
            fillBuf + (chromaPartWidth+1)*chromaPartHeight,
            x0, y0, width, height, chromaPartWidth + 1,
            chromaPartHeight, chromaPartWidth + 1);

        /* from now on the local copy acts as the reference picture */
        pRef = fillBuf;
        x0 = 0;
        y0 = 0;
        width = chromaPartWidth+1;
        height = chromaPartHeight;
    }

    leftWeight = 8 - xFrac;

    for (plane = 0; plane < 2; plane++)
    {
        src = pRef + (plane * height + (u32)y0) * width + x0;
        dst = predPartChroma + plane * 8 * 8;

        /* two output rows are produced per pass over the columns */
        for (rowPair = (chromaPartHeight >> 1); rowPair; rowPair--)
        {
            /* columns are handled in pairs, hence the even column count */
            for (col = (chromaPartWidth >> 1) << 1; col; col--)
            {
                top0 = src[0];
                top1 = src[1];
                bot0 = src[width];
                bot1 = src[width + 1];

                dst[8] = (u8)((((leftWeight * bot0 + xFrac * bot1) << 3)
                                + 32) >> 6);
                dst[0] = (u8)((((leftWeight * top0 + xFrac * top1) << 3)
                                + 32) >> 6);
                src++;
                dst++;
            }
            /* move to the start of the next pair of rows */
            dst += 2*8 - chromaPartWidth;
            src += 2*width - chromaPartWidth;
        }
    }

}
193
194 /*------------------------------------------------------------------------------
195
196 Function: h264bsdInterpolateChromaVer
197
198 Functional description:
199 This function performs chroma interpolation in vertical direction.
200 Overfilling is done only if needed. Reference image (pRef) is
201 read at correct position and the predicted part is written to
202 macroblock's chrominance (predPartChroma)
203
204 ------------------------------------------------------------------------------*/
205
void h264bsdInterpolateChromaVer(
  u8 *pRef,
  u8 *predPartChroma,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 yFrac,
  u32 chromaPartWidth,
  u32 chromaPartHeight)
{
    /* Vertical bilinear chroma interpolation for both chroma planes.
     *
     * Reference layout used here: the second plane starts width*height
     * bytes after the first one. Output layout: each plane is written with
     * a row stride of 8, the second plane 8*8 bytes after the first.
     *
     * Each output sample is
     *     (((8 - yFrac) * upper + yFrac * lower) * 8 + 32) >> 6
     * i.e. the weighted sum divided by 8 with rounding. */

    u8 fillBuf[9*8*2];
    const u8 *src;
    u8 *dst;
    u32 plane, rowPair, col;
    u32 row0, row1, row2;
    u32 upperWeight;

    ASSERT(predPartChroma);
    ASSERT(chromaPartWidth);
    ASSERT(chromaPartHeight);
    ASSERT(yFrac < 8);
    ASSERT(pRef);

    /* The filter needs one extra row below. If the needed area is not
     * completely inside the reference plane, build local copies of both
     * planes with h264bsdFillBlock and interpolate from those. */
    if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) ||
        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
    {
        h264bsdFillBlock(pRef, fillBuf, x0, y0, width, height,
            chromaPartWidth, chromaPartHeight + 1, chromaPartWidth);
        h264bsdFillBlock(pRef + width * height,
            fillBuf + chromaPartWidth*(chromaPartHeight+1),
            x0, y0, width, height, chromaPartWidth,
            chromaPartHeight + 1, chromaPartWidth);

        /* from now on the local copy acts as the reference picture */
        pRef = fillBuf;
        x0 = 0;
        y0 = 0;
        width = chromaPartWidth;
        height = chromaPartHeight+1;
    }

    upperWeight = 8 - yFrac;

    for (plane = 0; plane < 2; plane++)
    {
        src = pRef + (plane * height + (u32)y0) * width + x0;
        dst = predPartChroma + plane * 8 * 8;

        /* two output rows are produced per pass over the columns */
        for (rowPair = (chromaPartHeight >> 1); rowPair; rowPair--)
        {
            /* columns are handled in pairs, hence the even column count */
            for (col = (chromaPartWidth >> 1) << 1; col; col--)
            {
                row0 = src[0];
                row1 = src[width];
                row2 = src[width*2];

                dst[8] = (u8)((((upperWeight * row1 + yFrac * row2) << 3)
                                + 32) >> 6);
                dst[0] = (u8)((((upperWeight * row0 + yFrac * row1) << 3)
                                + 32) >> 6);
                src++;
                dst++;
            }
            /* move to the start of the next pair of rows */
            dst += 2*8 - chromaPartWidth;
            src += 2*width - chromaPartWidth;
        }
    }

}
289 #endif
290 /*------------------------------------------------------------------------------
291
292 Function: h264bsdInterpolateChromaHorVer
293
294 Functional description:
295 This function performs chroma interpolation in horizontal and
296 vertical direction. Overfilling is done only if needed. Reference
297 image (ref) is read at correct position and the predicted part
298 is written to macroblock's chrominance (predPartChroma)
299
300 ------------------------------------------------------------------------------*/
301
void h264bsdInterpolateChromaHorVer(
  u8 *ref,
  u8 *predPartChroma,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 xFrac,
  u32 yFrac,
  u32 chromaPartWidth,
  u32 chromaPartHeight)
{
    /* Two-dimensional bilinear chroma interpolation for both chroma planes.
     *
     * Reference layout used here: the second plane starts width*height
     * bytes after the first one. Output layout: each plane is written with
     * a row stride of 8, the second plane 8*8 bytes after the first.
     *
     * Every column is first blended vertically (weights 8-yFrac / yFrac),
     * then two neighbouring columns are blended horizontally
     * (weights 8-xFrac / xFrac); the sum is rounded with +32 and >> 6. */

    u8 fillBuf[9*9*2];
    const u8 *src;
    u8 *dst;
    u32 plane, rowPair, col;
    u32 leftTop, leftBot, rightTop, rightBot;
    u32 wLeft, wUpper;
    const u32 rounding = 32;

    ASSERT(predPartChroma);
    ASSERT(chromaPartWidth);
    ASSERT(chromaPartHeight);
    ASSERT(xFrac < 8);
    ASSERT(yFrac < 8);
    ASSERT(ref);

    /* One extra column and one extra row are needed. If the needed area is
     * not completely inside the reference plane, build local copies of both
     * planes with h264bsdFillBlock and interpolate from those. */
    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
    {
        h264bsdFillBlock(ref, fillBuf, x0, y0, width, height,
            chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1);
        h264bsdFillBlock(ref + width * height,
            fillBuf + (chromaPartWidth+1)*(chromaPartHeight+1),
            x0, y0, width, height, chromaPartWidth + 1,
            chromaPartHeight + 1, chromaPartWidth + 1);

        /* from now on the local copy acts as the reference picture */
        ref = fillBuf;
        x0 = 0;
        y0 = 0;
        width = chromaPartWidth+1;
        height = chromaPartHeight+1;
    }

    wLeft = 8 - xFrac;
    wUpper = 8 - yFrac;

    for (plane = 0; plane < 2; plane++)
    {
        src = ref + (plane * height + (u32)y0) * width + x0;
        dst = predPartChroma + plane * 8 * 8;

        /* two output rows are produced per pass over the columns */
        for (rowPair = (chromaPartHeight >> 1); rowPair; rowPair--)
        {
            /* vertical blend of the leftmost column for both output rows */
            leftTop = src[0] * wUpper + src[width] * yFrac;
            leftBot = src[width] * wUpper + src[width*2] * yFrac;

            /* columns are handled in pairs, hence the even column count */
            for (col = (chromaPartWidth >> 1) << 1; col; col--)
            {
                src++;
                rightTop = src[0] * wUpper + src[width] * yFrac;
                rightBot = src[width] * wUpper + src[width*2] * yFrac;

                dst[8] = (u8)((leftBot * wLeft + rounding +
                               rightBot * xFrac) >> 6);
                dst[0] = (u8)((leftTop * wLeft + rounding +
                               rightTop * xFrac) >> 6);
                dst++;

                /* the right column is the left column of the next sample */
                leftTop = rightTop;
                leftBot = rightBot;
            }
            /* move to the start of the next pair of rows */
            dst += 2*8 - chromaPartWidth;
            src += 2*width - chromaPartWidth;
        }
    }

}
405
406 /*------------------------------------------------------------------------------
407
408 Function: PredictChroma
409
410 Functional description:
411 Top level chroma prediction function that calls the appropriate
412 interpolation function. The output is written to macroblock array.
413
414 ------------------------------------------------------------------------------*/
415
static void PredictChroma(
  u8 *mbPartChroma,
  u32 xAL,
  u32 yAL,
  u32 partWidth,
  u32 partHeight,
  mv_t *mv,
  image_t *refPic)
{
    /* Chroma prediction dispatcher: splits the motion vector into an
     * integer part and a 1/8 fraction and calls the interpolation routine
     * that matches the non-zero fractions. With both fractions zero the
     * two chroma planes are copied directly with h264bsdFillBlock.
     *
     * mbPartChroma   destination; planes are written 8*8 bytes apart
     * xAL, yAL       block position; halved to get the chroma position
     * partWidth/
     * partHeight     block size; halved to get the chroma block size
     * mv             motion vector (hor/ver), low 3 bits are the fraction
     * refPic         reference picture (data, width, height) */

    u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight;
    i32 xInt, yInt;
    u8 *ref;

    ASSERT(mv);
    ASSERT(refPic);
    ASSERT(refPic->data);
    ASSERT(refPic->width);
    ASSERT(refPic->height);

    /* chroma plane size: 8 samples per unit of refPic->width / height
     * (presumably macroblocks -- TODO confirm against image_t) */
    width = 8 * refPic->width;
    height = 8 * refPic->height;

    xInt = (xAL >> 1) + (mv->hor >> 3);
    yInt = (yAL >> 1) + (mv->ver >> 3);
    xFrac = mv->hor & 0x7;
    yFrac = mv->ver & 0x7;

    chromaPartWidth = partWidth >> 1;
    chromaPartHeight = partHeight >> 1;

    /* first chroma plane; presumably located right after the luma plane */
    ref = refPic->data + 256 * refPic->width * refPic->height;

    if (!xFrac && !yFrac)
    {
        /* integer position: plain copy of both planes, output stride 8 */
        h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height,
            chromaPartWidth, chromaPartHeight, 8);
        h264bsdFillBlock(ref + width * height, mbPartChroma + 8*8,
            xInt, yInt, width, height,
            chromaPartWidth, chromaPartHeight, 8);
        return;
    }

    if (xFrac && yFrac)
    {
        h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width,
            height, xFrac, yFrac, chromaPartWidth, chromaPartHeight);
    }
    else if (xFrac)
    {
        h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width,
            height, xFrac, chromaPartWidth, chromaPartHeight);
    }
    else
    {
        h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width,
            height, yFrac, chromaPartWidth, chromaPartHeight);
    }

}
477
478
479 /*------------------------------------------------------------------------------
480
481 Function: h264bsdInterpolateVerHalf
482
483 Functional description:
484 Function to perform vertical interpolation of pixel position 'h'
485 for a block. Overfilling is done only if needed. Reference
486 image (ref) is read at correct position and the predicted part
487 is written to macroblock array (mb)
488
489 ------------------------------------------------------------------------------*/
490 #ifndef H264DEC_ARM11
void h264bsdInterpolateVerHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    /* Vertical half-sample luma interpolation.
     *
     * (x0, y0) addresses the top-left sample of the area that is read:
     * partWidth columns by partHeight+5 rows. The result is written to mb
     * with a row stride of 16.
     *
     * Output row r of a column is the 6-tap filter over input rows r..r+5:
     *     clip((s[r] - 5*s[r+1] + 20*s[r+2] + 20*s[r+3]
     *           - 5*s[r+4] + s[r+5] + 16) >> 5)
     * where clipping is done through the h264bsdClip lookup table. */

    u32 fillBuf[21*21/4+1];
    const u8 *clip = h264bsdClip + 512;
    const u8 *column;
    u32 rowGroup, x, k;
    i32 s[9];
    i32 acc;

    ASSERT(ref);
    ASSERT(mb);

    /* If the needed area is not completely inside the reference picture,
     * build a local copy with h264bsdFillBlock and filter that instead. */
    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)fillBuf, x0, y0, width, height,
                partWidth, partHeight+5, partWidth);

        x0 = 0;
        y0 = 0;
        ref = (u8*)fillBuf;
        width = partWidth;
    }

    ref += (u32)y0 * width + (u32)x0;

    /* four output rows per group; they need nine input rows per column */
    for (rowGroup = (partHeight >> 2); rowGroup; rowGroup--)
    {
        for (x = 0; x < partWidth; x++)
        {
            column = ref + x;
            for (k = 0; k < 9; k++)
                s[k] = column[k * width];

            /* rows are produced bottom-up: 3, 2, 1, 0 */
            for (k = 4; k-- > 0; )
            {
                acc = s[k] - 5 * (s[k+1] + s[k+4]) +
                      20 * (s[k+2] + s[k+3]) + s[k+5] + 16;
                mb[k * 16 + x] = clip[acc >> 5];
            }
        }
        ref += 4*width;
        mb += 4*16;
    }

}
596
597 /*------------------------------------------------------------------------------
598
599 Function: h264bsdInterpolateVerQuarter
600
601 Functional description:
602 Function to perform vertical interpolation of pixel position 'd'
603 or 'n' for a block. Overfilling is done only if needed. Reference
604 image (ref) is read at correct position and the predicted part
605 is written to macroblock array (mb)
606
607 ------------------------------------------------------------------------------*/
608
void h264bsdInterpolateVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 verOffset)    /* 0 for pixel d, 1 for pixel n */
{
    /* Vertical quarter-sample luma interpolation.
     *
     * (x0, y0) addresses the top-left sample of the area that is read:
     * partWidth columns by partHeight+5 rows. The result is written to mb
     * with a row stride of 16.
     *
     * For output row r of a column the half-sample value
     *     h = clip((s[r] - 5*s[r+1] + 20*s[r+2] + 20*s[r+3]
     *               - 5*s[r+4] + s[r+5] + 16) >> 5)
     * is averaged with the integer sample of input row r+2+verOffset:
     *     out = (h + integer + 1) >> 1 */

    u32 fillBuf[21*21/4+1];
    const u8 *clip = h264bsdClip + 512;
    const u8 *column;
    const u8 *intRow;
    u32 rowGroup, x, k;
    i32 s[9];
    i32 acc;

    ASSERT(ref);
    ASSERT(mb);

    /* If the needed area is not completely inside the reference picture,
     * build a local copy with h264bsdFillBlock and filter that instead. */
    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)fillBuf, x0, y0, width, height,
                partWidth, partHeight+5, partWidth);

        x0 = 0;
        y0 = 0;
        ref = (u8*)fillBuf;
        width = partWidth;
    }

    ref += (u32)y0 * width + (u32)x0;

    /* row holding the integer samples that pair with output row 0 */
    intRow = ref + (2+verOffset)*width;

    /* four output rows per group; they need nine input rows per column */
    for (rowGroup = (partHeight >> 2); rowGroup; rowGroup--)
    {
        for (x = 0; x < partWidth; x++)
        {
            column = ref + x;
            for (k = 0; k < 9; k++)
                s[k] = column[k * width];

            /* rows are produced bottom-up: 3, 2, 1, 0 */
            for (k = 4; k-- > 0; )
            {
                acc = s[k] - 5 * (s[k+1] + s[k+4]) +
                      20 * (s[k+2] + s[k+3]) + s[k+5] + 16;
                mb[k * 16 + x] =
                    (u8)((clip[acc >> 5] + 1 + intRow[k * width + x]) >> 1);
            }
        }
        ref += 4*width;
        intRow += 4*width;
        mb += 4*16;
    }

}
730
731 /*------------------------------------------------------------------------------
732
733 Function: h264bsdInterpolateHorHalf
734
735 Functional description:
736 Function to perform horizontal interpolation of pixel position 'b'
737 for a block. Overfilling is done only if needed. Reference
738 image (ref) is read at correct position and the predicted part
739 is written to macroblock array (mb)
740
741 ------------------------------------------------------------------------------*/
742
void h264bsdInterpolateHorHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    /* Horizontal half-sample luma interpolation.
     *
     * (x0, y0) addresses the top-left sample of the area that is read:
     * partWidth+5 columns by partHeight rows. The result is written to mb
     * with a row stride of 16.
     *
     * Output column c of a row is the 6-tap filter over input columns
     * c..c+5:
     *     clip((p[c] - 5*p[c+1] + 20*p[c+2] + 20*p[c+3]
     *           - 5*p[c+4] + p[c+5] + 16) >> 5)
     * where clipping is done through the h264bsdClip lookup table. */

    u32 fillBuf[21*21/4+1];
    const u8 *clip = h264bsdClip + 512;
    const u8 *src;
    u32 row, col;
    i32 acc;

    ASSERT(ref);
    ASSERT(mb);
    ASSERT((partWidth&0x3) == 0);
    ASSERT((partHeight&0x3) == 0);

    /* If the needed area is not completely inside the reference picture,
     * build a local copy with h264bsdFillBlock and filter that instead. */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight > height))
    {
        h264bsdFillBlock(ref, (u8*)fillBuf, x0, y0, width, height,
                partWidth+5, partHeight, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)fillBuf;
        width = partWidth + 5;
    }

    src = ref + (u32)y0 * width + (u32)x0;

    for (row = partHeight; row; row--)
    {
        /* pixels are produced in groups of four */
        for (col = (partWidth >> 2) << 2; col; col--)
        {
            acc = src[0] - 5 * (src[1] + src[4]) +
                  20 * (src[2] + src[3]) + src[5] + 16;
            *mb++ = clip[acc >> 5];
            src++;
        }
        /* advance both pointers to the start of the next row */
        src += width - partWidth;
        mb += 16 - partWidth;
    }

}
853
854 /*------------------------------------------------------------------------------
855
856 Function: h264bsdInterpolateHorQuarter
857
858 Functional description:
859 Function to perform horizontal interpolation of pixel position 'a'
860 or 'c' for a block. Overfilling is done only if needed. Reference
861 image (ref) is read at correct position and the predicted part
862 is written to macroblock array (mb)
863
864 ------------------------------------------------------------------------------*/
865
void h264bsdInterpolateHorQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horOffset) /* 0 for pixel a, 1 for pixel c */
{
    /* Horizontal quarter-sample luma interpolation.
     *
     * (x0, y0) addresses the top-left sample of the area that is read:
     * partWidth+5 columns by partHeight rows. The result is written to mb
     * with a row stride of 16.
     *
     * For output column c of a row the half-sample value
     *     h = clip((p[c] - 5*p[c+1] + 20*p[c+2] + 20*p[c+3]
     *               - 5*p[c+4] + p[c+5] + 16) >> 5)
     * is averaged with an integer sample: p[c+2] when horOffset is zero,
     * p[c+3] otherwise:
     *     out = (h + integer + 1) >> 1 */

    u32 fillBuf[21*21/4+1];
    const u8 *clip = h264bsdClip + 512;
    const u8 *src;
    u32 row, col;
    u32 intOffset;
    i32 acc;

    ASSERT(ref);
    ASSERT(mb);

    /* If the needed area is not completely inside the reference picture,
     * build a local copy with h264bsdFillBlock and filter that instead. */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight > height))
    {
        h264bsdFillBlock(ref, (u8*)fillBuf, x0, y0, width, height,
                partWidth+5, partHeight, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)fillBuf;
        width = partWidth + 5;
    }

    src = ref + (u32)y0 * width + (u32)x0;

    /* any non-zero horOffset selects the right-hand integer sample */
    intOffset = horOffset ? 3 : 2;

    for (row = partHeight; row; row--)
    {
        /* pixels are produced in groups of four */
        for (col = (partWidth >> 2) << 2; col; col--)
        {
            acc = src[0] - 5 * (src[1] + src[4]) +
                  20 * (src[2] + src[3]) + src[5] + 16;
            *mb++ = (u8)((clip[acc >> 5] + src[intOffset] + 1) >> 1);
            src++;
        }
        /* advance both pointers to the start of the next row */
        src += width - partWidth;
        mb += 16 - partWidth;
    }

}
991
992 /*------------------------------------------------------------------------------
993
994 Function: h264bsdInterpolateHorVerQuarter
995
996 Functional description:
997 Function to perform horizontal and vertical interpolation of pixel
998 position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only
999 if needed. Reference image (ref) is read at correct position and
1000 the predicted part is written to macroblock array (mb)
1001
1002 ------------------------------------------------------------------------------*/
1003
void h264bsdInterpolateHorVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horVerOffset) /* 0 for pixel e, 1 for pixel g,
                       2 for pixel p, 3 for pixel r */
{
    /* Diagonal quarter-sample luma interpolation, done in two passes.
     *
     * (x0, y0) addresses the top-left sample of the area that is read:
     * partWidth+5 columns by partHeight+5 rows. The result is written to
     * mb with a row stride of 16.
     *
     * Pass 1 stores the horizontal half-sample values in mb, taken from
     * input row 2 or 3 (bit 1 of horVerOffset). Pass 2 computes the
     * vertical half-sample values at input column 2 or 3 (bit 0 of
     * horVerOffset) and replaces each stored value by
     *     (stored + vertical + 1) >> 1
     * Both passes use the taps (1, -5, 20, 20, -5, 1) with +16, >> 5 and
     * clipping through the h264bsdClip lookup table. */

    u32 fillBuf[21*21/4+1];
    const u8 *clip = h264bsdClip + 512;
    const u8 *src;
    const u8 *column;
    u32 row, col, rowGroup, x, k;
    i32 s[9];
    i32 acc;

    ASSERT(ref);
    ASSERT(mb);

    /* If the needed area is not completely inside the reference picture,
     * build a local copy with h264bsdFillBlock and filter that instead. */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)fillBuf, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)fillBuf;
        width = partWidth+5;
    }

    /* ref points to G + (-2, -2) */
    ref += (u32)y0 * width + (u32)x0;

    /* Pass 1: horizontal half-sample values of the selected row */
    src = ref + (((horVerOffset & 0x2) >> 1) + 2) * width;

    for (row = partHeight; row; row--)
    {
        /* pixels are produced in groups of four */
        for (col = (partWidth >> 2) << 2; col; col--)
        {
            acc = src[0] - 5 * (src[1] + src[4]) +
                  20 * (src[2] + src[3]) + src[5] + 16;
            *mb++ = clip[acc >> 5];
            src++;
        }
        /* advance both pointers to the start of the next row */
        src += width - partWidth;
        mb += 16 - partWidth;
    }

    /* rewind the output pointer to the first row written in pass 1 */
    mb -= 16*partHeight;

    /* Pass 2: vertical half-sample values of the selected column,
     * averaged into the values stored by pass 1 */
    ref += 2 + (horVerOffset & 0x1);

    /* four output rows per group; they need nine input rows per column */
    for (rowGroup = (partHeight >> 2); rowGroup; rowGroup--)
    {
        for (x = 0; x < partWidth; x++)
        {
            column = ref + x;
            for (k = 0; k < 9; k++)
                s[k] = column[k * width];

            /* rows are produced bottom-up: 3, 2, 1, 0 */
            for (k = 4; k-- > 0; )
            {
                acc = s[k] - 5 * (s[k+1] + s[k+4]) +
                      20 * (s[k+2] + s[k+3]) + s[k+5] + 16;
                mb[k * 16 + x] =
                    (u8)((mb[k * 16 + x] + clip[acc >> 5] + 1) >> 1);
            }
        }
        ref += 4*width;
        mb += 4*16;
    }

}
1196 #endif
1197
1198 /*------------------------------------------------------------------------------
1199
1200 Function: h264bsdInterpolateMidHalf
1201
1202 Functional description:
1203 Function to perform horizontal and vertical interpolation of pixel
1204 position 'j' for a block. Overfilling is done only if needed.
1205 Reference image (ref) is read at correct position and the predicted
1206 part is written to macroblock array (mb)
1207
1208 ------------------------------------------------------------------------------*/
1209
h264bsdInterpolateMidHalf(u8 * ref,u8 * mb,i32 x0,i32 y0,u32 width,u32 height,u32 partWidth,u32 partHeight)1210 void h264bsdInterpolateMidHalf(
1211 u8 *ref,
1212 u8 *mb,
1213 i32 x0,
1214 i32 y0,
1215 u32 width,
1216 u32 height,
1217 u32 partWidth,
1218 u32 partHeight)
1219 {
1220 u32 p1[21*21/4+1];
1221 u32 x, y;
1222 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1223 i32 *ptrC, *ptrV, *b1;
1224 u8 *ptrJ;
1225 i32 table[21*16];
1226 const u8 *clp = h264bsdClip + 512;
1227
1228 /* Code */
1229
1230 ASSERT(ref);
1231 ASSERT(mb);
1232
1233 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1234 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1235 {
1236 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1237 partWidth+5, partHeight+5, partWidth+5);
1238
1239 x0 = 0;
1240 y0 = 0;
1241 ref = (u8*)p1;
1242 width = partWidth+5;
1243 }
1244
1245 ref += (u32)y0 * width + (u32)x0;
1246
1247 b1 = table;
1248 ptrJ = ref + 5;
1249
1250 /* First step: calculate intermediate values for
1251 * horizontal interpolation */
1252 for (y = partHeight + 5; y; y--)
1253 {
1254 tmp6 = *(ptrJ - 5);
1255 tmp5 = *(ptrJ - 4);
1256 tmp4 = *(ptrJ - 3);
1257 tmp3 = *(ptrJ - 2);
1258 tmp2 = *(ptrJ - 1);
1259
1260 /* 4 pels per iteration */
1261 for (x = (partWidth >> 2); x; x--)
1262 {
1263 /* First pixel */
1264 tmp7 = tmp3 + tmp4;
1265 tmp6 += (tmp7 << 4);
1266 tmp6 += (tmp7 << 2);
1267 tmp7 = tmp2 + tmp5;
1268 tmp1 = *ptrJ++;
1269 tmp6 -= (tmp7 << 2);
1270 tmp6 -= tmp7;
1271 tmp6 += tmp1;
1272 *b1++ = tmp6;
1273 /* Second pixel */
1274 tmp7 = tmp2 + tmp3;
1275 tmp5 += (tmp7 << 4);
1276 tmp5 += (tmp7 << 2);
1277 tmp7 = tmp1 + tmp4;
1278 tmp6 = *ptrJ++;
1279 tmp5 -= (tmp7 << 2);
1280 tmp5 -= tmp7;
1281 tmp5 += tmp6;
1282 *b1++ = tmp5;
1283 /* Third pixel */
1284 tmp7 = tmp1 + tmp2;
1285 tmp4 += (tmp7 << 4);
1286 tmp4 += (tmp7 << 2);
1287 tmp7 = tmp6 + tmp3;
1288 tmp5 = *ptrJ++;
1289 tmp4 -= (tmp7 << 2);
1290 tmp4 -= tmp7;
1291 tmp4 += tmp5;
1292 *b1++ = tmp4;
1293 /* Fourth pixel */
1294 tmp7 = tmp6 + tmp1;
1295 tmp3 += (tmp7 << 4);
1296 tmp3 += (tmp7 << 2);
1297 tmp7 = tmp5 + tmp2;
1298 tmp4 = *ptrJ++;
1299 tmp3 -= (tmp7 << 2);
1300 tmp3 -= tmp7;
1301 tmp3 += tmp4;
1302 *b1++ = tmp3;
1303 tmp7 = tmp4;
1304 tmp4 = tmp6;
1305 tmp6 = tmp2;
1306 tmp2 = tmp7;
1307 tmp3 = tmp5;
1308 tmp5 = tmp1;
1309 }
1310 ptrJ += width - partWidth;
1311 }
1312
1313 /* Second step: calculate vertical interpolation */
1314 ptrC = table + partWidth;
1315 ptrV = ptrC + 5*partWidth;
1316 for (y = (partHeight >> 2); y; y--)
1317 {
1318 /* 4 pels per iteration */
1319 for (x = partWidth; x; x--)
1320 {
1321 tmp4 = ptrV[-(i32)partWidth*2];
1322 tmp5 = ptrV[-(i32)partWidth];
1323 tmp1 = ptrV[partWidth];
1324 tmp2 = ptrV[partWidth*2];
1325 tmp6 = *ptrV++;
1326
1327 tmp7 = tmp4 + tmp1;
1328 tmp2 -= (tmp7 << 2);
1329 tmp2 -= tmp7;
1330 tmp2 += 512;
1331 tmp7 = tmp5 + tmp6;
1332 tmp3 = ptrC[partWidth*2];
1333 tmp2 += (tmp7 << 4);
1334 tmp2 += (tmp7 << 2);
1335 tmp2 += tmp3;
1336 tmp7 = clp[tmp2>>10];
1337 tmp1 += 512;
1338 mb[48] = (u8)tmp7;
1339
1340 tmp7 = tmp3 + tmp6;
1341 tmp1 -= (tmp7 << 2);
1342 tmp1 -= tmp7;
1343 tmp7 = tmp4 + tmp5;
1344 tmp2 = ptrC[partWidth];
1345 tmp1 += (tmp7 << 4);
1346 tmp1 += (tmp7 << 2);
1347 tmp1 += tmp2;
1348 tmp7 = clp[tmp1>>10];
1349 tmp6 += 512;
1350 mb[32] = (u8)tmp7;
1351
1352 tmp1 = *ptrC;
1353 tmp7 = tmp2 + tmp5;
1354 tmp6 -= (tmp7 << 2);
1355 tmp6 -= tmp7;
1356 tmp7 = tmp4 + tmp3;
1357 tmp6 += (tmp7 << 4);
1358 tmp6 += (tmp7 << 2);
1359 tmp6 += tmp1;
1360 tmp7 = clp[tmp6>>10];
1361 tmp5 += 512;
1362 mb[16] = (u8)tmp7;
1363
1364 tmp6 = ptrC[-(i32)partWidth];
1365 tmp1 += tmp4;
1366 tmp5 -= (tmp1 << 2);
1367 tmp5 -= tmp1;
1368 tmp3 += tmp2;
1369 tmp5 += (tmp3 << 4);
1370 tmp5 += (tmp3 << 2);
1371 tmp5 += tmp6;
1372 tmp7 = clp[tmp5>>10];
1373 *mb++ = (u8)tmp7;
1374 ptrC++;
1375 }
1376 mb += 4*16 - partWidth;
1377 ptrC += 3*partWidth;
1378 ptrV += 3*partWidth;
1379 }
1380
1381 }
1382
1383
1384 /*------------------------------------------------------------------------------
1385
1386 Function: h264bsdInterpolateMidVerQuarter
1387
1388 Functional description:
1389 Function to perform horizontal and vertical interpolation of pixel
1390 position 'f' or 'q' for a block. Overfilling is done only if needed.
1391 Reference image (ref) is read at correct position and the predicted
1392 part is written to macroblock array (mb)
1393
1394 ------------------------------------------------------------------------------*/
1395
h264bsdInterpolateMidVerQuarter(u8 * ref,u8 * mb,i32 x0,i32 y0,u32 width,u32 height,u32 partWidth,u32 partHeight,u32 verOffset)1396 void h264bsdInterpolateMidVerQuarter(
1397 u8 *ref,
1398 u8 *mb,
1399 i32 x0,
1400 i32 y0,
1401 u32 width,
1402 u32 height,
1403 u32 partWidth,
1404 u32 partHeight,
1405 u32 verOffset) /* 0 for pixel f, 1 for pixel q */
1406 {
1407 u32 p1[21*21/4+1];
1408 u32 x, y;
1409 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1410 i32 *ptrC, *ptrV, *ptrInt, *b1;
1411 u8 *ptrJ;
1412 i32 table[21*16];
1413 const u8 *clp = h264bsdClip + 512;
1414
1415 /* Code */
1416
1417 ASSERT(ref);
1418 ASSERT(mb);
1419
1420 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1421 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1422 {
1423 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1424 partWidth+5, partHeight+5, partWidth+5);
1425
1426 x0 = 0;
1427 y0 = 0;
1428 ref = (u8*)p1;
1429 width = partWidth+5;
1430 }
1431
1432 ref += (u32)y0 * width + (u32)x0;
1433
1434 b1 = table;
1435 ptrJ = ref + 5;
1436
1437 /* First step: calculate intermediate values for
1438 * horizontal interpolation */
1439 for (y = partHeight + 5; y; y--)
1440 {
1441 tmp6 = *(ptrJ - 5);
1442 tmp5 = *(ptrJ - 4);
1443 tmp4 = *(ptrJ - 3);
1444 tmp3 = *(ptrJ - 2);
1445 tmp2 = *(ptrJ - 1);
1446 for (x = (partWidth >> 2); x; x--)
1447 {
1448 /* First pixel */
1449 tmp7 = tmp3 + tmp4;
1450 tmp6 += (tmp7 << 4);
1451 tmp6 += (tmp7 << 2);
1452 tmp7 = tmp2 + tmp5;
1453 tmp1 = *ptrJ++;
1454 tmp6 -= (tmp7 << 2);
1455 tmp6 -= tmp7;
1456 tmp6 += tmp1;
1457 *b1++ = tmp6;
1458 /* Second pixel */
1459 tmp7 = tmp2 + tmp3;
1460 tmp5 += (tmp7 << 4);
1461 tmp5 += (tmp7 << 2);
1462 tmp7 = tmp1 + tmp4;
1463 tmp6 = *ptrJ++;
1464 tmp5 -= (tmp7 << 2);
1465 tmp5 -= tmp7;
1466 tmp5 += tmp6;
1467 *b1++ = tmp5;
1468 /* Third pixel */
1469 tmp7 = tmp1 + tmp2;
1470 tmp4 += (tmp7 << 4);
1471 tmp4 += (tmp7 << 2);
1472 tmp7 = tmp6 + tmp3;
1473 tmp5 = *ptrJ++;
1474 tmp4 -= (tmp7 << 2);
1475 tmp4 -= tmp7;
1476 tmp4 += tmp5;
1477 *b1++ = tmp4;
1478 /* Fourth pixel */
1479 tmp7 = tmp6 + tmp1;
1480 tmp3 += (tmp7 << 4);
1481 tmp3 += (tmp7 << 2);
1482 tmp7 = tmp5 + tmp2;
1483 tmp4 = *ptrJ++;
1484 tmp3 -= (tmp7 << 2);
1485 tmp3 -= tmp7;
1486 tmp3 += tmp4;
1487 *b1++ = tmp3;
1488 tmp7 = tmp4;
1489 tmp4 = tmp6;
1490 tmp6 = tmp2;
1491 tmp2 = tmp7;
1492 tmp3 = tmp5;
1493 tmp5 = tmp1;
1494 }
1495 ptrJ += width - partWidth;
1496 }
1497
1498 /* Second step: calculate vertical interpolation and average */
1499 ptrC = table + partWidth;
1500 ptrV = ptrC + 5*partWidth;
1501 /* Pointer to integer sample position, either M or R */
1502 ptrInt = ptrC + (2+verOffset)*partWidth;
1503 for (y = (partHeight >> 2); y; y--)
1504 {
1505 for (x = partWidth; x; x--)
1506 {
1507 tmp4 = ptrV[-(i32)partWidth*2];
1508 tmp5 = ptrV[-(i32)partWidth];
1509 tmp1 = ptrV[partWidth];
1510 tmp2 = ptrV[partWidth*2];
1511 tmp6 = *ptrV++;
1512
1513 tmp7 = tmp4 + tmp1;
1514 tmp2 -= (tmp7 << 2);
1515 tmp2 -= tmp7;
1516 tmp2 += 512;
1517 tmp7 = tmp5 + tmp6;
1518 tmp3 = ptrC[partWidth*2];
1519 tmp2 += (tmp7 << 4);
1520 tmp2 += (tmp7 << 2);
1521 tmp7 = ptrInt[partWidth*2];
1522 tmp2 += tmp3;
1523 tmp2 = clp[tmp2>>10];
1524 tmp7 += 16;
1525 tmp7 = clp[tmp7>>5];
1526 tmp1 += 512;
1527 tmp2++;
1528 mb[48] = (u8)((tmp7 + tmp2) >> 1);
1529
1530 tmp7 = tmp3 + tmp6;
1531 tmp1 -= (tmp7 << 2);
1532 tmp1 -= tmp7;
1533 tmp7 = tmp4 + tmp5;
1534 tmp2 = ptrC[partWidth];
1535 tmp1 += (tmp7 << 4);
1536 tmp1 += (tmp7 << 2);
1537 tmp7 = ptrInt[partWidth];
1538 tmp1 += tmp2;
1539 tmp1 = clp[tmp1>>10];
1540 tmp7 += 16;
1541 tmp7 = clp[tmp7>>5];
1542 tmp6 += 512;
1543 tmp1++;
1544 mb[32] = (u8)((tmp7 + tmp1) >> 1);
1545
1546 tmp1 = *ptrC;
1547 tmp7 = tmp2 + tmp5;
1548 tmp6 -= (tmp7 << 2);
1549 tmp6 -= tmp7;
1550 tmp7 = tmp4 + tmp3;
1551 tmp6 += (tmp7 << 4);
1552 tmp6 += (tmp7 << 2);
1553 tmp7 = *ptrInt;
1554 tmp6 += tmp1;
1555 tmp6 = clp[tmp6>>10];
1556 tmp7 += 16;
1557 tmp7 = clp[tmp7>>5];
1558 tmp5 += 512;
1559 tmp6++;
1560 mb[16] = (u8)((tmp7 + tmp6) >> 1);
1561
1562 tmp6 = ptrC[-(i32)partWidth];
1563 tmp1 += tmp4;
1564 tmp5 -= (tmp1 << 2);
1565 tmp5 -= tmp1;
1566 tmp3 += tmp2;
1567 tmp5 += (tmp3 << 4);
1568 tmp5 += (tmp3 << 2);
1569 tmp7 = ptrInt[-(i32)partWidth];
1570 tmp5 += tmp6;
1571 tmp5 = clp[tmp5>>10];
1572 tmp7 += 16;
1573 tmp7 = clp[tmp7>>5];
1574 tmp5++;
1575 *mb++ = (u8)((tmp7 + tmp5) >> 1);
1576 ptrC++;
1577 ptrInt++;
1578 }
1579 mb += 4*16 - partWidth;
1580 ptrC += 3*partWidth;
1581 ptrV += 3*partWidth;
1582 ptrInt += 3*partWidth;
1583 }
1584
1585 }
1586
1587
1588 /*------------------------------------------------------------------------------
1589
1590 Function: h264bsdInterpolateMidHorQuarter
1591
1592 Functional description:
1593 Function to perform horizontal and vertical interpolation of pixel
1594 position 'i' or 'k' for a block. Overfilling is done only if needed.
1595 Reference image (ref) is read at correct position and the predicted
1596 part is written to macroblock array (mb)
1597
1598 ------------------------------------------------------------------------------*/
1599
h264bsdInterpolateMidHorQuarter(u8 * ref,u8 * mb,i32 x0,i32 y0,u32 width,u32 height,u32 partWidth,u32 partHeight,u32 horOffset)1600 void h264bsdInterpolateMidHorQuarter(
1601 u8 *ref,
1602 u8 *mb,
1603 i32 x0,
1604 i32 y0,
1605 u32 width,
1606 u32 height,
1607 u32 partWidth,
1608 u32 partHeight,
1609 u32 horOffset) /* 0 for pixel i, 1 for pixel k */
1610 {
1611 u32 p1[21*21/4+1];
1612 u32 x, y;
1613 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1614 i32 *ptrJ, *ptrInt, *h1;
1615 u8 *ptrC, *ptrV;
1616 i32 table[21*16];
1617 i32 tableWidth = (i32)partWidth+5;
1618 const u8 *clp = h264bsdClip + 512;
1619
1620 /* Code */
1621
1622 ASSERT(ref);
1623 ASSERT(mb);
1624
1625 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1626 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1627 {
1628 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1629 partWidth+5, partHeight+5, partWidth+5);
1630
1631 x0 = 0;
1632 y0 = 0;
1633 ref = (u8*)p1;
1634 width = partWidth+5;
1635 }
1636
1637 ref += (u32)y0 * width + (u32)x0;
1638
1639 h1 = table + tableWidth;
1640 ptrC = ref + width;
1641 ptrV = ptrC + 5*width;
1642
1643 /* First step: calculate intermediate values for
1644 * vertical interpolation */
1645 for (y = (partHeight >> 2); y; y--)
1646 {
1647 for (x = (u32)tableWidth; x; x--)
1648 {
1649 tmp4 = ptrV[-(i32)width*2];
1650 tmp5 = ptrV[-(i32)width];
1651 tmp1 = ptrV[width];
1652 tmp2 = ptrV[width*2];
1653 tmp6 = *ptrV++;
1654
1655 tmp7 = tmp4 + tmp1;
1656 tmp2 -= (tmp7 << 2);
1657 tmp2 -= tmp7;
1658 tmp7 = tmp5 + tmp6;
1659 tmp3 = ptrC[width*2];
1660 tmp2 += (tmp7 << 4);
1661 tmp2 += (tmp7 << 2);
1662 tmp2 += tmp3;
1663 h1[tableWidth*2] = tmp2;
1664
1665 tmp7 = tmp3 + tmp6;
1666 tmp1 -= (tmp7 << 2);
1667 tmp1 -= tmp7;
1668 tmp7 = tmp4 + tmp5;
1669 tmp2 = ptrC[width];
1670 tmp1 += (tmp7 << 4);
1671 tmp1 += (tmp7 << 2);
1672 tmp1 += tmp2;
1673 h1[tableWidth] = tmp1;
1674
1675 tmp1 = *ptrC;
1676 tmp7 = tmp2 + tmp5;
1677 tmp6 -= (tmp7 << 2);
1678 tmp6 -= tmp7;
1679 tmp7 = tmp4 + tmp3;
1680 tmp6 += (tmp7 << 4);
1681 tmp6 += (tmp7 << 2);
1682 tmp6 += tmp1;
1683 *h1 = tmp6;
1684
1685 tmp6 = ptrC[-(i32)width];
1686 tmp1 += tmp4;
1687 tmp5 -= (tmp1 << 2);
1688 tmp5 -= tmp1;
1689 tmp3 += tmp2;
1690 tmp5 += (tmp3 << 4);
1691 tmp5 += (tmp3 << 2);
1692 tmp5 += tmp6;
1693 h1[-tableWidth] = tmp5;
1694 h1++;
1695 ptrC++;
1696 }
1697 ptrC += 4*width - partWidth - 5;
1698 ptrV += 4*width - partWidth - 5;
1699 h1 += 3*tableWidth;
1700 }
1701
1702 /* Second step: calculate horizontal interpolation and average */
1703 ptrJ = table + 5;
1704 /* Pointer to integer sample position, either G or H */
1705 ptrInt = table + 2 + horOffset;
1706 for (y = partHeight; y; y--)
1707 {
1708 tmp6 = *(ptrJ - 5);
1709 tmp5 = *(ptrJ - 4);
1710 tmp4 = *(ptrJ - 3);
1711 tmp3 = *(ptrJ - 2);
1712 tmp2 = *(ptrJ - 1);
1713 for (x = (partWidth>>2); x; x--)
1714 {
1715 /* First pixel */
1716 tmp6 += 512;
1717 tmp7 = tmp3 + tmp4;
1718 tmp6 += (tmp7 << 4);
1719 tmp6 += (tmp7 << 2);
1720 tmp7 = tmp2 + tmp5;
1721 tmp1 = *ptrJ++;
1722 tmp6 -= (tmp7 << 2);
1723 tmp6 -= tmp7;
1724 tmp7 = *ptrInt++;
1725 tmp6 += tmp1;
1726 tmp6 = clp[tmp6 >> 10];
1727 tmp7 += 16;
1728 tmp7 = clp[tmp7 >> 5];
1729 tmp5 += 512;
1730 tmp6++;
1731 *mb++ = (u8)((tmp6 + tmp7) >> 1);
1732 /* Second pixel */
1733 tmp7 = tmp2 + tmp3;
1734 tmp5 += (tmp7 << 4);
1735 tmp5 += (tmp7 << 2);
1736 tmp7 = tmp1 + tmp4;
1737 tmp6 = *ptrJ++;
1738 tmp5 -= (tmp7 << 2);
1739 tmp5 -= tmp7;
1740 tmp7 = *ptrInt++;
1741 tmp5 += tmp6;
1742 tmp5 = clp[tmp5 >> 10];
1743 tmp7 += 16;
1744 tmp7 = clp[tmp7 >> 5];
1745 tmp4 += 512;
1746 tmp5++;
1747 *mb++ = (u8)((tmp5 + tmp7) >> 1);
1748 /* Third pixel */
1749 tmp7 = tmp1 + tmp2;
1750 tmp4 += (tmp7 << 4);
1751 tmp4 += (tmp7 << 2);
1752 tmp7 = tmp6 + tmp3;
1753 tmp5 = *ptrJ++;
1754 tmp4 -= (tmp7 << 2);
1755 tmp4 -= tmp7;
1756 tmp7 = *ptrInt++;
1757 tmp4 += tmp5;
1758 tmp4 = clp[tmp4 >> 10];
1759 tmp7 += 16;
1760 tmp7 = clp[tmp7 >> 5];
1761 tmp3 += 512;
1762 tmp4++;
1763 *mb++ = (u8)((tmp4 + tmp7) >> 1);
1764 /* Fourth pixel */
1765 tmp7 = tmp6 + tmp1;
1766 tmp3 += (tmp7 << 4);
1767 tmp3 += (tmp7 << 2);
1768 tmp7 = tmp5 + tmp2;
1769 tmp4 = *ptrJ++;
1770 tmp3 -= (tmp7 << 2);
1771 tmp3 -= tmp7;
1772 tmp7 = *ptrInt++;
1773 tmp3 += tmp4;
1774 tmp3 = clp[tmp3 >> 10];
1775 tmp7 += 16;
1776 tmp7 = clp[tmp7 >> 5];
1777 tmp3++;
1778 *mb++ = (u8)((tmp3 + tmp7) >> 1);
1779 tmp3 = tmp5;
1780 tmp5 = tmp1;
1781 tmp7 = tmp4;
1782 tmp4 = tmp6;
1783 tmp6 = tmp2;
1784 tmp2 = tmp7;
1785 }
1786 ptrJ += 5;
1787 ptrInt += 5;
1788 mb += 16 - partWidth;
1789 }
1790
1791 }
1792
1793
1794 /*------------------------------------------------------------------------------
1795
1796 Function: h264bsdPredictSamples
1797
1798 Functional description:
1799 This function reconstructs a prediction for a macroblock partition.
1800 The prediction is either copied or interpolated using the reference
1801 frame and the motion vector. Both luminance and chrominance parts are
1802 predicted. The prediction is stored in given macroblock array (data).
1803 Inputs:
1804 data pointer to macroblock array (384 bytes) for output
1805 mv pointer to motion vector used for prediction
1806 refPic pointer to reference picture structure
1807 xA x-coordinate for current macroblock
1808 yA y-coordinate for current macroblock
1809 partX x-offset for partition in macroblock
1810 partY y-offset for partition in macroblock
1811 partWidth width of partition
1812 partHeight height of partition
1813 Outputs:
1814 data macroblock array (16x16+8x8+8x8) where predicted
1815 partition is stored at correct position
1816
1817 ------------------------------------------------------------------------------*/
1818
h264bsdPredictSamples(u8 * data,mv_t * mv,image_t * refPic,u32 xA,u32 yA,u32 partX,u32 partY,u32 partWidth,u32 partHeight)1819 void h264bsdPredictSamples(
1820 u8 *data,
1821 mv_t *mv,
1822 image_t *refPic,
1823 u32 xA,
1824 u32 yA,
1825 u32 partX,
1826 u32 partY,
1827 u32 partWidth,
1828 u32 partHeight)
1829
1830 {
1831
1832 /* Variables */
1833
1834 u32 xFrac, yFrac, width, height;
1835 i32 xInt, yInt;
1836 u8 *lumaPartData;
1837
1838 /* Code */
1839
1840 ASSERT(data);
1841 ASSERT(mv);
1842 ASSERT(partWidth);
1843 ASSERT(partHeight);
1844 ASSERT(refPic);
1845 ASSERT(refPic->data);
1846 ASSERT(refPic->width);
1847 ASSERT(refPic->height);
1848
1849 /* luma */
1850 lumaPartData = data + 16*partY + partX;
1851
1852 xFrac = mv->hor & 0x3;
1853 yFrac = mv->ver & 0x3;
1854
1855 width = 16 * refPic->width;
1856 height = 16 * refPic->height;
1857
1858 xInt = (i32)xA + (i32)partX + (mv->hor >> 2);
1859 yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
1860
1861 ASSERT(lumaFracPos[xFrac][yFrac] < 16);
1862
1863 switch (lumaFracPos[xFrac][yFrac])
1864 {
1865 case 0: /* G */
1866 h264bsdFillBlock(refPic->data, lumaPartData,
1867 xInt,yInt,width,height,partWidth,partHeight,16);
1868 break;
1869 case 1: /* d */
1870 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData,
1871 xInt, yInt-2, width, height, partWidth, partHeight, 0);
1872 break;
1873 case 2: /* h */
1874 h264bsdInterpolateVerHalf(refPic->data, lumaPartData,
1875 xInt, yInt-2, width, height, partWidth, partHeight);
1876 break;
1877 case 3: /* n */
1878 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData,
1879 xInt, yInt-2, width, height, partWidth, partHeight, 1);
1880 break;
1881 case 4: /* a */
1882 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData,
1883 xInt-2, yInt, width, height, partWidth, partHeight, 0);
1884 break;
1885 case 5: /* e */
1886 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1887 xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
1888 break;
1889 case 6: /* i */
1890 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData,
1891 xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
1892 break;
1893 case 7: /* p */
1894 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1895 xInt-2, yInt-2, width, height, partWidth, partHeight, 2);
1896 break;
1897 case 8: /* b */
1898 h264bsdInterpolateHorHalf(refPic->data, lumaPartData,
1899 xInt-2, yInt, width, height, partWidth, partHeight);
1900 break;
1901 case 9: /* f */
1902 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData,
1903 xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
1904 break;
1905 case 10: /* j */
1906 h264bsdInterpolateMidHalf(refPic->data, lumaPartData,
1907 xInt-2, yInt-2, width, height, partWidth, partHeight);
1908 break;
1909 case 11: /* q */
1910 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData,
1911 xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
1912 break;
1913 case 12: /* c */
1914 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData,
1915 xInt-2, yInt, width, height, partWidth, partHeight, 1);
1916 break;
1917 case 13: /* g */
1918 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1919 xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
1920 break;
1921 case 14: /* k */
1922 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData,
1923 xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
1924 break;
1925 default: /* case 15, r */
1926 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1927 xInt-2, yInt-2, width, height, partWidth, partHeight, 3);
1928 break;
1929 }
1930
1931 /* chroma */
1932 PredictChroma(
1933 data + 16*16 + (partY>>1)*8 + (partX>>1),
1934 xA + partX,
1935 yA + partY,
1936 partWidth,
1937 partHeight,
1938 mv,
1939 refPic);
1940
1941 }
1942
1943 #else /* H264DEC_OMXDL */
1944 /*------------------------------------------------------------------------------
1945
1946 Function: h264bsdPredictSamples
1947
1948 Functional description:
1949 This function reconstructs a prediction for a macroblock partition.
1950 The prediction is either copied or interpolated using the reference
1951 frame and the motion vector. Both luminance and chrominance parts are
1952 predicted. The prediction is stored in given macroblock array (data).
1953 Inputs:
1954 data pointer to macroblock array (384 bytes) for output
1955 mv pointer to motion vector used for prediction
1956 refPic pointer to reference picture structure
1957 xA x-coordinate for current macroblock
1958 yA y-coordinate for current macroblock
1959 partX x-offset for partition in macroblock
1960 partY y-offset for partition in macroblock
1961 partWidth width of partition
1962 partHeight height of partition
1963 Outputs:
1964 data macroblock array (16x16+8x8+8x8) where predicted
1965 partition is stored at correct position
1966
1967 ------------------------------------------------------------------------------*/
1968
1969 /*lint -e{550} Symbol 'res' not accessed */
h264bsdPredictSamples(u8 * data,mv_t * mv,image_t * refPic,u32 colAndRow,u32 part,u8 * pFill)1970 void h264bsdPredictSamples(
1971 u8 *data,
1972 mv_t *mv,
1973 image_t *refPic,
1974 u32 colAndRow,
1975 u32 part,
1976 u8 *pFill)
1977
1978 {
1979
1980 /* Variables */
1981
1982 u32 xFrac, yFrac;
1983 u32 width, height;
1984 i32 xInt, yInt, x0, y0;
1985 u8 *partData, *ref;
1986 OMXSize roi;
1987 u32 fillWidth;
1988 u32 fillHeight;
1989 OMXResult res;
1990 u32 xA, yA;
1991 u32 partX, partY;
1992 u32 partWidth, partHeight;
1993
1994 /* Code */
1995
1996 ASSERT(data);
1997 ASSERT(mv);
1998 ASSERT(refPic);
1999 ASSERT(refPic->data);
2000 ASSERT(refPic->width);
2001 ASSERT(refPic->height);
2002
2003 xA = (colAndRow & 0xFFFF0000) >> 16;
2004 yA = (colAndRow & 0x0000FFFF);
2005
2006 partX = (part & 0xFF000000) >> 24;
2007 partY = (part & 0x00FF0000) >> 16;
2008 partWidth = (part & 0x0000FF00) >> 8;
2009 partHeight = (part & 0x000000FF);
2010
2011 ASSERT(partWidth);
2012 ASSERT(partHeight);
2013
2014 /* luma */
2015 partData = data + 16*partY + partX;
2016
2017 xFrac = mv->hor & 0x3;
2018 yFrac = mv->ver & 0x3;
2019
2020 width = 16 * refPic->width;
2021 height = 16 * refPic->height;
2022
2023 xInt = (i32)xA + (i32)partX + (mv->hor >> 2);
2024 yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
2025
2026 x0 = (xFrac) ? xInt-2 : xInt;
2027 y0 = (yFrac) ? yInt-2 : yInt;
2028
2029 if (xFrac)
2030 {
2031 if (partWidth == 16)
2032 fillWidth = 32;
2033 else
2034 fillWidth = 16;
2035 }
2036 else
2037 fillWidth = (partWidth*2);
2038 if (yFrac)
2039 fillHeight = partHeight+5;
2040 else
2041 fillHeight = partHeight;
2042
2043
2044 if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
2045 (y0 < 0) || ((u32)y0+fillHeight > height))
2046 {
2047 h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height,
2048 fillWidth, fillHeight, fillWidth);
2049
2050 x0 = 0;
2051 y0 = 0;
2052 ref = pFill;
2053 width = fillWidth;
2054 if (yFrac)
2055 ref += 2*width;
2056 if (xFrac)
2057 ref += 2;
2058 }
2059 else
2060 {
2061 /*lint --e(737) Loss of sign */
2062 ref = refPic->data + yInt*width + xInt;
2063 }
2064 /* Luma interpolation */
2065 roi.width = (i32)partWidth;
2066 roi.height = (i32)partHeight;
2067
2068 res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16,
2069 (i32)xFrac, (i32)yFrac, roi);
2070 ASSERT(res == 0);
2071
2072 /* Chroma */
2073 width = 8 * refPic->width;
2074 height = 8 * refPic->height;
2075
2076 x0 = ((xA + partX) >> 1) + (mv->hor >> 3);
2077 y0 = ((yA + partY) >> 1) + (mv->ver >> 3);
2078 xFrac = mv->hor & 0x7;
2079 yFrac = mv->ver & 0x7;
2080
2081 ref = refPic->data + 256 * refPic->width * refPic->height;
2082
2083 roi.width = (i32)(partWidth >> 1);
2084 fillWidth = ((partWidth >> 1) + 8) & ~0x7;
2085 roi.height = (i32)(partHeight >> 1);
2086 fillHeight = (partHeight >> 1) + 1;
2087
2088 if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
2089 (y0 < 0) || ((u32)y0+fillHeight > height))
2090 {
2091 h264bsdFillBlock(ref, pFill, x0, y0, width, height,
2092 fillWidth, fillHeight, fillWidth);
2093 ref += width * height;
2094 h264bsdFillBlock(ref, pFill + fillWidth*fillHeight,
2095 x0, y0, width, height, fillWidth,
2096 fillHeight, fillWidth);
2097
2098 ref = pFill;
2099 x0 = 0;
2100 y0 = 0;
2101 width = fillWidth;
2102 height = fillHeight;
2103 }
2104
2105 partData = data + 16*16 + (partY>>1)*8 + (partX>>1);
2106
2107 /* Chroma interpolation */
2108 /*lint --e(737) Loss of sign */
2109 ref += y0 * width + x0;
2110 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
2111 (u32)roi.width, (u32)roi.height, xFrac, yFrac);
2112 ASSERT(res == 0);
2113 partData += 8 * 8;
2114 ref += height * width;
2115 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
2116 (u32)roi.width, (u32)roi.height, xFrac, yFrac);
2117 ASSERT(res == 0);
2118
2119 }
2120
2121 #endif /* H264DEC_OMXDL */
2122
2123
2124 /*------------------------------------------------------------------------------
2125
2126 Function: FillRow1
2127
2128 Functional description:
2129 This function gets a row of reference pels in a 'normal' case when no
2130 overfilling is necessary.
2131
2132 ------------------------------------------------------------------------------*/
2133
FillRow1(u8 * ref,u8 * fill,i32 left,i32 center,i32 right)2134 static void FillRow1(
2135 u8 *ref,
2136 u8 *fill,
2137 i32 left,
2138 i32 center,
2139 i32 right)
2140 {
2141 UNUSED(left);
2142 UNUSED(right);
2143 ASSERT(ref);
2144 ASSERT(fill);
2145
2146 H264SwDecMemcpy(fill, ref, (u32)center);
2147
2148 /*lint -e(715) */
2149 }
2150
2151
2152 /*------------------------------------------------------------------------------
2153
2154 Function: h264bsdFillRow7
2155
2156 Functional description:
2157 This function gets a row of reference pels when horizontal coordinate
2158 is partly negative or partly greater than reference picture width
2159 (overfilling some pels on left and/or right edge).
2160 Inputs:
2161 ref pointer to reference samples
2162 left amount of pixels to overfill on left-edge
2163 center amount of pixels to copy
2164 right amount of pixels to overfill on right-edge
2165 Outputs:
2166 fill pointer where samples are stored
2167
2168 ------------------------------------------------------------------------------*/
2169 #ifndef H264DEC_NEON
h264bsdFillRow7(u8 * ref,u8 * fill,i32 left,i32 center,i32 right)2170 void h264bsdFillRow7(
2171 u8 *ref,
2172 u8 *fill,
2173 i32 left,
2174 i32 center,
2175 i32 right)
2176 {
2177 u8 tmp;
2178
2179 ASSERT(ref);
2180 ASSERT(fill);
2181
2182 if (left)
2183 tmp = *ref;
2184
2185 for ( ; left; left--)
2186 /*lint -esym(644,tmp) tmp is initialized if used */
2187 *fill++ = tmp;
2188
2189 for ( ; center; center--)
2190 *fill++ = *ref++;
2191
2192 if (right)
2193 tmp = ref[-1];
2194
2195 for ( ; right; right--)
2196 /*lint -esym(644,tmp) tmp is initialized if used */
2197 *fill++ = tmp;
2198 }
2199 #endif
2200 /*------------------------------------------------------------------------------
2201
2202 Function: h264bsdFillBlock
2203
2204 Functional description:
2205 This function gets a block of reference pels. It determines whether
2206 overfilling is needed or not and repeatedly calls an appropriate
2207 function (by using a function pointer) that fills one row the block.
2208 Inputs:
2209 ref pointer to reference frame
2210 x0 x-coordinate for block
2211 y0 y-coordinate for block
2212 width width of reference frame
2213 height height of reference frame
2214 blockWidth width of block
2215 blockHeight height of block
2216 fillScanLength length of a line in output array (pixels)
2217 Outputs:
2218 fill pointer to array where output block is written
2219
2220 ------------------------------------------------------------------------------*/
2221
h264bsdFillBlock(u8 * ref,u8 * fill,i32 x0,i32 y0,u32 width,u32 height,u32 blockWidth,u32 blockHeight,u32 fillScanLength)2222 void h264bsdFillBlock(
2223 u8 *ref,
2224 u8 *fill,
2225 i32 x0,
2226 i32 y0,
2227 u32 width,
2228 u32 height,
2229 u32 blockWidth,
2230 u32 blockHeight,
2231 u32 fillScanLength)
2232
2233 {
2234
2235 /* Variables */
2236
2237 i32 xstop, ystop;
2238 void (*fp)(u8*, u8*, i32, i32, i32);
2239 i32 left, x, right;
2240 i32 top, y, bottom;
2241
2242 /* Code */
2243
2244 ASSERT(ref);
2245 ASSERT(fill);
2246 ASSERT(width);
2247 ASSERT(height);
2248 ASSERT(fill);
2249 ASSERT(blockWidth);
2250 ASSERT(blockHeight);
2251
2252 xstop = x0 + (i32)blockWidth;
2253 ystop = y0 + (i32)blockHeight;
2254
2255 /* Choose correct function whether overfilling on left-edge or right-edge
2256 * is needed or not */
2257 if (x0 >= 0 && xstop <= (i32)width)
2258 fp = FillRow1;
2259 else
2260 fp = h264bsdFillRow7;
2261
2262 if (ystop < 0)
2263 y0 = -(i32)blockHeight;
2264
2265 if (xstop < 0)
2266 x0 = -(i32)blockWidth;
2267
2268 if (y0 > (i32)height)
2269 y0 = (i32)height;
2270
2271 if (x0 > (i32)width)
2272 x0 = (i32)width;
2273
2274 xstop = x0 + (i32)blockWidth;
2275 ystop = y0 + (i32)blockHeight;
2276
2277 if (x0 > 0)
2278 ref += x0;
2279
2280 if (y0 > 0)
2281 ref += y0 * (i32)width;
2282
2283 left = x0 < 0 ? -x0 : 0;
2284 right = xstop > (i32)width ? xstop - (i32)width : 0;
2285 x = (i32)blockWidth - left - right;
2286
2287 top = y0 < 0 ? -y0 : 0;
2288 bottom = ystop > (i32)height ? ystop - (i32)height : 0;
2289 y = (i32)blockHeight - top - bottom;
2290
2291 /* Top-overfilling */
2292 for ( ; top; top-- )
2293 {
2294 (*fp)(ref, fill, left, x, right);
2295 fill += fillScanLength;
2296 }
2297
2298 /* Lines inside reference image */
2299 for ( ; y; y-- )
2300 {
2301 (*fp)(ref, fill, left, x, right);
2302 ref += width;
2303 fill += fillScanLength;
2304 }
2305
2306 ref -= width;
2307
2308 /* Bottom-overfilling */
2309 for ( ; bottom; bottom-- )
2310 {
2311 (*fp)(ref, fill, left, x, right);
2312 fill += fillScanLength;
2313 }
2314 }
2315
2316 /*lint +e701 +e702 */
2317
2318
2319