1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 #include "mp4def.h"
19 #include "mp4lib_int.h"
20 #include "mp4enc_lib.h"
21 #include "dct.h"
22 #include "m4venc_oscl.h"
23
/* ======================================================================== */
/*  Function : CodeMB_H263( )                                               */
/*  Date     : 8/15/2001                                                    */
/*  Purpose  : Code the six 8x8 blocks of the current macroblock (four luma */
/*             blocks, then U, then V). For each block a DCT size is picked */
/*             from the block SAD, followed by DCT, H.263 quant/dequant,    */
/*             and IDCT + motion compensation into the reconstructed frame. */
/*             Modified from FastCodeMB().                                  */
/*  Input    :                                                              */
/*      video       Video encoder data structure                            */
/*      function    Not used by this routine (OSCL_UNUSED_ARG)              */
/*      QP          Packed argument: (QP >> 5) is the luma offset of the    */
/*                  macroblock from the frame origin, (QP & 0x1F) is the    */
/*                  quantizer used for the macroblock.                      */
/*      ncoefblck   Six entries, written here: 0 (all-zero), 1 (DC only),   */
/*                  6 (2x2 DCT), 26 (4x4 DCT) or 64 (full DCT) per block;   */
/*                  used as a speedup hint for VlcEncode.                   */
/*  Output   :                                                              */
/*      video->outputMB                 Quantized DCT coefficients          */
/*      video->headerInfo.CBP[mbnum]    Coded block pattern, one bit per    */
/*                                      block (block 0 ends up in bit 5,    */
/*                                      assuming the quantizers return 0/1) */
/*      currVop->yChan,uChan,vChan      Reconstructed pixels                */
/*                                                                          */
/*  Return   :   PV_STATUS (always PV_SUCCESS)                              */
/*  Modified :                                                              */
/*           2/26/01
            -modified threshold based on correlation coeff 0.75 only for mode H.263
            -ncoefblck[] as input, to keep position of last non-zero coeff*/
/*           8/10/01
            -modified threshold based on correlation coeff 0.5
            -used column threshold to speedup column DCT.
            -used bitmap zigzag to speedup RunLevel().                      */
/* ======================================================================== */

PV_STATUS CodeMB_H263(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
{
    Int sad, k, CBP, mbnum = video->mbnum;
    Short *output, *dataBlock;
    UChar Mode = video->headerInfo.Mode[mbnum];
    UChar *bitmapcol, *bitmaprow = video->bitmaprow;
    UInt *bitmapzz ;
    UChar shortHeader = video->vol[video->currLayer]->shortVideoHeader;
    Int dc_scaler = 8;
    Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
    struct QPstruct QuantParam;
    Int dctMode, DctTh1;
    Int ColTh;
    /* DCT and quantizer entry points; bound below to the intra or inter variants */
    Int(*BlockQuantDequantH263)(Short *, Short *, struct QPstruct *,
                                UChar[], UChar *, UInt *, Int, Int, Int, UChar);
    Int(*BlockQuantDequantH263DC)(Short *, Short *, struct QPstruct *,
                                  UChar *, UInt *, Int, UChar);
    void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);

    /* motion comp. related var. */
    Vop *currVop = video->currVop;
    VideoEncFrameIO *inputFrame = video->input;
    Int ind_x = video->outputMB->mb_x;
    Int ind_y = video->outputMB->mb_y;
    Int lx = currVop->pitch;
    Int width = currVop->width;
    UChar *rec, *input, *pred;
    Int offset = QP >> 5;  /* QP is combined offset and QP */
    Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
    /*****************************/

    OSCL_UNUSED_ARG(function);

    output = video->outputMB->block[0];
    CBP = 0;
    QP = QP & 0x1F;  /* from here on QP is only the 5-bit quantizer */
//  M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/

    /* values derived from QP, handed to the quantizers through QuantParam */
    QuantParam.QPx2 = QP << 1;
    QuantParam.QP = QP;
    QuantParam.QPdiv2 = QP >> 1;
    QuantParam.QPx2plus = QuantParam.QPx2 + QuantParam.QPdiv2;
    QuantParam.Addition = QP - 1 + (QP & 0x1);  /* QP-1 for even QP, QP for odd QP */

    if (intra)
    {
        BlockDCT1x1 = &Block1x1DCTIntra;
        BlockDCT2x2 = &Block2x2DCT_AANIntra;
        BlockDCT4x4 = &Block4x4DCT_AANIntra;
        BlockDCT8x8 = &BlockDCT_AANIntra;
        BlockQuantDequantH263 = &BlockQuantDequantH263Intra;
        BlockQuantDequantH263DC = &BlockQuantDequantH263DCIntra;
        if (shortHeader)
        {
            dc_scaler = 8;
        }
        else
        {
            dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
        }
        DctTh1 = (Int)(dc_scaler * 3);//*1.829
        ColTh = ColThIntra[QP];
    }
    else
    {
        BlockDCT1x1 = &Block1x1DCTwSub;
        BlockDCT2x2 = &Block2x2DCT_AANwSub;
        BlockDCT4x4 = &Block4x4DCT_AANwSub;
        BlockDCT8x8 = &BlockDCT_AANwSub;

        BlockQuantDequantH263 = &BlockQuantDequantH263Inter;
        BlockQuantDequantH263DC = &BlockQuantDequantH263DCInter;
        ColTh = ColThInter[QP];
        DctTh1 = (Int)(16 * QP);  //9*QP;
    }

    rec = currVop->yChan + offset;
    input = inputFrame->yChan + offset;
    /* NOTE(review): presumably 'offset' was computed with the padded pitch and
       this removes the padding contribution for an unpadded input frame -- confirm */
    if (lx != width) input -= (ind_y << 9);  /* non-padded offset */

    dataBlock = video->dataBlock;
    pred = video->predictedMB;

    /* k = 0..3: luma blocks, k = 4: U block, k = 5: V block */
    for (k = 0; k < 6; k++)
    {
        CBP <<= 1;  /* make room for this block's coded bit */
        bitmapcol = video->bitmapcol[k];
        bitmapzz = video->bitmapzz[k];  /*  7/30/01 */
        if (k < 4)
        {
            /* luma: SAD is taken from the motion data (entry k+1 of this MB) */
            sad = video->mot[mbnum][k+1].sad;
            if (k&1)
            {
                /* blocks 1 and 3: step 8 pixels to the right */
                rec += 8;
                input += 8;
            }
            else if (k == 2)
            {
                /* block 2: 8 rows down and 8 pixels back to the left;
                   dctMode is only borrowed as a scratch variable here */
                dctMode = ((width << 3) - 8);
                input += dctMode;
                dctMode = ((lx << 3) - 8);
                rec += dctMode;
            }
        }
        else
        {
            if (k == 4)
            {
                rec = currVop->uChan + offsetc;
                input = inputFrame->uChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                /* chroma planes use half the luma pitch and width */
                lx >>= 1;
                width >>= 1;
                if (intra)
                {
                    sad = getBlockSum(input, width);
                    if (shortHeader)
                        dc_scaler = 8;
                    else
                    {
                        dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
                    }
                    DctTh1 = (Int)(dc_scaler * 3);//*1.829
                }
                else
                    sad = Sad8x8(input, pred, width);
            }
            else
            {
                /* V block: lx, width, dc_scaler and DctTh1 keep the values set for U */
                rec = currVop->vChan + offsetc;
                input = inputFrame->vChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                if (intra)
                {
                    sad = getBlockSum(input, width);
                }
                else
                    sad = Sad8x8(input, pred, width);
            }
        }

        /* pick the cheapest transform the SAD allows */
        if (sad < DctTh1 && !(shortHeader && intra)) /* all-zero */
        {                       /* For shortHeader intra block, DC value cannot be zero */
            dctMode = 0;
            CBP |= 0;
            ncoefblck[k] = 0;
        }
        else if (sad < 18*QP/*(QP<<4)*/) /* DC-only */
        {
            dctMode = 1;
            BlockDCT1x1(dataBlock, input, pred, width);

            CBP |= (*BlockQuantDequantH263DC)(dataBlock, output, &QuantParam,
                                              bitmaprow + k, bitmapzz, dc_scaler, shortHeader);
            ncoefblck[k] = 1;
        }
        else
        {

            /* column threshold is stored just past the 64 coefficients;
               presumably read back by the DCT routines -- TODO confirm */
            dataBlock[64] = ColTh;

            if (sad < 22*QP/*(QP<<4)+(QP<<1)*/)  /* 2x2 DCT */
            {
                dctMode = 2;
                BlockDCT2x2(dataBlock, input, pred, width);
                ncoefblck[k] = 6;
            }
            else if (sad < (QP << 5)) /* 4x4 DCT */
            {
                dctMode = 4;
                BlockDCT4x4(dataBlock, input, pred, width);
                ncoefblck[k] = 26;
            }
            else /* Full-DCT */
            {
                dctMode = 8;
                BlockDCT8x8(dataBlock, input, pred, width);
                ncoefblck[k] = 64;
            }

            CBP |= (*BlockQuantDequantH263)(dataBlock, output, &QuantParam,
                                            bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler, shortHeader);
        }
        /* last argument packs the reconstruction pitch (shifted left by one) with the intra flag in bit 0 */
        BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | intra);
        output += 64;  /* next block of 64 quantized coefficients */
        /* step to the next 8x8 prediction block: +8 after an even k, +120 after
           an odd k (8 + 120 = 128; presumably 8 rows of a 16-byte-wide
           prediction buffer -- TODO confirm) */
        if (!(k&1))
        {
            pred += 8;
        }
        else
        {
            pred += 120;
        }
    }

    video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
    return PV_SUCCESS;
}
251
252 #ifndef NO_MPEG_QUANT
/* ======================================================================== */
/*  Function : CodeMB_MPEG( )                                               */
/*  Date     : 8/15/2001                                                    */
/*  Purpose  : Code the six 8x8 blocks of the current macroblock (four luma */
/*             blocks, then U, then V). For each block a DCT size is picked */
/*             from the block SAD, followed by DCT, MPEG quant/dequant      */
/*             (using the quantization matrix of the current layer), and    */
/*             IDCT + motion compensation into the reconstructed frame.     */
/*             Modified from FastCodeMB().                                  */
/*  Input    :                                                              */
/*      video       Video encoder data structure                            */
/*      function    Not used by this routine (OSCL_UNUSED_ARG)              */
/*      QP          Packed argument: (QP >> 5) is the luma offset of the    */
/*                  macroblock from the frame origin, (QP & 0x1F) is the    */
/*                  quantizer used for the macroblock.                      */
/*      ncoefblck   Six entries, written here: 0 (all-zero), 1 (DC only),   */
/*                  6 (2x2 DCT), 26 (4x4 DCT) or 64 (full DCT) per block;   */
/*                  used as a speedup hint for VlcEncode.                   */
/*  Output   :                                                              */
/*      video->outputMB                 Quantized DCT coefficients          */
/*      video->headerInfo.CBP[mbnum]    Coded block pattern, one bit per    */
/*                                      block (block 0 ends up in bit 5,    */
/*                                      assuming the quantizers return 0/1) */
/*      currVop->yChan,uChan,vChan      Reconstructed pixels                */
/*                                                                          */
/*  Return   :   PV_STATUS (always PV_SUCCESS)                              */
/*  Modified :                                                              */
/*           2/26/01
            -modified threshold based on correlation coeff 0.75 only for mode H.263
            -ncoefblck[] as input, keep position of last non-zero coeff*/
/*           8/10/01
            -modified threshold based on correlation coeff 0.5
            -used column threshold to speedup column DCT.
            -used bitmap zigzag to speedup RunLevel().                      */
/* ======================================================================== */

PV_STATUS CodeMB_MPEG(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
{
    Int sad, k, CBP, mbnum = video->mbnum;
    Short *output, *dataBlock;
    UChar Mode = video->headerInfo.Mode[mbnum];
    UChar *bitmapcol, *bitmaprow = video->bitmaprow;
    UInt *bitmapzz ;
    Int dc_scaler = 8;
    Vol *currVol = video->vol[video->currLayer];
    Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
    Int *qmat;
    Int dctMode, DctTh1, DctTh2, DctTh3, DctTh4;
    Int ColTh;

    /* DCT and quantizer entry points; bound below to the intra or inter variants */
    Int(*BlockQuantDequantMPEG)(Short *, Short *, Int, Int *,
                                UChar [], UChar *, UInt *, Int,  Int, Int);
    Int(*BlockQuantDequantMPEGDC)(Short *, Short *, Int, Int *,
                                  UChar [], UChar *, UInt *, Int);

    void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
    void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);

    /* motion comp. related var. */
    Vop *currVop = video->currVop;
    VideoEncFrameIO *inputFrame = video->input;
    Int ind_x = video->outputMB->mb_x;
    Int ind_y = video->outputMB->mb_y;
    Int lx = currVop->pitch;
    Int width = currVop->width;
    UChar *rec, *input, *pred;
    Int offset = QP >> 5;  /* QP is combined offset and QP */
    Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
    /*****************************/

    OSCL_UNUSED_ARG(function);

    output = video->outputMB->block[0];
    CBP = 0;
    QP = QP & 0x1F;  /* from here on QP is only the 5-bit quantizer */
//  M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero ,  7/24/01*/

    /* Select the function set and derive the four SAD thresholds
       (all-zero / DC-only / 2x2 / 4x4) from QP and the quantization
       matrix entries 0 (inter only), 1, 2 and 32. */
    if (intra)
    {
        BlockDCT1x1 = &Block1x1DCTIntra;
        BlockDCT2x2 = &Block2x2DCT_AANIntra;
        BlockDCT4x4 = &Block4x4DCT_AANIntra;
        BlockDCT8x8 = &BlockDCT_AANIntra;

        BlockQuantDequantMPEG = &BlockQuantDequantMPEGIntra;
        BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCIntra;
        dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
        qmat = currVol->iqmat;
        DctTh1 = (Int)(3 * dc_scaler);//2*dc_scaler);
        DctTh2 = (Int)((1.25 * QP - 1) * qmat[1] * 0.45);//0.567);//0.567);
        DctTh3 = (Int)((1.25 * QP - 1) * qmat[2] * 0.55);//1.162); /*  8/2/2001 */
        DctTh4 = (Int)((1.25 * QP - 1) * qmat[32] * 0.8);//1.7583);//0.7942);
        ColTh = ColThIntra[QP];
    }
    else
    {
        BlockDCT1x1 = &Block1x1DCTwSub;
        BlockDCT2x2 = &Block2x2DCT_AANwSub;
        BlockDCT4x4 = &Block4x4DCT_AANwSub;
        BlockDCT8x8 = &BlockDCT_AANwSub;

        BlockQuantDequantMPEG = &BlockQuantDequantMPEGInter;
        BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCInter;
        qmat = currVol->niqmat;
        DctTh1 = (Int)(((QP << 1) - 0.5) * qmat[0] * 0.4);//0.2286);//0.3062);
        DctTh2 = (Int)(((QP << 1) - 0.5) * qmat[1] * 0.45);//0.567);//0.4);
        DctTh3 = (Int)(((QP << 1) - 0.5) * qmat[2] * 0.55);//1.162); /*  8/2/2001 */
        DctTh4 = (Int)(((QP << 1) - 0.5) * qmat[32] * 0.8);//1.7583);//0.7942);
        ColTh = ColThInter[QP];
    }// get qmat, DctTh1, DctTh2, DctTh3, DctTh4

    rec = currVop->yChan + offset;
    input = inputFrame->yChan + offset;
    /* NOTE(review): presumably 'offset' was computed with the padded pitch and
       this removes the padding contribution for an unpadded input frame -- confirm */
    if (lx != width) input -= (ind_y << 9);  /* non-padded offset */

    dataBlock = video->dataBlock;
    pred = video->predictedMB;

    /* k = 0..3: luma blocks, k = 4: U block, k = 5: V block */
    for (k = 0; k < 6; k++)
    {
        CBP <<= 1;  /* make room for this block's coded bit */
        bitmapcol = video->bitmapcol[k];
        bitmapzz = video->bitmapzz[k];  /*  8/2/01 */
        if (k < 4)
        {//Y block
            /* luma: SAD is taken from the motion data (entry k+1 of this MB) */
            sad = video->mot[mbnum][k+1].sad;
            if (k&1)
            {
                /* blocks 1 and 3: step 8 pixels to the right */
                rec += 8;
                input += 8;
            }
            else if (k == 2)
            {
                /* block 2: 8 rows down and 8 pixels back to the left;
                   dctMode is only borrowed as a scratch variable here */
                dctMode = ((width << 3) - 8);
                input += dctMode;
                dctMode = ((lx << 3) - 8);
                rec += dctMode;
            }
        }
        else
        {// U, V block
            if (k == 4)
            {
                rec = currVop->uChan + offsetc;
                input = inputFrame->uChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                /* chroma planes use half the luma pitch and width */
                lx >>= 1;
                width >>= 1;
                if (intra)
                {
                    dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
                    DctTh1 = dc_scaler * 3;
                    sad = getBlockSum(input, width);
                }
                else
                    sad = Sad8x8(input, pred, width);
            }
            else
            {
                /* V block: lx, width, dc_scaler and DctTh1 keep the values set for U */
                rec = currVop->vChan + offsetc;
                input = inputFrame->vChan + offsetc;
                if (lx != width) input -= (ind_y << 7);
                if (intra)
                    sad = getBlockSum(input, width);
                else
                    sad = Sad8x8(input, pred, width);
            }
        }

        /* pick the cheapest transform the SAD allows */
        if (sad < DctTh1) /* all-zero */
        {
            dctMode = 0;
            CBP |= 0;
            ncoefblck[k] = 0;
        }
        else if (sad < DctTh2) /* DC-only */
        {
            dctMode = 1;
            BlockDCT1x1(dataBlock, input, pred, width);

            CBP |= (*BlockQuantDequantMPEGDC)(dataBlock, output, QP, qmat,
                                              bitmapcol, bitmaprow + k, bitmapzz, dc_scaler);
            ncoefblck[k] = 1;
        }
        else
        {
            /* column threshold is stored just past the 64 coefficients;
               presumably read back by the DCT routines -- TODO confirm */
            dataBlock[64] = ColTh;

            if (sad < DctTh3) /* 2x2-DCT */
            {
                dctMode = 2;
                BlockDCT2x2(dataBlock, input, pred, width);
                ncoefblck[k] = 6;
            }
            else if (sad < DctTh4) /* 4x4 DCT */
            {
                dctMode = 4;
                BlockDCT4x4(dataBlock, input, pred, width);
                ncoefblck[k] = 26;
            }
            else /* full-DCT */
            {
                dctMode = 8;
                BlockDCT8x8(dataBlock, input, pred, width);
                ncoefblck[k] = 64;
            }

            CBP |= (*BlockQuantDequantMPEG)(dataBlock, output, QP, qmat,
                                            bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler); //
        }
        /* The reconstruction is always requested with the full 8x8 IDCT,
           whatever forward DCT size was used above.
           NOTE(review): this also applies to the all-zero path, where this
           function has written neither dataBlock nor bitmaprow[k] for the
           block -- confirm BlockIDCTMotionComp copes with that. */
        dctMode = 8; /* for mismatch handle */
        /* last argument packs the reconstruction pitch (shifted left by one) with the intra flag in bit 0 */
        BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | (intra));

        output += 64;  /* next block of 64 quantized coefficients */
        /* step to the next 8x8 prediction block: +8 after an even k, +120 after
           an odd k (8 + 120 = 128; presumably 8 rows of a 16-byte-wide
           prediction buffer -- TODO confirm) */
        if (!(k&1))
        {
            pred += 8;
        }
        else
        {
            pred += 120;
        }
    }

    video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
    return PV_SUCCESS;
}
472
473 #endif
474
475 /* ======================================================================== */
476 /* Function : getBlockSAV( ) */
477 /* Date : 8/10/2000 */
478 /* Purpose : Get SAV for one block */
479 /* In/out : block[64] contain one block data */
480 /* Return : */
481 /* Modified : */
482 /* ======================================================================== */
483 /* can be written in MMX or SSE, 2/22/2001 */
getBlockSAV(Short block[])484 Int getBlockSAV(Short block[])
485 {
486 Int i, val, sav = 0;
487
488 i = 8;
489 while (i--)
490 {
491 val = *block++;
492 if (val > 0) sav += val;
493 else sav -= val;
494 val = *block++;
495 if (val > 0) sav += val;
496 else sav -= val;
497 val = *block++;
498 if (val > 0) sav += val;
499 else sav -= val;
500 val = *block++;
501 if (val > 0) sav += val;
502 else sav -= val;
503 val = *block++;
504 if (val > 0) sav += val;
505 else sav -= val;
506 val = *block++;
507 if (val > 0) sav += val;
508 else sav -= val;
509 val = *block++;
510 if (val > 0) sav += val;
511 else sav -= val;
512 val = *block++;
513 if (val > 0) sav += val;
514 else sav -= val;
515 }
516
517 return sav;
518
519 }
520
/* ======================================================================== */
/*  Function : Sad8x8( )                                                    */
/*  Date     : 8/10/2000                                                    */
/*  Purpose  : Find SAD between prev block and current block. Each row is   */
/*             processed as two Int words holding four packed bytes each.   */
/*  In/out   : cur   - current block inside a frame; rows are 'width' bytes */
/*                     apart (advanced by width >> 2 Int words per row)     */
/*             prev  - prediction block; advanced by 4 Int words per row    */
/*             width - row distance of 'cur' in bytes                       */
/*  Return   : 16-bit sum of the per-byte absolute differences              */
/*  Notes    : Assumes Int is 32 bits wide and that cur/prev may be read    */
/*             through an Int pointer (alignment) -- TODO confirm at the    */
/*             call sites. The arithmetic relies on wrapping integer        */
/*             operations, hence the no_sanitize attribute below.           */
/*  Modified :                                                              */
/*      8/15/01, - do 4 pixel at a time assuming 32 bit register            */
/* ======================================================================== */
#ifdef __clang__
__attribute((no_sanitize("integer")))
#endif
Int Sad8x8(UChar *cur, UChar *prev, Int width)
{
    UChar *end = cur + (width << 3);  /* address 8 rows past the start of the block */
    Int sad = 0;
    Int *curInt = (Int*) cur;
    Int *prevInt = (Int*) prev;
    Int cur1, cur2, prev1, prev2;
    UInt mask, sgn_msk = 0x80808080;
    Int sum2 = 0, sum4 = 0;  /* sum4: whole-word sums, sum2: sums of bytes 1 and 3 only */
    Int tmp;
    do
    {
        mask    = ~(0xFF00);
        cur1    = curInt[1];        /* load cur[4..7] */
        cur2    = curInt[0];        /* load cur[0..3] */
        curInt += (width >> 2);     /* advance cur by one row */
        prev1   = prevInt[1];       /* load prev[4..7] */
        prev2   = prevInt[0];       /* load prev[0..3] */
        prevInt += 4;               /* advance prev by four Int words */

        /* packed |prev - cur| for bytes 0..3 */
        tmp     = prev2 ^ cur2;
        cur2    = prev2 - cur2;
        tmp     = tmp ^ cur2;       /* (^)^(-): bit i is set if a borrow entered bit i */
        tmp     = sgn_msk & ((UInt)tmp >> 1); /* borrow out of bytes 0..2 -> flag in bit 7 of that byte */
        /* NOTE(review): the flag for the top byte is taken from the sign of the
           whole 32-bit difference, not from a real borrow; a top-byte
           difference larger than 127 in magnitude therefore looks like it is
           folded with the wrong sign -- confirm this approximation is intended */
        if (cur2 < 0)   tmp = tmp | 0x80000000; /* correct sign of the top byte */
        tmp     = (tmp << 8) - tmp;     /* each flagged byte (0x80) becomes 0x7F80 */
        cur2    = cur2 + (tmp >> 7);    /* tmp >> 7 is 0xFF in flagged bytes: add 0xFF, i.e. -1 */
        cur2    = cur2 ^(tmp >> 7);     /* take absolute by inverting bits (EOR) */

        /* same steps for bytes 4..7 */
        tmp     = prev1 ^ cur1;
        cur1    = prev1 - cur1;
        tmp     = tmp ^ cur1;       /* (^)^(-): bit i is set if a borrow entered bit i */
        tmp     = sgn_msk & ((UInt)tmp >> 1); /* borrow out of bytes 0..2 -> flag in bit 7 of that byte */
        if (cur1 < 0)   tmp = tmp | 0x80000000; /* correct sign of the top byte */
        tmp     = (tmp << 8) - tmp;     /* each flagged byte (0x80) becomes 0x7F80 */
        cur1    = cur1 + (tmp >> 7);    /* tmp >> 7 is 0xFF in flagged bytes: add 0xFF, i.e. -1 */
        cur1    = cur1 ^(tmp >> 7);     /* take absolute by inverting bits (EOR) */

        sum4    = sum4 + cur1;
        cur1    = cur1 & (mask << 8);   /* keep bytes 1 and 3 (mask << 8 is 0xFF00FF00) */
        sum2    = sum2 + ((UInt)cur1 >> 8);  /* accumulate them in two 16-bit lanes */
        sum4    = sum4 + cur2;
        cur2    = cur2 & (mask << 8);   /* keep bytes 1 and 3 (mask << 8 is 0xFF00FF00) */
        sum2    = sum2 + ((UInt)cur2 >> 8);  /* accumulate them in two 16-bit lanes */
    }
    while ((uintptr_t)curInt < (uintptr_t)end);

    cur1 = sum4 - (sum2 << 8);  /* remove bytes 1 and 3: 16-bit lanes of byte-0 and byte-2 sums */
    cur1 = cur1 + sum2;         /* add the byte-1 and byte-3 lane sums back, lane by lane */
    cur1 = cur1 + (cur1 << 16); /* add upper and lower 16 bit sum */
    sad  = ((UInt)cur1 >> 16);  /* take upper 16 bit */
    return sad;
}
586
587 /* ======================================================================== */
588 /* Function : getBlockSum( ) */
589 /* Date : 8/10/2000 */
590 /* Purpose : Find summation of value within a block. */
591 /* In/out : Pointer to current block in a frame and frame width */
592 /* Return : */
593 /* Modified : */
594 /* 8/15/01, - SIMD 4 pixels at a time */
595 /* ======================================================================== */
596 #ifdef __clang__
597 __attribute((no_sanitize("integer")))
598 #endif
getBlockSum(UChar * cur,Int width)599 Int getBlockSum(UChar *cur, Int width)
600 {
601 Int sad = 0, sum4 = 0, sum2 = 0;
602 UChar *end = cur + (width << 3);
603 Int *curInt = (Int*)cur;
604 UInt mask = ~(0xFF00);
605 Int load1, load2;
606
607 do
608 {
609 load1 = curInt[1];
610 load2 = curInt[0];
611 curInt += (width >> 2);
612 sum4 += load1;
613 load1 = load1 & (mask << 8); /* even bytes */
614 sum2 += ((UInt)load1 >> 8); /* sum even bytes, 16 bit */
615 sum4 += load2;
616 load2 = load2 & (mask << 8); /* even bytes */
617 sum2 += ((UInt)load2 >> 8); /* sum even bytes, 16 bit */
618 }
619 while ((uintptr_t)curInt < (uintptr_t)end);
620 load1 = sum4 - (sum2 << 8); /* get even-sum */
621 load1 = load1 + sum2; /* add 16 bit even-sum and odd-sum*/
622 load1 = load1 + (load1 << 16); /* add upper and lower 16 bit sum */
623 sad = ((UInt)load1 >> 16); /* take upper 16 bit */
624
625 return sad;
626 }
627
628