1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "mp4def.h"
19 #include "mp4lib_int.h"
20 #include "mp4enc_lib.h"
21 #include "dct.h"
22 #include "m4venc_oscl.h"
23 
24 /* ======================================================================== */
25 /*  Function : CodeMB_H263( )                                               */
26 /*  Date     : 8/15/2001                                                    */
27 /*  Purpose  : Perform residue calc (only zero MV), DCT, H263 Quant/Dequant,*/
28 /*              IDCT and motion compensation.Modified from FastCodeMB()     */
29 /*  Input    :                                                              */
30 /*      video       Video encoder data structure                            */
31 /*      function    Approximate DCT function, scaling and threshold         */
32 /*      ncoefblck   Array for last nonzero coeff for speedup in VlcEncode   */
33 /*      QP      Combined offset from the origin to the current          */
34 /*                  macroblock  and QP  for current MB.                     */
35 /*    Output     :                                                          */
36 /*      video->outputMB     Quantized DCT coefficients.                     */
37 /*      currVop->yChan,uChan,vChan  Reconstructed pixels                    */
38 /*                                                                          */
39 /*  Return   :   PV_STATUS                                                  */
40 /*  Modified :                                                              */
41 /*           2/26/01
42             -modified threshold based on correlation coeff 0.75 only for mode H.263
43             -ncoefblck[] as input,  to keep position of last non-zero coeff*/
44 /*           8/10/01
45             -modified threshold based on correlation coeff 0.5
46             -used column threshold to speedup column DCT.
47             -used bitmap zigzag to speedup RunLevel().                      */
48 /* ======================================================================== */
49 
CodeMB_H263(VideoEncData * video,approxDCT * function,Int QP,Int ncoefblck[])50 PV_STATUS CodeMB_H263(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
51 {
52     Int sad, k, CBP, mbnum = video->mbnum;
53     Short *output, *dataBlock;
54     UChar Mode = video->headerInfo.Mode[mbnum];
55     UChar *bitmapcol, *bitmaprow = video->bitmaprow;
56     UInt  *bitmapzz ;
57     UChar shortHeader = video->vol[video->currLayer]->shortVideoHeader;
58     Int dc_scaler = 8;
59     Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
60     struct QPstruct QuantParam;
61     Int dctMode, DctTh1;
62     Int ColTh;
63     Int(*BlockQuantDequantH263)(Short *, Short *, struct QPstruct *,
64                                 UChar[], UChar *, UInt *, Int, Int, Int, UChar);
65     Int(*BlockQuantDequantH263DC)(Short *, Short *, struct QPstruct *,
66                                   UChar *, UInt *, Int, UChar);
67     void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
68     void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
69     void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
70     void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);
71 
72     /* motion comp. related var. */
73     Vop *currVop = video->currVop;
74     VideoEncFrameIO *inputFrame = video->input;
75     Int ind_x = video->outputMB->mb_x;
76     Int ind_y = video->outputMB->mb_y;
77     Int lx = currVop->pitch;
78     Int width = currVop->width;
79     UChar *rec, *input, *pred;
80     Int offset = QP >> 5;  /* QP is combined offset and QP */
81     Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
82     /*****************************/
83 
84     OSCL_UNUSED_ARG(function);
85 
86     output = video->outputMB->block[0];
87     CBP = 0;
88     QP = QP & 0x1F;
89 //  M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/
90 
91     QuantParam.QPx2 = QP << 1;
92     QuantParam.QP = QP;
93     QuantParam.QPdiv2 = QP >> 1;
94     QuantParam.QPx2plus = QuantParam.QPx2 + QuantParam.QPdiv2;
95     QuantParam.Addition = QP - 1 + (QP & 0x1);
96 
97     if (intra)
98     {
99         BlockDCT1x1 = &Block1x1DCTIntra;
100         BlockDCT2x2 = &Block2x2DCT_AANIntra;
101         BlockDCT4x4 = &Block4x4DCT_AANIntra;
102         BlockDCT8x8 = &BlockDCT_AANIntra;
103         BlockQuantDequantH263 = &BlockQuantDequantH263Intra;
104         BlockQuantDequantH263DC = &BlockQuantDequantH263DCIntra;
105         if (shortHeader)
106         {
107             dc_scaler = 8;
108         }
109         else
110         {
111             dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
112         }
113         DctTh1 = (Int)(dc_scaler * 3);//*1.829
114         ColTh = ColThIntra[QP];
115     }
116     else
117     {
118         BlockDCT1x1 = &Block1x1DCTwSub;
119         BlockDCT2x2 = &Block2x2DCT_AANwSub;
120         BlockDCT4x4 = &Block4x4DCT_AANwSub;
121         BlockDCT8x8 = &BlockDCT_AANwSub;
122 
123         BlockQuantDequantH263 = &BlockQuantDequantH263Inter;
124         BlockQuantDequantH263DC = &BlockQuantDequantH263DCInter;
125         ColTh = ColThInter[QP];
126         DctTh1 = (Int)(16 * QP);  //9*QP;
127     }
128 
129     rec = currVop->yChan + offset;
130     input = inputFrame->yChan + offset;
131     if (lx != width) input -= (ind_y << 9);  /* non-padded offset */
132 
133     dataBlock = video->dataBlock;
134     pred = video->predictedMB;
135 
136     for (k = 0; k < 6; k++)
137     {
138         CBP <<= 1;
139         bitmapcol = video->bitmapcol[k];
140         bitmapzz = video->bitmapzz[k];  /*  7/30/01 */
141         if (k < 4)
142         {
143             sad = video->mot[mbnum][k+1].sad;
144             if (k&1)
145             {
146                 rec += 8;
147                 input += 8;
148             }
149             else if (k == 2)
150             {
151                 dctMode = ((width << 3) - 8);
152                 input += dctMode;
153                 dctMode = ((lx << 3) - 8);
154                 rec += dctMode;
155             }
156         }
157         else
158         {
159             if (k == 4)
160             {
161                 rec = currVop->uChan + offsetc;
162                 input = inputFrame->uChan + offsetc;
163                 if (lx != width) input -= (ind_y << 7);
164                 lx >>= 1;
165                 width >>= 1;
166                 if (intra)
167                 {
168                     sad = getBlockSum(input, width);
169                     if (shortHeader)
170                         dc_scaler = 8;
171                     else
172                     {
173                         dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
174                     }
175                     DctTh1 = (Int)(dc_scaler * 3);//*1.829
176                 }
177                 else
178                     sad = Sad8x8(input, pred, width);
179             }
180             else
181             {
182                 rec = currVop->vChan + offsetc;
183                 input = inputFrame->vChan + offsetc;
184                 if (lx != width) input -= (ind_y << 7);
185                 if (intra)
186                 {
187                     sad = getBlockSum(input, width);
188                 }
189                 else
190                     sad = Sad8x8(input, pred, width);
191             }
192         }
193 
194         if (sad < DctTh1 && !(shortHeader && intra)) /* all-zero */
195         {                       /* For shortHeader intra block, DC value cannot be zero */
196             dctMode = 0;
197             CBP |= 0;
198             ncoefblck[k] = 0;
199         }
200         else if (sad < 18*QP/*(QP<<4)*/) /* DC-only */
201         {
202             dctMode = 1;
203             BlockDCT1x1(dataBlock, input, pred, width);
204 
205             CBP |= (*BlockQuantDequantH263DC)(dataBlock, output, &QuantParam,
206                                               bitmaprow + k, bitmapzz, dc_scaler, shortHeader);
207             ncoefblck[k] = 1;
208         }
209         else
210         {
211 
212             dataBlock[64] = ColTh;
213 
214             if (sad < 22*QP/*(QP<<4)+(QP<<1)*/)  /* 2x2 DCT */
215             {
216                 dctMode = 2;
217                 BlockDCT2x2(dataBlock, input, pred, width);
218                 ncoefblck[k] = 6;
219             }
220             else if (sad < (QP << 5)) /* 4x4 DCT */
221             {
222                 dctMode = 4;
223                 BlockDCT4x4(dataBlock, input, pred, width);
224                 ncoefblck[k] = 26;
225             }
226             else /* Full-DCT */
227             {
228                 dctMode = 8;
229                 BlockDCT8x8(dataBlock, input, pred, width);
230                 ncoefblck[k] = 64;
231             }
232 
233             CBP |= (*BlockQuantDequantH263)(dataBlock, output, &QuantParam,
234                                             bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler, shortHeader);
235         }
236         BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | intra);
237         output += 64;
238         if (!(k&1))
239         {
240             pred += 8;
241         }
242         else
243         {
244             pred += 120;
245         }
246     }
247 
248     video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
249     return PV_SUCCESS;
250 }
251 
252 #ifndef NO_MPEG_QUANT
253 /* ======================================================================== */
254 /*  Function : CodeMB_MPEG( )                                               */
255 /*  Date     : 8/15/2001                                                    */
256 /*  Purpose  : Perform residue calc (only zero MV), DCT, MPEG Quant/Dequant,*/
257 /*              IDCT and motion compensation.Modified from FastCodeMB()     */
258 /*  Input    :                                                              */
259 /*      video       Video encoder data structure                            */
260 /*      function    Approximate DCT function, scaling and threshold         */
261 /*      ncoefblck   Array for last nonzero coeff for speedup in VlcEncode   */
262 /*      QP      Combined offset from the origin to the current          */
263 /*                  macroblock  and QP  for current MB.                     */
264 /*    Output     :                                                          */
265 /*      video->outputMB     Quantized DCT coefficients.                     */
266 /*      currVop->yChan,uChan,vChan  Reconstructed pixels                    */
267 /*                                                                          */
268 /*  Return   :   PV_STATUS                                                  */
269 /*  Modified :                                                              */
270 /*           2/26/01
271             -modified threshold based on correlation coeff 0.75 only for mode H.263
272             -ncoefblck[] as input, keep position of last non-zero coeff*/
273 /*           8/10/01
274             -modified threshold based on correlation coeff 0.5
275             -used column threshold to speedup column DCT.
276             -used bitmap zigzag to speedup RunLevel().                      */
277 /* ======================================================================== */
278 
CodeMB_MPEG(VideoEncData * video,approxDCT * function,Int QP,Int ncoefblck[])279 PV_STATUS CodeMB_MPEG(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
280 {
281     Int sad, k, CBP, mbnum = video->mbnum;
282     Short *output, *dataBlock;
283     UChar Mode = video->headerInfo.Mode[mbnum];
284     UChar *bitmapcol, *bitmaprow = video->bitmaprow;
285     UInt  *bitmapzz ;
286     Int dc_scaler = 8;
287     Vol *currVol = video->vol[video->currLayer];
288     Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
289     Int *qmat;
290     Int dctMode, DctTh1, DctTh2, DctTh3, DctTh4;
291     Int ColTh;
292 
293     Int(*BlockQuantDequantMPEG)(Short *, Short *, Int, Int *,
294                                 UChar [], UChar *, UInt *, Int,  Int, Int);
295     Int(*BlockQuantDequantMPEGDC)(Short *, Short *, Int, Int *,
296                                   UChar [], UChar *, UInt *, Int);
297 
298     void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
299     void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
300     void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
301     void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);
302 
303     /* motion comp. related var. */
304     Vop *currVop = video->currVop;
305     VideoEncFrameIO *inputFrame = video->input;
306     Int ind_x = video->outputMB->mb_x;
307     Int ind_y = video->outputMB->mb_y;
308     Int lx = currVop->pitch;
309     Int width = currVop->width;
310     UChar *rec, *input, *pred;
311     Int offset = QP >> 5;
312     Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
313     /*****************************/
314 
315     OSCL_UNUSED_ARG(function);
316 
317     output = video->outputMB->block[0];
318     CBP = 0;
319     QP = QP & 0x1F;
320 //  M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero ,  7/24/01*/
321 
322     if (intra)
323     {
324         BlockDCT1x1 = &Block1x1DCTIntra;
325         BlockDCT2x2 = &Block2x2DCT_AANIntra;
326         BlockDCT4x4 = &Block4x4DCT_AANIntra;
327         BlockDCT8x8 = &BlockDCT_AANIntra;
328 
329         BlockQuantDequantMPEG = &BlockQuantDequantMPEGIntra;
330         BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCIntra;
331         dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
332         qmat = currVol->iqmat;
333         DctTh1 = (Int)(3 * dc_scaler);//2*dc_scaler);
334         DctTh2 = (Int)((1.25 * QP - 1) * qmat[1] * 0.45);//0.567);//0.567);
335         DctTh3 = (Int)((1.25 * QP - 1) * qmat[2] * 0.55);//1.162); /*  8/2/2001 */
336         DctTh4 = (Int)((1.25 * QP - 1) * qmat[32] * 0.8);//1.7583);//0.7942);
337         ColTh = ColThIntra[QP];
338     }
339     else
340     {
341         BlockDCT1x1 = &Block1x1DCTwSub;
342         BlockDCT2x2 = &Block2x2DCT_AANwSub;
343         BlockDCT4x4 = &Block4x4DCT_AANwSub;
344         BlockDCT8x8 = &BlockDCT_AANwSub;
345 
346         BlockQuantDequantMPEG = &BlockQuantDequantMPEGInter;
347         BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCInter;
348         qmat = currVol->niqmat;
349         DctTh1 = (Int)(((QP << 1) - 0.5) * qmat[0] * 0.4);//0.2286);//0.3062);
350         DctTh2 = (Int)(((QP << 1) - 0.5) * qmat[1] * 0.45);//0.567);//0.4);
351         DctTh3 = (Int)(((QP << 1) - 0.5) * qmat[2] * 0.55);//1.162); /*  8/2/2001 */
352         DctTh4 = (Int)(((QP << 1) - 0.5) * qmat[32] * 0.8);//1.7583);//0.7942);
353         ColTh = ColThInter[QP];
354     }// get qmat, DctTh1, DctTh2, DctTh3
355 
356     rec = currVop->yChan + offset;
357     input = inputFrame->yChan + offset;
358     if (lx != width) input -= (ind_y << 9);  /* non-padded offset */
359 
360     dataBlock = video->dataBlock;
361     pred = video->predictedMB;
362 
363     for (k = 0; k < 6; k++)
364     {
365         CBP <<= 1;
366         bitmapcol = video->bitmapcol[k];
367         bitmapzz = video->bitmapzz[k];  /*  8/2/01 */
368         if (k < 4)
369         {//Y block
370             sad = video->mot[mbnum][k+1].sad;
371             if (k&1)
372             {
373                 rec += 8;
374                 input += 8;
375             }
376             else if (k == 2)
377             {
378                 dctMode = ((width << 3) - 8);
379                 input += dctMode;
380                 dctMode = ((lx << 3) - 8);
381                 rec += dctMode;
382             }
383         }
384         else
385         {// U, V block
386             if (k == 4)
387             {
388                 rec = currVop->uChan + offsetc;
389                 input = inputFrame->uChan + offsetc;
390                 if (lx != width) input -= (ind_y << 7);
391                 lx >>= 1;
392                 width >>= 1;
393                 if (intra)
394                 {
395                     dc_scaler = cal_dc_scalerENC(QP, 2); /* luminance blocks */
396                     DctTh1 = dc_scaler * 3;
397                     sad = getBlockSum(input, width);
398                 }
399                 else
400                     sad = Sad8x8(input, pred, width);
401             }
402             else
403             {
404                 rec = currVop->vChan + offsetc;
405                 input = inputFrame->vChan + offsetc;
406                 if (lx != width) input -= (ind_y << 7);
407                 if (intra)
408                     sad = getBlockSum(input, width);
409                 else
410                     sad = Sad8x8(input, pred, width);
411             }
412         }
413 
414         if (sad < DctTh1) /* all-zero */
415         {
416             dctMode = 0;
417             CBP |= 0;
418             ncoefblck[k] = 0;
419         }
420         else if (sad < DctTh2) /* DC-only */
421         {
422             dctMode = 1;
423             BlockDCT1x1(dataBlock, input, pred, width);
424 
425             CBP |= (*BlockQuantDequantMPEGDC)(dataBlock, output, QP, qmat,
426                                               bitmapcol, bitmaprow + k, bitmapzz, dc_scaler);
427             ncoefblck[k] = 1;
428         }
429         else
430         {
431             dataBlock[64] = ColTh;
432 
433             if (sad < DctTh3) /* 2x2-DCT */
434             {
435                 dctMode = 2;
436                 BlockDCT2x2(dataBlock, input, pred, width);
437                 ncoefblck[k] = 6;
438             }
439             else if (sad < DctTh4) /* 4x4 DCT */
440             {
441                 dctMode = 4;
442                 BlockDCT4x4(dataBlock, input, pred, width);
443                 ncoefblck[k] = 26;
444             }
445             else /* full-DCT */
446             {
447                 dctMode = 8;
448                 BlockDCT8x8(dataBlock, input, pred, width);
449                 ncoefblck[k] = 64;
450             }
451 
452             CBP |= (*BlockQuantDequantMPEG)(dataBlock, output, QP, qmat,
453                                             bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler); //
454         }
455         dctMode = 8; /* for mismatch handle */
456         BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | (intra));
457 
458         output += 64;
459         if (!(k&1))
460         {
461             pred += 8;
462         }
463         else
464         {
465             pred += 120;
466         }
467     }
468 
469     video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
470     return PV_SUCCESS;
471 }
472 
473 #endif
474 
475 /* ======================================================================== */
476 /*  Function : getBlockSAV( )                                               */
477 /*  Date     : 8/10/2000                                                    */
478 /*  Purpose  : Get SAV for one block                                        */
479 /*  In/out   : block[64] contain one block data                             */
480 /*  Return   :                                                              */
481 /*  Modified :                                                              */
482 /* ======================================================================== */
483 /* can be written in MMX or SSE,  2/22/2001 */
getBlockSAV(Short block[])484 Int getBlockSAV(Short block[])
485 {
486     Int i, val, sav = 0;
487 
488     i = 8;
489     while (i--)
490     {
491         val = *block++;
492         if (val > 0)    sav += val;
493         else        sav -= val;
494         val = *block++;
495         if (val > 0)    sav += val;
496         else        sav -= val;
497         val = *block++;
498         if (val > 0)    sav += val;
499         else        sav -= val;
500         val = *block++;
501         if (val > 0)    sav += val;
502         else        sav -= val;
503         val = *block++;
504         if (val > 0)    sav += val;
505         else        sav -= val;
506         val = *block++;
507         if (val > 0)    sav += val;
508         else        sav -= val;
509         val = *block++;
510         if (val > 0)    sav += val;
511         else        sav -= val;
512         val = *block++;
513         if (val > 0)    sav += val;
514         else        sav -= val;
515     }
516 
517     return sav;
518 
519 }
520 
521 /* ======================================================================== */
522 /*  Function : Sad8x8( )                                                    */
523 /*  Date     : 8/10/2000                                                    */
524 /*  Purpose  : Find SAD between prev block and current block                */
525 /*  In/out   : Previous and current frame block pointers, and frame width   */
526 /*  Return   :                                                              */
527 /*  Modified :                                                              */
528 /*      8/15/01,  - do 4 pixel at a time    assuming 32 bit register        */
529 /* ======================================================================== */
530 #ifdef __clang__
531 __attribute((no_sanitize("integer")))
532 #endif
Sad8x8(UChar * cur,UChar * prev,Int width)533 Int Sad8x8(UChar *cur, UChar *prev, Int width)
534 {
535     UChar *end = cur + (width << 3);
536     Int sad = 0;
537     Int *curInt = (Int*) cur;
538     Int *prevInt = (Int*) prev;
539     Int cur1, cur2, prev1, prev2;
540     UInt mask, sgn_msk = 0x80808080;
541     Int  sum2 = 0, sum4 = 0;
542     Int  tmp;
543     do
544     {
545         mask    = ~(0xFF00);
546         cur1    = curInt[1];        /* load cur[4..7] */
547         cur2    = curInt[0];
548         curInt += (width >> 2);     /* load cur[0..3] and +=lx */
549         prev1   = prevInt[1];
550         prev2   = prevInt[0];
551         prevInt += 4;
552 
553         tmp     = prev2 ^ cur2;
554         cur2    = prev2 - cur2;
555         tmp     = tmp ^ cur2;       /* (^)^(-) last bit is one if carry */
556         tmp     = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */
557         if (cur2 < 0)   tmp = tmp | 0x80000000; /* corcurt sign of first byte */
558         tmp     = (tmp << 8) - tmp;     /* carry borrowed bytes are marked with 0x1FE */
559         cur2    = cur2 + (tmp >> 7);     /* negative bytes is added with 0xFF, -1 */
560         cur2    = cur2 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */
561 
562         tmp     = prev1 ^ cur1;
563         cur1    = prev1 - cur1;
564         tmp     = tmp ^ cur1;       /* (^)^(-) last bit is one if carry */
565         tmp     = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */
566         if (cur1 < 0)   tmp = tmp | 0x80000000; /* corcurt sign of first byte */
567         tmp     = (tmp << 8) - tmp;     /* carry borrowed bytes are marked with 0x1FE */
568         cur1    = cur1 + (tmp >> 7);     /* negative bytes is added with 0xFF, -1 */
569         cur1    = cur1 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */
570 
571         sum4    = sum4 + cur1;
572         cur1    = cur1 & (mask << 8);   /* mask first and third bytes */
573         sum2    = sum2 + ((UInt)cur1 >> 8);
574         sum4    = sum4 + cur2;
575         cur2    = cur2 & (mask << 8);   /* mask first and third bytes */
576         sum2    = sum2 + ((UInt)cur2 >> 8);
577     }
578     while ((uintptr_t)curInt < (uintptr_t)end);
579 
580     cur1 = sum4 - (sum2 << 8);  /* get even-sum */
581     cur1 = cur1 + sum2;         /* add 16 bit even-sum and odd-sum*/
582     cur1 = cur1 + (cur1 << 16); /* add upper and lower 16 bit sum */
583     sad  = ((UInt)cur1 >> 16);  /* take upper 16 bit */
584     return sad;
585 }
586 
587 /* ======================================================================== */
588 /*  Function : getBlockSum( )                                               */
589 /*  Date     : 8/10/2000                                                    */
590 /*  Purpose  : Find summation of value within a block.                      */
591 /*  In/out   : Pointer to current block in a frame and frame width          */
592 /*  Return   :                                                              */
593 /*  Modified :                                                              */
594 /*          8/15/01,  - SIMD 4 pixels at a time                         */
595 /* ======================================================================== */
596 #ifdef __clang__
597 __attribute((no_sanitize("integer")))
598 #endif
getBlockSum(UChar * cur,Int width)599 Int getBlockSum(UChar *cur, Int width)
600 {
601     Int sad = 0, sum4 = 0, sum2 = 0;
602     UChar *end = cur + (width << 3);
603     Int *curInt = (Int*)cur;
604     UInt mask   = ~(0xFF00);
605     Int load1, load2;
606 
607     do
608     {
609         load1 = curInt[1];
610         load2 = curInt[0];
611         curInt += (width >> 2);
612         sum4 += load1;
613         load1 = load1 & (mask << 8); /* even bytes */
614         sum2 += ((UInt)load1 >> 8); /* sum even bytes, 16 bit */
615         sum4 += load2;
616         load2 = load2 & (mask << 8); /* even bytes */
617         sum2 += ((UInt)load2 >> 8); /* sum even bytes, 16 bit */
618     }
619     while ((uintptr_t)curInt < (uintptr_t)end);
620     load1 = sum4 - (sum2 << 8);     /* get even-sum */
621     load1 = load1 + sum2;           /* add 16 bit even-sum and odd-sum*/
622     load1 = load1 + (load1 << 16);  /* add upper and lower 16 bit sum */
623     sad  = ((UInt)load1 >> 16); /* take upper 16 bit */
624 
625     return sad;
626 }
627 
628