1 /* Compute the sum of the squares of a vector of signed shorts
2 
3  *  MMX-assisted version (also used on SSE)
4 
5  * The SSE2 and MMX assist routines both operate on multiples of
6  * 8 words; they differ only in their alignment requirements (8 bytes
7  * for MMX, 16 bytes for SSE2)
8 
9  * Copyright 2004 Phil Karn, KA9Q
10  * May be used under the terms of the GNU Lesser Public License (LGPL)
11  */
#include <stdint.h>
/* Vector helper defined outside this file.
 * NOTE(review): per the file header it presumably sums only whole groups
 * of 8 words and ignores any remainder -- confirm against its source.
 */
long long sumsq_mmx_assist(signed short *in,int cnt);

/* Return the sum of the squares of the cnt signed shorts starting at in.
 *
 * Words ahead of the first 8-byte boundary and the (up to 7) words left
 * over after the last whole group of 8 are summed here in plain C; the
 * aligned middle section is handed to sumsq_mmx_assist().
 *
 * A cnt of zero or less yields 0 without touching in.
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* Reject non-positive counts up front: the loops below only stop when
   * cnt reaches exactly 0, so a negative cnt would run off the buffer.
   */
  if(cnt <= 0){
    return 0;
  }

  /* Handle stuff before the next 8-byte boundary.  The address is
   * inspected through uintptr_t so the pointer is never narrowed to int.
   */
  while(((uintptr_t)in & 7) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* Aligned body; the helper is given the full remaining count */
  sum += sumsq_mmx_assist(in,cnt);

  /* Step over the whole groups of 8 words, keep only the remainder */
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}
36