1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
18 
19 /*****************************************************************
20 *    Lean and mean begins here                                   *
21 *****************************************************************/
22 
23 #include "db_utilities.h"
24 #include "db_feature_detection.h"
25 #ifdef _VERBOSE_
26 #include <iostream>
27 #endif
28 #include <float.h>
29 
30 #define DB_SUB_PIXEL
31 
32 #define BORDER 10 // 5
33 
db_AllocStrengthImage_f(float ** im,int w,int h)34 float** db_AllocStrengthImage_f(float **im,int w,int h)
35 {
36     int i,n,aw;
37     long c,size;
38     float **img,*aim,*p;
39 
40     /*Determine number of 124 element chunks needed*/
41     n=(db_maxi(1,w-6)+123)/124;
42     /*Determine the total allocation width aw*/
43     aw=n*124+8;
44     /*Allocate*/
45     size=aw*h+16;
46     *im=new float [size];
47     /*Clean up*/
48     p=(*im);
49     for(c=0;c<size;c++) p[c]=0.0;
50     /*Get a 16 byte aligned pointer*/
51     aim=db_AlignPointer_f(*im,16);
52     /*Allocate pointer table*/
53     img=new float* [h];
54     /*Initialize the pointer table*/
55     for(i=0;i<h;i++)
56     {
57         img[i]=aim+aw*i+1;
58     }
59 
60     return(img);
61 }
62 
db_FreeStrengthImage_f(float * im,float ** img,int h)63 void db_FreeStrengthImage_f(float *im,float **img,int h)
64 {
65     delete [] im;
66     delete [] img;
67 }
68 
69 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width
70 Memory references occur one pixel outside the subrow*/
db_IxIyRow_f(float * Ix,float * Iy,const float * const * img,int i,int j,int chunk_width)71 inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
72 {
73     int c;
74 
75     for(c=0;c<chunk_width;c++)
76     {
77         Ix[c]=img[i][j+c-1]-img[i][j+c+1];
78         Iy[c]=img[i-1][j+c]-img[i+1][j+c];
79     }
80 }
81 
82 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128
83 Memory references occur one pixel outside the subrow*/
db_IxIyRow_u(int * dxx,const unsigned char * const * img,int i,int j,int nc)84 inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
85 {
86 #ifdef DB_USE_MMX
87     const unsigned char *r1,*r2,*r3;
88 
89     r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;
90 
91     _asm
92     {
93         mov esi,16
94         mov eax,r1
95         mov ebx,r2
96         mov ecx,r3
97         mov edx,dxx
98 
99         /*Get bitmask into mm7*/
100         mov       edi,7F7F7F7Fh
101         movd      mm7,edi
102         punpckldq mm7,mm7
103 
104 loopstart:
105         /***************dx part 1-12*********************************/
106         movq       mm0,[eax]       /*1 Get upper*/
107          pxor      mm6,mm6         /*2 Set to zero*/
108         movq       mm1,[ecx]       /*3 Get lower*/
109          psrlq     mm0,1           /*4 Shift*/
110         psrlq      mm1,1           /*5 Shift*/
111          pand      mm0,mm7         /*6 And*/
112         movq       mm2,[ebx-1]     /*13 Get left*/
113          pand      mm1,mm7         /*7 And*/
114         psubb      mm0,mm1         /*8 Subtract*/
115          pxor      mm5,mm5         /*14 Set to zero*/
116         movq       mm1,mm0         /*9 Copy*/
117          pcmpgtb   mm6,mm0         /*10 Create unpack mask*/
118         movq       mm3,[ebx+1]     /*15 Get right*/
119          punpcklbw mm0,mm6         /*11 Unpack low*/
120         punpckhbw  mm1,mm6         /*12 Unpack high*/
121         /***************dy part 13-24*********************************/
122          movq      mm4,mm0         /*25 Copy dx*/
123         psrlq      mm2,1           /*16 Shift*/
124          pmullw    mm0,mm0         /*26 Multiply dx*dx*/
125         psrlq      mm3,1           /*17 Shift*/
126          pand      mm2,mm7         /*18 And*/
127         pand       mm3,mm7         /*19 And*/
128          /*Stall*/
129         psubb      mm2,mm3         /*20 Subtract*/
130          /*Stall*/
131         movq       mm3,mm2         /*21 Copy*/
132          pcmpgtb   mm5,mm2         /*22 Create unpack mask*/
133         punpcklbw  mm2,mm5         /*23 Unpack low*/
134          /*Stall*/
135         punpckhbw  mm3,mm5         /*24 Unpack high*/
136         /***************dxx dxy dyy low part 25-49*********************************/
137          pmullw    mm4,mm2         /*27 Multiply dx*dy*/
138         pmullw     mm2,mm2         /*28 Multiply dy*dy*/
139          pxor      mm6,mm6         /*29 Set to zero*/
140         movq       mm5,mm0         /*30 Copy dx*dx*/
141          pcmpgtw   mm6,mm0         /*31 Create unpack mask for dx*dx*/
142         punpcklwd  mm0,mm6         /*32 Unpack dx*dx lows*/
143          /*Stall*/
144         punpckhwd  mm5,mm6         /*33 Unpack dx*dx highs*/
145          pxor      mm6,mm6         /*36 Set to zero*/
146         movq       [edx],mm0       /*34 Store dx*dx lows*/
147          movq      mm0,mm4         /*37 Copy dx*dy*/
148         movq       [edx+8],mm5     /*35 Store dx*dx highs*/
149          pcmpgtw   mm6,mm4         /*38 Create unpack mask for dx*dy*/
150         punpcklwd  mm4,mm6         /*39 Unpack dx*dy lows*/
151          /*Stall*/
152         punpckhwd  mm0,mm6         /*40 Unpack dx*dy highs*/
153          pxor      mm6,mm6         /*43 Set to zero*/
154         movq       [edx+512],mm4   /*41 Store dx*dy lows*/
155          movq      mm5,mm2         /*44 Copy dy*dy*/
156         movq       [edx+520],mm0   /*42 Store dx*dy highs*/
157          pcmpgtw   mm6,mm2         /*45 Create unpack mask for dy*dy*/
158         punpcklwd  mm2,mm6         /*46 Unpack dy*dy lows*/
159          movq      mm4,mm1         /*50 Copy dx*/
160         punpckhwd  mm5,mm6         /*47 Unpack dy*dy highs*/
161          pmullw    mm1,mm1         /*51 Multiply dx*dx*/
162         movq       [edx+1024],mm2  /*48 Store dy*dy lows*/
163          pmullw    mm4,mm3         /*52 Multiply dx*dy*/
164         movq       [edx+1032],mm5  /*49 Store dy*dy highs*/
165         /***************dxx dxy dyy high part 50-79*********************************/
166          pmullw    mm3,mm3         /*53 Multiply dy*dy*/
167         pxor       mm6,mm6         /*54 Set to zero*/
168          movq      mm5,mm1         /*55 Copy dx*dx*/
169         pcmpgtw    mm6,mm1         /*56 Create unpack mask for dx*dx*/
170          pxor      mm2,mm2         /*61 Set to zero*/
171         punpcklwd  mm1,mm6         /*57 Unpack dx*dx lows*/
172          movq      mm0,mm4         /*62 Copy dx*dy*/
173         punpckhwd  mm5,mm6         /*58 Unpack dx*dx highs*/
174          pcmpgtw   mm2,mm4         /*63 Create unpack mask for dx*dy*/
175         movq       [edx+16],mm1    /*59 Store dx*dx lows*/
176          punpcklwd mm4,mm2         /*64 Unpack dx*dy lows*/
177         movq       [edx+24],mm5    /*60 Store dx*dx highs*/
178          punpckhwd mm0,mm2         /*65 Unpack dx*dy highs*/
179         movq       [edx+528],mm4   /*66 Store dx*dy lows*/
180          pxor      mm6,mm6         /*68 Set to zero*/
181         movq       [edx+536],mm0   /*67 Store dx*dy highs*/
182          movq      mm5,mm3         /*69 Copy dy*dy*/
183         pcmpgtw    mm6,mm3         /*70 Create unpack mask for dy*dy*/
184          add       eax,8           /*75*/
185         punpcklwd  mm3,mm6         /*71 Unpack dy*dy lows*/
186          add       ebx,8           /*76*/
187         punpckhwd  mm5,mm6         /*72 Unpack dy*dy highs*/
188          add       ecx,8           /*77*/
189         movq       [edx+1040],mm3  /*73 Store dy*dy lows*/
190          /*Stall*/
191         movq       [edx+1048],mm5  /*74 Store dy*dy highs*/
192          /*Stall*/
193         add        edx,32          /*78*/
194          dec esi                   /*79*/
195         jnz loopstart
196 
197         emms
198     }
199 
200 #else
201     int c;
202     int Ix,Iy;
203 
204     for(c=0;c<nc;c++)
205     {
206         Ix=(img[i][j+c-1]-img[i][j+c+1])>>1;
207         Iy=(img[i-1][j+c]-img[i+1][j+c])>>1;
208         dxx[c]=Ix*Ix;
209         dxx[c+128]=Ix*Iy;
210         dxx[c+256]=Iy*Iy;
211     }
212 #endif /*DB_USE_MMX*/
213 }
214 
215 /*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/
db_gxx_gxy_gyy_row_f(float * gxx,float * gxy,float * gyy,int chunk_width,float * Ix0,float * Ix1,float * Ix2,float * Ix3,float * Ix4,float * Iy0,float * Iy1,float * Iy2,float * Iy3,float * Iy4)216 inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
217                                  float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
218                                  float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
219 {
220     int c;
221     float dx,dy;
222     float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4;
223 
224     for(c=0;c<chunk_width;c++)
225     {
226         dx=Ix0[c];
227         dy=Iy0[c];
228         Ixx0=dx*dx;
229         Ixy0=dx*dy;
230         Iyy0=dy*dy;
231 
232         dx=Ix1[c];
233         dy=Iy1[c];
234         Ixx1=dx*dx;
235         Ixy1=dx*dy;
236         Iyy1=dy*dy;
237 
238         dx=Ix2[c];
239         dy=Iy2[c];
240         Ixx2=dx*dx;
241         Ixy2=dx*dy;
242         Iyy2=dy*dy;
243 
244         dx=Ix3[c];
245         dy=Iy3[c];
246         Ixx3=dx*dx;
247         Ixy3=dx*dy;
248         Iyy3=dy*dy;
249 
250         dx=Ix4[c];
251         dy=Iy4[c];
252         Ixx4=dx*dx;
253         Ixy4=dx*dy;
254         Iyy4=dy*dy;
255 
256         /*Filter vertically*/
257         gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4;
258         gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4;
259         gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4;
260     }
261 }
262 
263 /*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/
db_gxx_gxy_gyy_row_s(int * g,int * d0,int * d1,int * d2,int * d3,int * d4,int nc)264 inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
265 {
266 #ifdef DB_USE_MMX
267     int c;
268 
269     _asm
270     {
271         mov c,64
272         mov eax,d0
273         mov ebx,d1
274         mov ecx,d2
275         mov edx,d3
276         mov edi,d4
277         mov esi,g
278 
279 loopstart:
280         /***************dxx part 1-14*********************************/
281         movq        mm0,[eax]      /*1 Get dxx0*/
282          /*Stall*/
283         movq        mm1,[ebx]      /*2 Get dxx1*/
284          /*Stall*/
285         movq        mm2,[ecx]      /*5 Get dxx2*/
286          pslld      mm1,2          /*3 Shift dxx1*/
287         movq        mm3,[edx]      /*10 Get dxx3*/
288          paddd      mm0,mm1        /*4 Accumulate dxx1*/
289         movq        mm4,[eax+512]  /*15 Get dxy0*/
290          pslld      mm2,1          /*6 Shift dxx2 1*/
291         paddd       mm0,mm2        /*7 Accumulate dxx2 1*/
292          pslld      mm2,1          /*8 Shift dxx2 2*/
293         movq        mm5,[ebx+512]  /*16 Get dxy1*/
294          paddd      mm0,mm2        /*9 Accumulate dxx2 2*/
295         pslld       mm3,2          /*11 Shift dxx3*/
296          /*Stall*/
297         paddd       mm0,mm3        /*12 Accumulate dxx3*/
298          pslld      mm5,2          /*17 Shift dxy1*/
299         paddd       mm0,[edi]      /*13 Accumulate dxx4*/
300          paddd      mm4,mm5        /*18 Accumulate dxy1*/
301         movq        mm6,[ecx+512]  /*19 Get dxy2*/
302          /*Stall*/
303         movq        [esi],mm0      /*14 Store dxx sums*/
304         /***************dxy part 15-28*********************************/
305          pslld      mm6,1          /*20 Shift dxy2 1*/
306         paddd       mm4,mm6        /*21 Accumulate dxy2 1*/
307          pslld      mm6,1          /*22 Shift dxy2 2*/
308         movq        mm0,[eax+1024] /*29 Get dyy0*/
309          paddd      mm4,mm6        /*23 Accumulate dxy2 2*/
310         movq        mm7,[edx+512]  /*24 Get dxy3*/
311          pslld      mm7,2          /*25 Shift dxy3*/
312         movq        mm1,[ebx+1024] /*30 Get dyy1*/
313          paddd      mm4,mm7        /*26 Accumulate dxy3*/
314         paddd       mm4,[edi+512]  /*27 Accumulate dxy4*/
315          pslld      mm1,2          /*31 Shift dyy1*/
316         movq        mm2,[ecx+1024] /*33 Get dyy2*/
317          paddd      mm0,mm1        /*32 Accumulate dyy1*/
318         movq        [esi+512],mm4  /*28 Store dxy sums*/
319          pslld      mm2,1          /*34 Shift dyy2 1*/
320         /***************dyy part 29-49*********************************/
321 
322 
323         movq        mm3,[edx+1024] /*38 Get dyy3*/
324          paddd      mm0,mm2        /*35 Accumulate dyy2 1*/
325         paddd       mm0,[edi+1024] /*41 Accumulate dyy4*/
326          pslld      mm2,1          /*36 Shift dyy2 2*/
327         paddd       mm0,mm2        /*37 Accumulate dyy2 2*/
328          pslld      mm3,2          /*39 Shift dyy3*/
329         paddd       mm0,mm3        /*40 Accumulate dyy3*/
330          add        eax,8           /*43*/
331         add         ebx,8           /*44*/
332          add        ecx,8           /*45*/
333         movq        [esi+1024],mm0 /*42 Store dyy sums*/
334          /*Stall*/
335         add         edx,8           /*46*/
336          add        edi,8           /*47*/
337         add         esi,8           /*48*/
338          dec        c               /*49*/
339         jnz         loopstart
340 
341         emms
342     }
343 
344 #else
345     int c,dd;
346 
347     for(c=0;c<nc;c++)
348     {
349         /*Filter vertically*/
350         dd=d2[c];
351         g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];
352 
353         dd=d2[c+128];
354         g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];
355 
356         dd=d2[c+256];
357         g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
358     }
359 #endif /*DB_USE_MMX*/
360 }
361 
362 /*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j
363 and with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width
364 and starting at (i,j-2)*/
db_HarrisStrength_row_f(float ** s,float * gxx,float * gxy,float * gyy,int i,int j,int chunk_width)365 inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
366 {
367     float Gxx,Gxy,Gyy,det,trc;
368     int c;
369 
370     for(c=0;c<chunk_width;c++)
371     {
372         Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
373         Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
374         Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];
375 
376         det=Gxx*Gyy-Gxy*Gxy;
377         trc=Gxx+Gyy;
378         s[i][j+c]=det-0.06f*trc*trc;
379     }
380 }
381 
382 /*Filter g of length 128 in place with 14641. Output is shifted two steps
383 and of length 124*/
db_Filter14641_128_i(int * g,int nc)384 inline void db_Filter14641_128_i(int *g,int nc)
385 {
386 #ifdef DB_USE_MMX
387     int mask;
388 
389     mask=0xFFFFFFFF;
390     _asm
391     {
392         mov esi,31
393         mov eax,g
394 
395         /*Get bitmask 00000000FFFFFFFF into mm7*/
396         movd mm7,mask
397 
398         /*Warming iteration one 1-16********************/
399         movq       mm6,[eax]      /*1 Load new data*/
400         paddd      mm0,mm6        /*2 Add 1* behind two steps*/
401         movq       mm2,mm6        /*3 Start with 1* in front two steps*/
402         pslld      mm6,1          /*4*/
403         paddd      mm1,mm6        /*5 Add 2* same place*/
404         pslld      mm6,1          /*6*/
405         paddd      mm1,mm6        /*7 Add 4* same place*/
406         pshufw     mm6,mm6,4Eh    /*8 Swap the two double-words using bitmask 01001110=4Eh*/
407         paddd      mm1,mm6        /*9 Add 4* swapped*/
408         movq       mm5,mm6        /*10 Copy*/
409         pand       mm6,mm7        /*11 Get low double-word only*/
410         paddd      mm2,mm6        /*12 Add 4* in front one step*/
411         pxor       mm6,mm5        /*13 Get high double-word only*/
412         paddd      mm0,mm6        /*14 Add 4* behind one step*/
413         movq       mm0,mm1        /*15 Shift along*/
414         movq       mm1,mm2        /*16 Shift along*/
415         /*Warming iteration two 17-32********************/
416         movq       mm4,[eax+8]    /*17 Load new data*/
417         paddd      mm0,mm4        /*18 Add 1* behind two steps*/
418         movq       mm2,mm4        /*19 Start with 1* in front two steps*/
419         pslld      mm4,1          /*20*/
420         paddd      mm1,mm4        /*21 Add 2* same place*/
421         pslld      mm4,1          /*22*/
422         paddd      mm1,mm4        /*23 Add 4* same place*/
423         pshufw     mm4,mm4,4Eh    /*24 Swap the two double-words using bitmask 01001110=4Eh*/
424         paddd      mm1,mm4        /*25 Add 4* swapped*/
425         movq       mm3,mm4        /*26 Copy*/
426         pand       mm4,mm7        /*27 Get low double-word only*/
427         paddd      mm2,mm4        /*28 Add 4* in front one step*/
428         pxor       mm4,mm3        /*29 Get high double-word only*/
429         paddd      mm0,mm4        /*30 Add 4* behind one step*/
430         movq       mm0,mm1        /*31 Shift along*/
431         movq       mm1,mm2        /*32 Shift along*/
432 
433         /*Loop********************/
434 loopstart:
435         /*First part of loop 33-47********/
436         movq        mm6,[eax+16]   /*33 Load new data*/
437          /*Stall*/
438         paddd       mm0,mm6        /*34 Add 1* behind two steps*/
439          movq       mm2,mm6        /*35 Start with 1* in front two steps*/
440         movq        mm4,[eax+24]   /*48 Load new data*/
441          pslld      mm6,1          /*36*/
442         paddd       mm1,mm6        /*37 Add 2* same place*/
443          pslld      mm6,1          /*38*/
444         paddd       mm1,mm6        /*39 Add 4* same place*/
445          pshufw     mm6,mm6,4Eh    /*40 Swap the two double-words using bitmask 01001110=4Eh*/
446         paddd       mm1,mm4        /*49 Add 1* behind two steps*/
447          movq       mm5,mm6        /*41 Copy*/
448         paddd       mm1,mm6        /*42 Add 4* swapped*/
449          pand       mm6,mm7        /*43 Get low double-word only*/
450         paddd       mm2,mm6        /*44 Add 4* in front one step*/
451          pxor       mm6,mm5        /*45 Get high double-word only*/
452         paddd       mm0,mm6        /*46 Add 4* behind one step*/
453          movq       mm6,mm4        /*50a Copy*/
454         pslld       mm4,1          /*51*/
455          /*Stall*/
456         movq        [eax],mm0      /*47 Store result two steps behind*/
457         /*Second part of loop 48-66********/
458          movq       mm0,mm6        /*50b Start with 1* in front two steps*/
459         paddd       mm2,mm4        /*52 Add 2* same place*/
460          pslld      mm4,1          /*53*/
461         paddd       mm2,mm4        /*54 Add 4* same place*/
462          pshufw     mm4,mm4,4Eh    /*55 Swap the two double-words using bitmask 01001110=4Eh*/
463         paddd       mm2,mm4        /*56 Add 4* swapped*/
464          movq       mm3,mm4        /*57 Copy*/
465         pand        mm4,mm7        /*58 Get low double-word only*/
466          /*Stall*/
467         paddd       mm0,mm4        /*59 Add 4* in front one step*/
468          pxor       mm4,mm3        /*60 Get high double-word only*/
469         paddd       mm1,mm4        /*61 Add 4* behind one step*/
470          add        eax,16         /*65*/
471         dec         esi            /*66*/
472          /*Stall*/
473         movq        [eax-8],mm1    /*62 Store result two steps behind*/
474          movq       mm1,mm0        /*63 Shift along*/
475         movq        mm0,mm2        /*64 Shift along*/
476         jnz loopstart
477 
478         emms
479     }
480 
481 #else
482     int c;
483 
484     for(c=0;c<nc-4;c++)
485     {
486         g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4];
487     }
488 #endif /*DB_USE_MMX*/
489 }
490 
491 /*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
492 of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
493 s should be 16 byte aligned*/
db_HarrisStrength_row_s(float * s,int * gxx,int * gxy,int * gyy,int nc)494 inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
495 {
496     float k;
497 
498     k=0.06f;
499 
500     db_Filter14641_128_i(gxx,nc);
501     db_Filter14641_128_i(gxy,nc);
502     db_Filter14641_128_i(gyy,nc);
503 
504 #ifdef DB_USE_SIMD
505 
506 
507     _asm
508     {
509         mov esi,15
510         mov eax,gxx
511         mov ebx,gxy
512         mov ecx,gyy
513         mov edx,s
514 
515         /*broadcast k to all positions of xmm7*/
516         movss   xmm7,k
517         shufps  xmm7,xmm7,0
518 
519         /*****Warm up 1-10**************************************/
520         cvtpi2ps  xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
521          /*Stall*/
522         cvtpi2ps  xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
523          movlhps  xmm0,xmm0    /*2 Move them to the high double-word*/
524         cvtpi2ps  xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
525          movlhps  xmm1,xmm1    /*5 Move them to the high double-word*/
526         cvtpi2ps  xmm0,[eax]   /*3 Convert two integers into floating point of low double-word*/
527          movlhps  xmm2,xmm2    /*8 Move them to the high double-word*/
528         cvtpi2ps  xmm1,[ebx]   /*6 Convert two integers into floating point of low double-word*/
529          movaps   xmm3,xmm0    /*10 Copy Cxx*/
530         cvtpi2ps  xmm2,[ecx]   /*9 Convert two integers into floating point of low double-word*/
531          /*Stall*/
532 loopstart:
533         /*****First part of loop 11-18***********************/
534         mulps     xmm0,xmm2     /*11 Multiply to get Gxx*Gyy*/
535          addps    xmm2,xmm3     /*12 Add to get Gxx+Gyy*/
536         cvtpi2ps  xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
537          mulps    xmm1,xmm1     /*13 Multiply to get Gxy*Gxy*/
538         mulps     xmm2,xmm2     /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
539          movlhps  xmm4,xmm4     /*20 Move them to the high double-word*/
540         cvtpi2ps  xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
541          /*Stall*/
542         subps     xmm0,xmm1     /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
543          mulps    xmm2,xmm7     /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
544         cvtpi2ps  xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
545          /*Stall*/
546         movlhps   xmm5,xmm5     /*23 Move them to the high double-word*/
547          /*Stall*/
548         cvtpi2ps  xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
549          subps    xmm0,xmm2     /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
550         cvtpi2ps  xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
551          /*Stall*/
552         movaps    [edx],xmm0    /*18 Store*/
553         /*****Second part of loop 26-40***********************/
554          movlhps  xmm6,xmm6     /*26 Move them to the high double-word*/
555         cvtpi2ps  xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
556          movaps   xmm3,xmm4     /*28 Copy Cxx*/
557         mulps     xmm4,xmm6     /*29 Multiply to get Gxx*Gyy*/
558          addps    xmm6,xmm3     /*30 Add to get Gxx+Gyy*/
559         cvtpi2ps  xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
560          mulps    xmm5,xmm5     /*31 Multiply to get Gxy*Gxy*/
561         cvtpi2ps  xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
562          mulps    xmm6,xmm6     /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
563         cvtpi2ps  xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
564          movlhps  xmm0,xmm0     /*(2 Next) Move them to the high double-word*/
565         subps     xmm4,xmm5     /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
566          movlhps  xmm1,xmm1     /*(5 Next) Move them to the high double-word*/
567         cvtpi2ps  xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
568          mulps    xmm6,xmm7     /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
569         cvtpi2ps  xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
570          movlhps  xmm2,xmm2     /*(8 Next) Move them to the high double-word*/
571         movaps    xmm3,xmm0     /*(10 Next) Copy Cxx*/
572          add      eax,32        /*37*/
573         subps     xmm4,xmm6     /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
574          add      ebx,32        /*38*/
575         cvtpi2ps  xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
576          /*Stall*/
577         movaps    [edx+16],xmm4 /*36 Store*/
578          /*Stall*/
579         add       ecx,32        /*39*/
580          add      edx,32        /*40*/
581         dec       esi           /*41*/
582         jnz loopstart
583 
584         /****Cool down***************/
585         mulps    xmm0,xmm2    /*Multiply to get Gxx*Gyy*/
586         addps    xmm2,xmm3    /*Add to get Gxx+Gyy*/
587         mulps    xmm1,xmm1    /*Multiply to get Gxy*Gxy*/
588         mulps    xmm2,xmm2    /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
589         subps    xmm0,xmm1    /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
590         mulps    xmm2,xmm7    /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
591         subps    xmm0,xmm2    /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
592         movaps   [edx],xmm0   /*Store*/
593     }
594 
595 #else
596     float Gxx,Gxy,Gyy,det,trc;
597     int c;
598 
599     //for(c=0;c<124;c++)
600     for(c=0;c<nc-4;c++)
601     {
602         Gxx=(float)gxx[c];
603         Gxy=(float)gxy[c];
604         Gyy=(float)gyy[c];
605 
606         det=Gxx*Gyy-Gxy*Gxy;
607         trc=Gxx+Gyy;
608         s[c]=det-k*trc*trc;
609     }
610 #endif /*DB_USE_SIMD*/
611 }
612 
613 /*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
614 store it into the corresponding region of s. left and top have to be at least 3 and
615 right and bottom have to be at most width-4,height-4*/
db_HarrisStrengthChunk_f(float ** s,const float * const * img,int left,int top,int right,int bottom,float * temp)616 inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
617                                       /*temp should point to at least
618                                       13*(right-left+5) of allocated memory*/
619                                       float *temp)
620 {
621     float *Ix[5],*Iy[5];
622     float *gxx,*gxy,*gyy;
623     int i,chunk_width,chunk_width_p4;
624 
625     chunk_width=right-left+1;
626     chunk_width_p4=chunk_width+4;
627     gxx=temp;
628     gxy=gxx+chunk_width_p4;
629     gyy=gxy+chunk_width_p4;
630     for(i=0;i<5;i++)
631     {
632         Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
633         Iy[i]=Ix[i]+chunk_width_p4;
634     }
635 
636     /*Fill four rows of the wrap-around derivative buffers*/
637     for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
638 
639     /*For each output row*/
640     for(i=top;i<=bottom;i++)
641     {
642         /*Step the derivative buffers*/
643         db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
644 
645         /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
646         db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
647                                  Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
648                                  Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
649 
650         /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
651         db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
652     }
653 }
654 
655 /*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
656 store it into the corresponding region of s. left and top have to be at least 3 and
657 right and bottom have to be at most width-4,height-4. The left of the region in s should
658 be 16 byte aligned*/
db_HarrisStrengthChunk_u(float ** s,const unsigned char * const * img,int left,int top,int bottom,int * temp,int nc)659 inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
660                                       /*temp should point to at least
661                                       18*128 of allocated memory*/
662                                       int *temp, int nc)
663 {
664     int *Ixx[5],*Ixy[5],*Iyy[5];
665     int *gxx,*gxy,*gyy;
666     int i;
667 
668     gxx=temp;
669     gxy=gxx+128;
670     gyy=gxy+128;
671     for(i=0;i<5;i++)
672     {
673         Ixx[i]=gyy+(3*i+1)*128;
674         Ixy[i]=gyy+(3*i+2)*128;
675         Iyy[i]=gyy+(3*i+3)*128;
676     }
677 
678     /*Fill four rows of the wrap-around derivative buffers*/
679     for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
680 
681     /*For each output row*/
682     for(i=top;i<=bottom;i++)
683     {
684         /*Step the derivative buffers*/
685         db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
686 
687         /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
688         db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
689 
690         /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
691         db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
692     }
693 
694 }
695 
696 /*Compute Harris corner strength of img. Strength is returned for the region
697 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
698 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
699 for a meaningful result*/
db_HarrisStrength_f(float ** s,const float * const * img,int w,int h,float * temp,int chunk_width)700 void db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
701                                     /*temp should point to at least
702                                     13*(chunk_width+4) of allocated memory*/
703                                     float *temp,
704                                     int chunk_width)
705 {
706     int x,next_x,last,right;
707 
708     last=w-4;
709     for(x=3;x<=last;x=next_x)
710     {
711         next_x=x+chunk_width;
712         right=next_x-1;
713         if(right>last) right=last;
714         /*Compute the Harris strength of a chunk*/
715         db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
716     }
717 }
718 
719 /*Compute Harris corner strength of img. Strength is returned for the region
720 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
721 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
722 for a meaningful result.Moreover, the image should be overallocated by 256 bytes.
723 s[i][3] should by 16 byte aligned for any i*/
db_HarrisStrength_u(float ** s,const unsigned char * const * img,int w,int h,int * temp)724 void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
725                                     /*temp should point to at least
726                                     18*128 of allocated memory*/
727                                     int *temp)
728 {
729     int x,next_x,last;
730     int nc;
731 
732     last=w-4;
733     for(x=3;x<=last;x=next_x)
734     {
735         next_x=x+124;
736 
737         // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
738         nc = db_mini(128,last-x+1);
739         //nc = 128;
740 
741         /*Compute the Harris strength of a chunk*/
742         db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
743     }
744 }
745 
db_Max_128Aligned16_f(float * v)746 inline float db_Max_128Aligned16_f(float *v)
747 {
748 #ifdef DB_USE_SIMD
749     float back;
750 
751     _asm
752     {
753         mov eax,v
754 
755         /*Chunk1*/
756         movaps xmm0,[eax]
757         movaps xmm1,[eax+16]
758         movaps xmm2,[eax+32]
759         movaps xmm3,[eax+48]
760         movaps xmm4,[eax+64]
761         movaps xmm5,[eax+80]
762         movaps xmm6,[eax+96]
763         movaps xmm7,[eax+112]
764 
765         /*Chunk2*/
766         maxps xmm0,[eax+128]
767         maxps xmm1,[eax+144]
768         maxps xmm2,[eax+160]
769         maxps xmm3,[eax+176]
770         maxps xmm4,[eax+192]
771         maxps xmm5,[eax+208]
772         maxps xmm6,[eax+224]
773         maxps xmm7,[eax+240]
774 
775         /*Chunk3*/
776         maxps xmm0,[eax+256]
777         maxps xmm1,[eax+272]
778         maxps xmm2,[eax+288]
779         maxps xmm3,[eax+304]
780         maxps xmm4,[eax+320]
781         maxps xmm5,[eax+336]
782         maxps xmm6,[eax+352]
783         maxps xmm7,[eax+368]
784 
785         /*Chunk4*/
786         maxps xmm0,[eax+384]
787         maxps xmm1,[eax+400]
788         maxps xmm2,[eax+416]
789         maxps xmm3,[eax+432]
790         maxps xmm4,[eax+448]
791         maxps xmm5,[eax+464]
792         maxps xmm6,[eax+480]
793         maxps xmm7,[eax+496]
794 
795         /*Collect*/
796         maxps   xmm0,xmm1
797         maxps   xmm2,xmm3
798         maxps   xmm4,xmm5
799         maxps   xmm6,xmm7
800         maxps   xmm0,xmm2
801         maxps   xmm4,xmm6
802         maxps   xmm0,xmm4
803         movhlps xmm1,xmm0
804         maxps   xmm0,xmm1
805         shufps  xmm1,xmm0,1
806         maxps   xmm0,xmm1
807         movss   back,xmm0
808     }
809 
810     return(back);
811 #else
812     float val,max_val;
813     float *p,*stop_p;
814     max_val=v[0];
815     for(p=v+1,stop_p=v+128;p!=stop_p;)
816     {
817         val= *p++;
818         if(val>max_val) max_val=val;
819     }
820     return(max_val);
821 #endif /*DB_USE_SIMD*/
822 }
823 
db_Max_64Aligned16_f(float * v)824 inline float db_Max_64Aligned16_f(float *v)
825 {
826 #ifdef DB_USE_SIMD
827     float back;
828 
829     _asm
830     {
831         mov eax,v
832 
833         /*Chunk1*/
834         movaps xmm0,[eax]
835         movaps xmm1,[eax+16]
836         movaps xmm2,[eax+32]
837         movaps xmm3,[eax+48]
838         movaps xmm4,[eax+64]
839         movaps xmm5,[eax+80]
840         movaps xmm6,[eax+96]
841         movaps xmm7,[eax+112]
842 
843         /*Chunk2*/
844         maxps xmm0,[eax+128]
845         maxps xmm1,[eax+144]
846         maxps xmm2,[eax+160]
847         maxps xmm3,[eax+176]
848         maxps xmm4,[eax+192]
849         maxps xmm5,[eax+208]
850         maxps xmm6,[eax+224]
851         maxps xmm7,[eax+240]
852 
853         /*Collect*/
854         maxps   xmm0,xmm1
855         maxps   xmm2,xmm3
856         maxps   xmm4,xmm5
857         maxps   xmm6,xmm7
858         maxps   xmm0,xmm2
859         maxps   xmm4,xmm6
860         maxps   xmm0,xmm4
861         movhlps xmm1,xmm0
862         maxps   xmm0,xmm1
863         shufps  xmm1,xmm0,1
864         maxps   xmm0,xmm1
865         movss   back,xmm0
866     }
867 
868     return(back);
869 #else
870     float val,max_val;
871     float *p,*stop_p;
872     max_val=v[0];
873     for(p=v+1,stop_p=v+64;p!=stop_p;)
874     {
875         val= *p++;
876         if(val>max_val) max_val=val;
877     }
878     return(max_val);
879 #endif /*DB_USE_SIMD*/
880 }
881 
db_Max_32Aligned16_f(float * v)882 inline float db_Max_32Aligned16_f(float *v)
883 {
884 #ifdef DB_USE_SIMD
885     float back;
886 
887     _asm
888     {
889         mov eax,v
890 
891         /*Chunk1*/
892         movaps xmm0,[eax]
893         movaps xmm1,[eax+16]
894         movaps xmm2,[eax+32]
895         movaps xmm3,[eax+48]
896         movaps xmm4,[eax+64]
897         movaps xmm5,[eax+80]
898         movaps xmm6,[eax+96]
899         movaps xmm7,[eax+112]
900 
901         /*Collect*/
902         maxps   xmm0,xmm1
903         maxps   xmm2,xmm3
904         maxps   xmm4,xmm5
905         maxps   xmm6,xmm7
906         maxps   xmm0,xmm2
907         maxps   xmm4,xmm6
908         maxps   xmm0,xmm4
909         movhlps xmm1,xmm0
910         maxps   xmm0,xmm1
911         shufps  xmm1,xmm0,1
912         maxps   xmm0,xmm1
913         movss   back,xmm0
914     }
915 
916     return(back);
917 #else
918     float val,max_val;
919     float *p,*stop_p;
920     max_val=v[0];
921     for(p=v+1,stop_p=v+32;p!=stop_p;)
922     {
923         val= *p++;
924         if(val>max_val) max_val=val;
925     }
926     return(max_val);
927 #endif /*DB_USE_SIMD*/
928 }
929 
db_Max_16Aligned16_f(float * v)930 inline float db_Max_16Aligned16_f(float *v)
931 {
932 #ifdef DB_USE_SIMD
933     float back;
934 
935     _asm
936     {
937         mov eax,v
938 
939         /*Chunk1*/
940         movaps xmm0,[eax]
941         movaps xmm1,[eax+16]
942         movaps xmm2,[eax+32]
943         movaps xmm3,[eax+48]
944 
945         /*Collect*/
946         maxps   xmm0,xmm1
947         maxps   xmm2,xmm3
948         maxps   xmm0,xmm2
949         movhlps xmm1,xmm0
950         maxps   xmm0,xmm1
951         shufps  xmm1,xmm0,1
952         maxps   xmm0,xmm1
953         movss   back,xmm0
954     }
955 
956     return(back);
957 #else
958     float val,max_val;
959     float *p,*stop_p;
960     max_val=v[0];
961     for(p=v+1,stop_p=v+16;p!=stop_p;)
962     {
963         val= *p++;
964         if(val>max_val) max_val=val;
965     }
966     return(max_val);
967 #endif /*DB_USE_SIMD*/
968 }
969 
db_Max_8Aligned16_f(float * v)970 inline float db_Max_8Aligned16_f(float *v)
971 {
972 #ifdef DB_USE_SIMD
973     float back;
974 
975     _asm
976     {
977         mov eax,v
978 
979         /*Chunk1*/
980         movaps xmm0,[eax]
981         movaps xmm1,[eax+16]
982 
983         /*Collect*/
984         maxps   xmm0,xmm1
985         movhlps xmm1,xmm0
986         maxps   xmm0,xmm1
987         shufps  xmm1,xmm0,1
988         maxps   xmm0,xmm1
989         movss   back,xmm0
990     }
991 
992     return(back);
993 #else
994     float val,max_val;
995     float *p,*stop_p;
996     max_val=v[0];
997     for(p=v+1,stop_p=v+8;p!=stop_p;)
998     {
999         val= *p++;
1000         if(val>max_val) max_val=val;
1001     }
1002     return(max_val);
1003 #endif /*DB_USE_SIMD*/
1004 }
1005 
db_Max_Aligned16_f(float * v,int size)1006 inline float db_Max_Aligned16_f(float *v,int size)
1007 {
1008     float val,max_val;
1009     float *stop_v;
1010 
1011     max_val=v[0];
1012     for(;size>=128;size-=128)
1013     {
1014         val=db_Max_128Aligned16_f(v);
1015         v+=128;
1016         if(val>max_val) max_val=val;
1017     }
1018     if(size&64)
1019     {
1020         val=db_Max_64Aligned16_f(v);
1021         v+=64;
1022         if(val>max_val) max_val=val;
1023     }
1024     if(size&32)
1025     {
1026         val=db_Max_32Aligned16_f(v);
1027         v+=32;
1028         if(val>max_val) max_val=val;
1029     }
1030     if(size&16)
1031     {
1032         val=db_Max_16Aligned16_f(v);
1033         v+=16;
1034         if(val>max_val) max_val=val;
1035     }
1036     if(size&8)
1037     {
1038         val=db_Max_8Aligned16_f(v);
1039         v+=8;
1040         if(val>max_val) max_val=val;
1041     }
1042     if(size&7)
1043     {
1044         for(stop_v=v+(size&7);v!=stop_v;)
1045         {
1046             val= *v++;
1047             if(val>max_val) max_val=val;
1048         }
1049     }
1050 
1051     return(max_val);
1052 }
1053 
1054 /*Find maximum value of img in the region starting at (left,top)
1055 and with width w and height h. img[left] should be 16 byte aligned*/
db_MaxImage_Aligned16_f(float ** img,int left,int top,int w,int h)1056 float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
1057 {
1058     float val,max_val;
1059     int i,stop_i;
1060 
1061     if(w && h)
1062     {
1063         stop_i=top+h;
1064         max_val=img[top][left];
1065 
1066         for(i=top;i<stop_i;i++)
1067         {
1068             val=db_Max_Aligned16_f(img[i]+left,w);
1069             if(val>max_val) max_val=val;
1070         }
1071         return(max_val);
1072     }
1073     return(0.0);
1074 }
1075 
db_MaxVector_128_Aligned16_f(float * m,float * v1,float * v2)1076 inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
1077 {
1078 #ifdef DB_USE_SIMD
1079     _asm
1080     {
1081         mov eax,v1
1082         mov ebx,v2
1083         mov ecx,m
1084 
1085         /*Chunk1*/
1086         movaps xmm0,[eax]
1087         movaps xmm1,[eax+16]
1088         movaps xmm2,[eax+32]
1089         movaps xmm3,[eax+48]
1090         movaps xmm4,[eax+64]
1091         movaps xmm5,[eax+80]
1092         movaps xmm6,[eax+96]
1093         movaps xmm7,[eax+112]
1094         maxps  xmm0,[ebx]
1095         maxps  xmm1,[ebx+16]
1096         maxps  xmm2,[ebx+32]
1097         maxps  xmm3,[ebx+48]
1098         maxps  xmm4,[ebx+64]
1099         maxps  xmm5,[ebx+80]
1100         maxps  xmm6,[ebx+96]
1101         maxps  xmm7,[ebx+112]
1102         movaps [ecx],xmm0
1103         movaps [ecx+16],xmm1
1104         movaps [ecx+32],xmm2
1105         movaps [ecx+48],xmm3
1106         movaps [ecx+64],xmm4
1107         movaps [ecx+80],xmm5
1108         movaps [ecx+96],xmm6
1109         movaps [ecx+112],xmm7
1110 
1111         /*Chunk2*/
1112         movaps xmm0,[eax+128]
1113         movaps xmm1,[eax+144]
1114         movaps xmm2,[eax+160]
1115         movaps xmm3,[eax+176]
1116         movaps xmm4,[eax+192]
1117         movaps xmm5,[eax+208]
1118         movaps xmm6,[eax+224]
1119         movaps xmm7,[eax+240]
1120         maxps  xmm0,[ebx+128]
1121         maxps  xmm1,[ebx+144]
1122         maxps  xmm2,[ebx+160]
1123         maxps  xmm3,[ebx+176]
1124         maxps  xmm4,[ebx+192]
1125         maxps  xmm5,[ebx+208]
1126         maxps  xmm6,[ebx+224]
1127         maxps  xmm7,[ebx+240]
1128         movaps [ecx+128],xmm0
1129         movaps [ecx+144],xmm1
1130         movaps [ecx+160],xmm2
1131         movaps [ecx+176],xmm3
1132         movaps [ecx+192],xmm4
1133         movaps [ecx+208],xmm5
1134         movaps [ecx+224],xmm6
1135         movaps [ecx+240],xmm7
1136 
1137         /*Chunk3*/
1138         movaps xmm0,[eax+256]
1139         movaps xmm1,[eax+272]
1140         movaps xmm2,[eax+288]
1141         movaps xmm3,[eax+304]
1142         movaps xmm4,[eax+320]
1143         movaps xmm5,[eax+336]
1144         movaps xmm6,[eax+352]
1145         movaps xmm7,[eax+368]
1146         maxps  xmm0,[ebx+256]
1147         maxps  xmm1,[ebx+272]
1148         maxps  xmm2,[ebx+288]
1149         maxps  xmm3,[ebx+304]
1150         maxps  xmm4,[ebx+320]
1151         maxps  xmm5,[ebx+336]
1152         maxps  xmm6,[ebx+352]
1153         maxps  xmm7,[ebx+368]
1154         movaps [ecx+256],xmm0
1155         movaps [ecx+272],xmm1
1156         movaps [ecx+288],xmm2
1157         movaps [ecx+304],xmm3
1158         movaps [ecx+320],xmm4
1159         movaps [ecx+336],xmm5
1160         movaps [ecx+352],xmm6
1161         movaps [ecx+368],xmm7
1162 
1163         /*Chunk4*/
1164         movaps xmm0,[eax+384]
1165         movaps xmm1,[eax+400]
1166         movaps xmm2,[eax+416]
1167         movaps xmm3,[eax+432]
1168         movaps xmm4,[eax+448]
1169         movaps xmm5,[eax+464]
1170         movaps xmm6,[eax+480]
1171         movaps xmm7,[eax+496]
1172         maxps  xmm0,[ebx+384]
1173         maxps  xmm1,[ebx+400]
1174         maxps  xmm2,[ebx+416]
1175         maxps  xmm3,[ebx+432]
1176         maxps  xmm4,[ebx+448]
1177         maxps  xmm5,[ebx+464]
1178         maxps  xmm6,[ebx+480]
1179         maxps  xmm7,[ebx+496]
1180         movaps [ecx+384],xmm0
1181         movaps [ecx+400],xmm1
1182         movaps [ecx+416],xmm2
1183         movaps [ecx+432],xmm3
1184         movaps [ecx+448],xmm4
1185         movaps [ecx+464],xmm5
1186         movaps [ecx+480],xmm6
1187         movaps [ecx+496],xmm7
1188     }
1189 #else
1190     int i;
1191     float a,b;
1192     for(i=0;i<128;i++)
1193     {
1194         a=v1[i];
1195         b=v2[i];
1196         if(a>=b) m[i]=a;
1197         else m[i]=b;
1198     }
1199 #endif /*DB_USE_SIMD*/
1200 }
1201 
db_MaxVector_128_SecondSourceDestAligned16_f(float * m,float * v1,float * v2)1202 inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
1203 {
1204 #ifdef DB_USE_SIMD
1205     _asm
1206     {
1207         mov eax,v1
1208         mov ebx,v2
1209         mov ecx,m
1210 
1211         /*Chunk1*/
1212         movups xmm0,[eax]
1213         movups xmm1,[eax+16]
1214         movups xmm2,[eax+32]
1215         movups xmm3,[eax+48]
1216         movups xmm4,[eax+64]
1217         movups xmm5,[eax+80]
1218         movups xmm6,[eax+96]
1219         movups xmm7,[eax+112]
1220         maxps  xmm0,[ebx]
1221         maxps  xmm1,[ebx+16]
1222         maxps  xmm2,[ebx+32]
1223         maxps  xmm3,[ebx+48]
1224         maxps  xmm4,[ebx+64]
1225         maxps  xmm5,[ebx+80]
1226         maxps  xmm6,[ebx+96]
1227         maxps  xmm7,[ebx+112]
1228         movaps [ecx],xmm0
1229         movaps [ecx+16],xmm1
1230         movaps [ecx+32],xmm2
1231         movaps [ecx+48],xmm3
1232         movaps [ecx+64],xmm4
1233         movaps [ecx+80],xmm5
1234         movaps [ecx+96],xmm6
1235         movaps [ecx+112],xmm7
1236 
1237         /*Chunk2*/
1238         movups xmm0,[eax+128]
1239         movups xmm1,[eax+144]
1240         movups xmm2,[eax+160]
1241         movups xmm3,[eax+176]
1242         movups xmm4,[eax+192]
1243         movups xmm5,[eax+208]
1244         movups xmm6,[eax+224]
1245         movups xmm7,[eax+240]
1246         maxps  xmm0,[ebx+128]
1247         maxps  xmm1,[ebx+144]
1248         maxps  xmm2,[ebx+160]
1249         maxps  xmm3,[ebx+176]
1250         maxps  xmm4,[ebx+192]
1251         maxps  xmm5,[ebx+208]
1252         maxps  xmm6,[ebx+224]
1253         maxps  xmm7,[ebx+240]
1254         movaps [ecx+128],xmm0
1255         movaps [ecx+144],xmm1
1256         movaps [ecx+160],xmm2
1257         movaps [ecx+176],xmm3
1258         movaps [ecx+192],xmm4
1259         movaps [ecx+208],xmm5
1260         movaps [ecx+224],xmm6
1261         movaps [ecx+240],xmm7
1262 
1263         /*Chunk3*/
1264         movups xmm0,[eax+256]
1265         movups xmm1,[eax+272]
1266         movups xmm2,[eax+288]
1267         movups xmm3,[eax+304]
1268         movups xmm4,[eax+320]
1269         movups xmm5,[eax+336]
1270         movups xmm6,[eax+352]
1271         movups xmm7,[eax+368]
1272         maxps  xmm0,[ebx+256]
1273         maxps  xmm1,[ebx+272]
1274         maxps  xmm2,[ebx+288]
1275         maxps  xmm3,[ebx+304]
1276         maxps  xmm4,[ebx+320]
1277         maxps  xmm5,[ebx+336]
1278         maxps  xmm6,[ebx+352]
1279         maxps  xmm7,[ebx+368]
1280         movaps [ecx+256],xmm0
1281         movaps [ecx+272],xmm1
1282         movaps [ecx+288],xmm2
1283         movaps [ecx+304],xmm3
1284         movaps [ecx+320],xmm4
1285         movaps [ecx+336],xmm5
1286         movaps [ecx+352],xmm6
1287         movaps [ecx+368],xmm7
1288 
1289         /*Chunk4*/
1290         movups xmm0,[eax+384]
1291         movups xmm1,[eax+400]
1292         movups xmm2,[eax+416]
1293         movups xmm3,[eax+432]
1294         movups xmm4,[eax+448]
1295         movups xmm5,[eax+464]
1296         movups xmm6,[eax+480]
1297         movups xmm7,[eax+496]
1298         maxps  xmm0,[ebx+384]
1299         maxps  xmm1,[ebx+400]
1300         maxps  xmm2,[ebx+416]
1301         maxps  xmm3,[ebx+432]
1302         maxps  xmm4,[ebx+448]
1303         maxps  xmm5,[ebx+464]
1304         maxps  xmm6,[ebx+480]
1305         maxps  xmm7,[ebx+496]
1306         movaps [ecx+384],xmm0
1307         movaps [ecx+400],xmm1
1308         movaps [ecx+416],xmm2
1309         movaps [ecx+432],xmm3
1310         movaps [ecx+448],xmm4
1311         movaps [ecx+464],xmm5
1312         movaps [ecx+480],xmm6
1313         movaps [ecx+496],xmm7
1314     }
1315 #else
1316     int i;
1317     float a,b;
1318     for(i=0;i<128;i++)
1319     {
1320         a=v1[i];
1321         b=v2[i];
1322         if(a>=b) m[i]=a;
1323         else m[i]=b;
1324     }
1325 #endif /*DB_USE_SIMD*/
1326 }
1327 
1328 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and
1329 stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row.
1330 The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
1331 s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/
db_MaxSuppressFilterChunk_5x5_Aligned16_f(float ** sf,float ** s,int left,int top,int bottom,float * temp)1332 inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
1333                                       /*temp should point to at least
1334                                       6*132 floats of 16-byte-aligned allocated memory*/
1335                                       float *temp)
1336 {
1337 #ifdef DB_USE_SIMD
1338     int i,lm2;
1339     float *two[4];
1340     float *four,*five;
1341 
1342     lm2=left-2;
1343 
1344     /*Set pointers to pre-allocated memory*/
1345     four=temp;
1346     five=four+132;
1347     for(i=0;i<4;i++)
1348     {
1349         two[i]=five+(i+1)*132;
1350     }
1351 
1352     /*Set rests of four and five to zero to avoid
1353     floating point exceptions*/
1354     for(i=129;i<132;i++)
1355     {
1356         four[i]=0.0;
1357         five[i]=0.0;
1358     }
1359 
1360     /*Fill three rows of the wrap-around max buffers*/
1361     for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
1362 
1363     /*For each output row*/
1364     for(;i<=bottom;i++)
1365     {
1366         /*Compute max of the lowest pair of rows in the five row window*/
1367         db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
1368         /*Compute max of the lowest and highest pair of rows in the five row window*/
1369         db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
1370         /*Compute max of all rows*/
1371         db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
1372         /*Compute max of 2x5 chunks*/
1373         db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
1374         /*Compute max of pairs of 2x5 chunks*/
1375         db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
1376         /*Compute max of pairs of 5x5 except middle*/
1377         db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
1378     }
1379 
1380 #else
1381     int i,j,right;
1382     float sv;
1383 
1384     right=left+128;
1385     for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
1386     {
1387         sv=s[i][j];
1388 
1389         if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
1390             sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
1391             sv>s[  i][j-2] && sv>s[  i][j-1] &&                 sv>s[  i][j+1] && sv>s[  i][j+2] &&
1392             sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
1393             sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
1394         {
1395             sf[i][j-2]=0.0;
1396         }
1397         else sf[i][j-2]=sv;
1398     }
1399 #endif /*DB_USE_SIMD*/
1400 }
1401 
1402 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
1403 stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
1404 outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
1405 Top must be at least 3. Reading and writing from and to the input and output images is done
1406 as if the region had a width equal to a multiple of 124. If this is not the case, the images
1407 should be over-allocated and the input cleared for a sufficient region*/
db_MaxSuppressFilter_5x5_Aligned16_f(float ** sf,float ** s,int left,int top,int right,int bottom,float * temp)1408 void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
1409                                           /*temp should point to at least
1410                                           6*132 floats of 16-byte-aligned allocated memory*/
1411                                           float *temp)
1412 {
1413     int x,next_x;
1414 
1415     for(x=left;x<=right;x=next_x)
1416     {
1417         next_x=x+124;
1418         db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
1419     }
1420 }
1421 
1422 /*Extract corners from the chunk (left,top) to (right,bottom). Store in x_temp,y_temp and s_temp
1423 which should point to space of at least as many positions as there are pixels in the chunk*/
db_CornersFromChunk(float ** strength,int left,int top,int right,int bottom,float threshold,double * x_temp,double * y_temp,double * s_temp)1424 inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
1425 {
1426     int i,j,nr;
1427     float s;
1428 
1429     nr=0;
1430     for(i=top;i<=bottom;i++) for(j=left;j<=right;j++)
1431     {
1432         s=strength[i][j];
1433 
1434         if(s>=threshold &&
1435             s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
1436             s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
1437             s>strength[  i][j-2] && s>strength[  i][j-1] &&                       s>strength[  i][j+1] && s>strength[  i][j+2] &&
1438             s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
1439             s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
1440         {
1441             x_temp[nr]=(double) j;
1442             y_temp[nr]=(double) i;
1443             s_temp[nr]=(double) s;
1444             nr++;
1445         }
1446     }
1447     return(nr);
1448 }
1449 
1450 
1451 //Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
db_SubPixel(float ** strength,const double xd,const double yd,double & xs,double & ys)1452 inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
1453 {
1454     int x = (int) xd;
1455     int y = (int) yd;
1456 
1457     float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
1458     float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
1459     float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
1460 
1461     float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
1462 
1463     xs = xd;
1464     ys = yd;
1465 
1466     if ( db_absf(denom) <= FLT_EPSILON )
1467     {
1468         return;
1469     }
1470     else
1471     {
1472         float fx = strength[y][x+1] - strength[y][x-1];
1473         float fy = strength[y+1][x] - strength[y-1][x];
1474 
1475         float dx = (fyy * fx - fxy * fy) / denom;
1476         float dy = (fxx * fy - fxy * fx) / denom;
1477 
1478         if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
1479         {
1480             return;
1481         }
1482         else
1483         {
1484             xs -= dx;
1485             ys -= dy;
1486         }
1487     }
1488 
1489     return;
1490 }
1491 
1492 /*Extract corners from the image part from (left,top) to (right,bottom).
1493 Store in x and y, extracting at most satnr corners in each block of size (bw,bh).
1494 The pointer temp_d should point to at least 5*bw*bh positions.
1495 area_factor holds how many corners max to extract per 10000 pixels*/
db_ExtractCornersSaturated(float ** strength,int left,int top,int right,int bottom,int bw,int bh,unsigned long area_factor,float threshold,double * temp_d,double * x_coord,double * y_coord,int * nr_corners)1496 void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
1497                                 int bw,int bh,unsigned long area_factor,
1498                                 float threshold,double *temp_d,
1499                                 double *x_coord,double *y_coord,int *nr_corners)
1500 {
1501     double *x_temp,*y_temp,*s_temp,*select_temp;
1502     double loc_thresh;
1503     unsigned long bwbh,area,saturation;
1504     int x,next_x,last_x;
1505     int y,next_y,last_y;
1506     int nr,nr_points,i,stop;
1507 
1508     bwbh=bw*bh;
1509     x_temp=temp_d;
1510     y_temp=x_temp+bwbh;
1511     s_temp=y_temp+bwbh;
1512     select_temp=s_temp+bwbh;
1513 
1514 #ifdef DB_SUB_PIXEL
1515     // subpixel processing may sometimes push the corner ourside the real border
1516     // increasing border size:
1517     left++;
1518     top++;
1519     bottom--;
1520     right--;
1521 #endif /*DB_SUB_PIXEL*/
1522 
1523     nr_points=0;
1524     for(y=top;y<=bottom;y=next_y)
1525     {
1526         next_y=y+bh;
1527         last_y=next_y-1;
1528         if(last_y>bottom) last_y=bottom;
1529         for(x=left;x<=right;x=next_x)
1530         {
1531             next_x=x+bw;
1532             last_x=next_x-1;
1533             if(last_x>right) last_x=right;
1534 
1535             area=(last_x-x+1)*(last_y-y+1);
1536             saturation=(area*area_factor)/10000;
1537             nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
1538             if(nr)
1539             {
1540                 if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
1541                 else loc_thresh=threshold;
1542 
1543                 stop=nr_points+saturation;
1544                 for(i=0;(i<nr)&&(nr_points<stop);i++)
1545                 {
1546                     if(s_temp[i]>=loc_thresh)
1547                     {
1548                         #ifdef DB_SUB_PIXEL
1549                                db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
1550                         #else
1551                                x_coord[nr_points]=x_temp[i];
1552                                y_coord[nr_points]=y_temp[i];
1553                         #endif
1554 
1555                         nr_points++;
1556                     }
1557                 }
1558             }
1559         }
1560     }
1561     *nr_corners=nr_points;
1562 }
1563 
db_CornerDetector_f()1564 db_CornerDetector_f::db_CornerDetector_f()
1565 {
1566     m_w=0; m_h=0;
1567 }
1568 
~db_CornerDetector_f()1569 db_CornerDetector_f::~db_CornerDetector_f()
1570 {
1571     Clean();
1572 }
1573 
Clean()1574 void db_CornerDetector_f::Clean()
1575 {
1576     if(m_w!=0)
1577     {
1578         delete [] m_temp_f;
1579         delete [] m_temp_d;
1580         db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1581     }
1582     m_w=0; m_h=0;
1583 }
1584 
Init(int im_width,int im_height,int target_nr_corners,int nr_horizontal_blocks,int nr_vertical_blocks,double absolute_threshold,double relative_threshold)1585 unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
1586                             int nr_horizontal_blocks,int nr_vertical_blocks,
1587                             double absolute_threshold,double relative_threshold)
1588 {
1589     int chunkwidth=208;
1590     int block_width,block_height;
1591     unsigned long area_factor;
1592     int active_width,active_height;
1593 
1594     active_width=db_maxi(1,im_width-10);
1595     active_height=db_maxi(1,im_height-10);
1596     block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1597     block_height=db_maxi(1,active_height/nr_vertical_blocks);
1598 
1599     area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1600         (((double)active_width)*((double)active_height)))));
1601 
1602     return(Start(im_width,im_height,block_width,block_height,area_factor,
1603         absolute_threshold,relative_threshold,chunkwidth));
1604 }
1605 
Start(int im_width,int im_height,int block_width,int block_height,unsigned long area_factor,double absolute_threshold,double relative_threshold,int chunkwidth)1606 unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
1607                              int block_width,int block_height,unsigned long area_factor,
1608                              double absolute_threshold,double relative_threshold,int chunkwidth)
1609 {
1610     Clean();
1611 
1612     m_w=im_width;
1613     m_h=im_height;
1614     m_cw=chunkwidth;
1615     m_bw=block_width;
1616     m_bh=block_height;
1617     m_area_factor=area_factor;
1618     m_r_thresh=relative_threshold;
1619     m_a_thresh=absolute_threshold;
1620     m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1621 
1622     m_temp_f=new float[13*(m_cw+4)];
1623     m_temp_d=new double[5*m_bw*m_bh];
1624     m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1625 
1626     return(m_max_nr);
1627 }
1628 
DetectCorners(const float * const * img,double * x_coord,double * y_coord,int * nr_corners) const1629 void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
1630 {
1631     float max_val,threshold;
1632 
1633     db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
1634 
1635     if(m_r_thresh)
1636     {
1637         max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1638         threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1639     }
1640     else threshold= (float) m_a_thresh;
1641 
1642     db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1643         m_temp_d,x_coord,y_coord,nr_corners);
1644 }
1645 
db_CornerDetector_u()1646 db_CornerDetector_u::db_CornerDetector_u()
1647 {
1648     m_w=0; m_h=0;
1649 }
1650 
~db_CornerDetector_u()1651 db_CornerDetector_u::~db_CornerDetector_u()
1652 {
1653     Clean();
1654 }
1655 
db_CornerDetector_u(const db_CornerDetector_u & cd)1656 db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
1657 {
1658     Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1659         cd.m_a_thresh, cd.m_r_thresh);
1660 }
1661 
operator =(const db_CornerDetector_u & cd)1662 db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
1663 {
1664     if ( this == &cd ) return *this;
1665 
1666     Clean();
1667 
1668     Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1669         cd.m_a_thresh, cd.m_r_thresh);
1670 
1671     return *this;
1672 }
1673 
Clean()1674 void db_CornerDetector_u::Clean()
1675 {
1676     if(m_w!=0)
1677     {
1678         delete [] m_temp_i;
1679         delete [] m_temp_d;
1680         db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1681     }
1682     m_w=0; m_h=0;
1683 }
1684 
Init(int im_width,int im_height,int target_nr_corners,int nr_horizontal_blocks,int nr_vertical_blocks,double absolute_threshold,double relative_threshold)1685 unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
1686                             int nr_horizontal_blocks,int nr_vertical_blocks,
1687                             double absolute_threshold,double relative_threshold)
1688 {
1689     int block_width,block_height;
1690     unsigned long area_factor;
1691     int active_width,active_height;
1692 
1693     active_width=db_maxi(1,im_width-10);
1694     active_height=db_maxi(1,im_height-10);
1695     block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1696     block_height=db_maxi(1,active_height/nr_vertical_blocks);
1697 
1698     area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1699         (((double)active_width)*((double)active_height)))));
1700 
1701     return(Start(im_width,im_height,block_width,block_height,area_factor,
1702         16.0*absolute_threshold,relative_threshold));
1703 }
1704 
Start(int im_width,int im_height,int block_width,int block_height,unsigned long area_factor,double absolute_threshold,double relative_threshold)1705 unsigned long db_CornerDetector_u::Start(int im_width,int im_height,
1706                              int block_width,int block_height,unsigned long area_factor,
1707                              double absolute_threshold,double relative_threshold)
1708 {
1709     Clean();
1710 
1711     m_w=im_width;
1712     m_h=im_height;
1713     m_bw=block_width;
1714     m_bh=block_height;
1715     m_area_factor=area_factor;
1716     m_r_thresh=relative_threshold;
1717     m_a_thresh=absolute_threshold;
1718     m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1719 
1720     m_temp_i=new int[18*128];
1721     m_temp_d=new double[5*m_bw*m_bh];
1722     m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1723 
1724     return(m_max_nr);
1725 }
1726 
DetectCorners(const unsigned char * const * img,double * x_coord,double * y_coord,int * nr_corners,const unsigned char * const * msk,unsigned char fgnd) const1727 void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
1728                                         const unsigned char * const *msk, unsigned char fgnd) const
1729 {
1730     float max_val,threshold;
1731 
1732     db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
1733 
1734 
1735     if(m_r_thresh)
1736     {
1737         max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1738         threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1739     }
1740     else threshold= (float) m_a_thresh;
1741 
1742     db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1743         m_temp_d,x_coord,y_coord,nr_corners);
1744 
1745 
1746     if ( msk )
1747     {
1748         int nr_corners_mask=0;
1749 
1750         for ( int i = 0; i < *nr_corners; ++i)
1751         {
1752             int cor_x = db_roundi(*(x_coord+i));
1753             int cor_y = db_roundi(*(y_coord+i));
1754             if ( msk[cor_y][cor_x] == fgnd )
1755             {
1756                 x_coord[nr_corners_mask] = x_coord[i];
1757                 y_coord[nr_corners_mask] = y_coord[i];
1758                 nr_corners_mask++;
1759             }
1760         }
1761         *nr_corners = nr_corners_mask;
1762     }
1763 }
1764 
ExtractCorners(float ** strength,double * x_coord,double * y_coord,int * nr_corners)1765 void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
1766     if ( m_w!=0 )
1767         db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
1768             m_temp_d,x_coord,y_coord,nr_corners);
1769 }
1770 
1771