1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
18
19 /*****************************************************************
20 * Lean and mean begins here *
21 *****************************************************************/
22
23 #include "db_utilities.h"
24 #include "db_feature_detection.h"
25 #ifdef _VERBOSE_
26 #include <iostream>
27 #endif
28 #include <float.h>
29
30 #define DB_SUB_PIXEL
31
32 #define BORDER 10 // 5
33
db_AllocStrengthImage_f(float ** im,int w,int h)34 float** db_AllocStrengthImage_f(float **im,int w,int h)
35 {
36 int i,n,aw;
37 long c,size;
38 float **img,*aim,*p;
39
40 /*Determine number of 124 element chunks needed*/
41 n=(db_maxi(1,w-6)+123)/124;
42 /*Determine the total allocation width aw*/
43 aw=n*124+8;
44 /*Allocate*/
45 size=aw*h+16;
46 *im=new float [size];
47 /*Clean up*/
48 p=(*im);
49 for(c=0;c<size;c++) p[c]=0.0;
50 /*Get a 16 byte aligned pointer*/
51 aim=db_AlignPointer_f(*im,16);
52 /*Allocate pointer table*/
53 img=new float* [h];
54 /*Initialize the pointer table*/
55 for(i=0;i<h;i++)
56 {
57 img[i]=aim+aw*i+1;
58 }
59
60 return(img);
61 }
62
db_FreeStrengthImage_f(float * im,float ** img,int h)63 void db_FreeStrengthImage_f(float *im,float **img,int h)
64 {
65 delete [] im;
66 delete [] img;
67 }
68
69 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width
70 Memory references occur one pixel outside the subrow*/
db_IxIyRow_f(float * Ix,float * Iy,const float * const * img,int i,int j,int chunk_width)71 inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
72 {
73 int c;
74
75 for(c=0;c<chunk_width;c++)
76 {
77 Ix[c]=img[i][j+c-1]-img[i][j+c+1];
78 Iy[c]=img[i-1][j+c]-img[i+1][j+c];
79 }
80 }
81
82 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128
83 Memory references occur one pixel outside the subrow*/
db_IxIyRow_u(int * dxx,const unsigned char * const * img,int i,int j,int nc)84 inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
85 {
86 #ifdef DB_USE_MMX
87 const unsigned char *r1,*r2,*r3;
88
89 r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;
90
91 _asm
92 {
93 mov esi,16
94 mov eax,r1
95 mov ebx,r2
96 mov ecx,r3
97 mov edx,dxx
98
99 /*Get bitmask into mm7*/
100 mov edi,7F7F7F7Fh
101 movd mm7,edi
102 punpckldq mm7,mm7
103
104 loopstart:
105 /***************dx part 1-12*********************************/
106 movq mm0,[eax] /*1 Get upper*/
107 pxor mm6,mm6 /*2 Set to zero*/
108 movq mm1,[ecx] /*3 Get lower*/
109 psrlq mm0,1 /*4 Shift*/
110 psrlq mm1,1 /*5 Shift*/
111 pand mm0,mm7 /*6 And*/
112 movq mm2,[ebx-1] /*13 Get left*/
113 pand mm1,mm7 /*7 And*/
114 psubb mm0,mm1 /*8 Subtract*/
115 pxor mm5,mm5 /*14 Set to zero*/
116 movq mm1,mm0 /*9 Copy*/
117 pcmpgtb mm6,mm0 /*10 Create unpack mask*/
118 movq mm3,[ebx+1] /*15 Get right*/
119 punpcklbw mm0,mm6 /*11 Unpack low*/
120 punpckhbw mm1,mm6 /*12 Unpack high*/
121 /***************dy part 13-24*********************************/
122 movq mm4,mm0 /*25 Copy dx*/
123 psrlq mm2,1 /*16 Shift*/
124 pmullw mm0,mm0 /*26 Multiply dx*dx*/
125 psrlq mm3,1 /*17 Shift*/
126 pand mm2,mm7 /*18 And*/
127 pand mm3,mm7 /*19 And*/
128 /*Stall*/
129 psubb mm2,mm3 /*20 Subtract*/
130 /*Stall*/
131 movq mm3,mm2 /*21 Copy*/
132 pcmpgtb mm5,mm2 /*22 Create unpack mask*/
133 punpcklbw mm2,mm5 /*23 Unpack low*/
134 /*Stall*/
135 punpckhbw mm3,mm5 /*24 Unpack high*/
136 /***************dxx dxy dyy low part 25-49*********************************/
137 pmullw mm4,mm2 /*27 Multiply dx*dy*/
138 pmullw mm2,mm2 /*28 Multiply dy*dy*/
139 pxor mm6,mm6 /*29 Set to zero*/
140 movq mm5,mm0 /*30 Copy dx*dx*/
141 pcmpgtw mm6,mm0 /*31 Create unpack mask for dx*dx*/
142 punpcklwd mm0,mm6 /*32 Unpack dx*dx lows*/
143 /*Stall*/
144 punpckhwd mm5,mm6 /*33 Unpack dx*dx highs*/
145 pxor mm6,mm6 /*36 Set to zero*/
146 movq [edx],mm0 /*34 Store dx*dx lows*/
147 movq mm0,mm4 /*37 Copy dx*dy*/
148 movq [edx+8],mm5 /*35 Store dx*dx highs*/
149 pcmpgtw mm6,mm4 /*38 Create unpack mask for dx*dy*/
150 punpcklwd mm4,mm6 /*39 Unpack dx*dy lows*/
151 /*Stall*/
152 punpckhwd mm0,mm6 /*40 Unpack dx*dy highs*/
153 pxor mm6,mm6 /*43 Set to zero*/
154 movq [edx+512],mm4 /*41 Store dx*dy lows*/
155 movq mm5,mm2 /*44 Copy dy*dy*/
156 movq [edx+520],mm0 /*42 Store dx*dy highs*/
157 pcmpgtw mm6,mm2 /*45 Create unpack mask for dy*dy*/
158 punpcklwd mm2,mm6 /*46 Unpack dy*dy lows*/
159 movq mm4,mm1 /*50 Copy dx*/
160 punpckhwd mm5,mm6 /*47 Unpack dy*dy highs*/
161 pmullw mm1,mm1 /*51 Multiply dx*dx*/
162 movq [edx+1024],mm2 /*48 Store dy*dy lows*/
163 pmullw mm4,mm3 /*52 Multiply dx*dy*/
164 movq [edx+1032],mm5 /*49 Store dy*dy highs*/
165 /***************dxx dxy dyy high part 50-79*********************************/
166 pmullw mm3,mm3 /*53 Multiply dy*dy*/
167 pxor mm6,mm6 /*54 Set to zero*/
168 movq mm5,mm1 /*55 Copy dx*dx*/
169 pcmpgtw mm6,mm1 /*56 Create unpack mask for dx*dx*/
170 pxor mm2,mm2 /*61 Set to zero*/
171 punpcklwd mm1,mm6 /*57 Unpack dx*dx lows*/
172 movq mm0,mm4 /*62 Copy dx*dy*/
173 punpckhwd mm5,mm6 /*58 Unpack dx*dx highs*/
174 pcmpgtw mm2,mm4 /*63 Create unpack mask for dx*dy*/
175 movq [edx+16],mm1 /*59 Store dx*dx lows*/
176 punpcklwd mm4,mm2 /*64 Unpack dx*dy lows*/
177 movq [edx+24],mm5 /*60 Store dx*dx highs*/
178 punpckhwd mm0,mm2 /*65 Unpack dx*dy highs*/
179 movq [edx+528],mm4 /*66 Store dx*dy lows*/
180 pxor mm6,mm6 /*68 Set to zero*/
181 movq [edx+536],mm0 /*67 Store dx*dy highs*/
182 movq mm5,mm3 /*69 Copy dy*dy*/
183 pcmpgtw mm6,mm3 /*70 Create unpack mask for dy*dy*/
184 add eax,8 /*75*/
185 punpcklwd mm3,mm6 /*71 Unpack dy*dy lows*/
186 add ebx,8 /*76*/
187 punpckhwd mm5,mm6 /*72 Unpack dy*dy highs*/
188 add ecx,8 /*77*/
189 movq [edx+1040],mm3 /*73 Store dy*dy lows*/
190 /*Stall*/
191 movq [edx+1048],mm5 /*74 Store dy*dy highs*/
192 /*Stall*/
193 add edx,32 /*78*/
194 dec esi /*79*/
195 jnz loopstart
196
197 emms
198 }
199
200 #else
201 int c;
202 int Ix,Iy;
203
204 for(c=0;c<nc;c++)
205 {
206 Ix=(img[i][j+c-1]-img[i][j+c+1])>>1;
207 Iy=(img[i-1][j+c]-img[i+1][j+c])>>1;
208 dxx[c]=Ix*Ix;
209 dxx[c+128]=Ix*Iy;
210 dxx[c+256]=Iy*Iy;
211 }
212 #endif /*DB_USE_MMX*/
213 }
214
215 /*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/
db_gxx_gxy_gyy_row_f(float * gxx,float * gxy,float * gyy,int chunk_width,float * Ix0,float * Ix1,float * Ix2,float * Ix3,float * Ix4,float * Iy0,float * Iy1,float * Iy2,float * Iy3,float * Iy4)216 inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
217 float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
218 float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
219 {
220 int c;
221 float dx,dy;
222 float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4;
223
224 for(c=0;c<chunk_width;c++)
225 {
226 dx=Ix0[c];
227 dy=Iy0[c];
228 Ixx0=dx*dx;
229 Ixy0=dx*dy;
230 Iyy0=dy*dy;
231
232 dx=Ix1[c];
233 dy=Iy1[c];
234 Ixx1=dx*dx;
235 Ixy1=dx*dy;
236 Iyy1=dy*dy;
237
238 dx=Ix2[c];
239 dy=Iy2[c];
240 Ixx2=dx*dx;
241 Ixy2=dx*dy;
242 Iyy2=dy*dy;
243
244 dx=Ix3[c];
245 dy=Iy3[c];
246 Ixx3=dx*dx;
247 Ixy3=dx*dy;
248 Iyy3=dy*dy;
249
250 dx=Ix4[c];
251 dy=Iy4[c];
252 Ixx4=dx*dx;
253 Ixy4=dx*dy;
254 Iyy4=dy*dy;
255
256 /*Filter vertically*/
257 gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4;
258 gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4;
259 gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4;
260 }
261 }
262
263 /*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/
db_gxx_gxy_gyy_row_s(int * g,int * d0,int * d1,int * d2,int * d3,int * d4,int nc)264 inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
265 {
266 #ifdef DB_USE_MMX
267 int c;
268
269 _asm
270 {
271 mov c,64
272 mov eax,d0
273 mov ebx,d1
274 mov ecx,d2
275 mov edx,d3
276 mov edi,d4
277 mov esi,g
278
279 loopstart:
280 /***************dxx part 1-14*********************************/
281 movq mm0,[eax] /*1 Get dxx0*/
282 /*Stall*/
283 movq mm1,[ebx] /*2 Get dxx1*/
284 /*Stall*/
285 movq mm2,[ecx] /*5 Get dxx2*/
286 pslld mm1,2 /*3 Shift dxx1*/
287 movq mm3,[edx] /*10 Get dxx3*/
288 paddd mm0,mm1 /*4 Accumulate dxx1*/
289 movq mm4,[eax+512] /*15 Get dxy0*/
290 pslld mm2,1 /*6 Shift dxx2 1*/
291 paddd mm0,mm2 /*7 Accumulate dxx2 1*/
292 pslld mm2,1 /*8 Shift dxx2 2*/
293 movq mm5,[ebx+512] /*16 Get dxy1*/
294 paddd mm0,mm2 /*9 Accumulate dxx2 2*/
295 pslld mm3,2 /*11 Shift dxx3*/
296 /*Stall*/
297 paddd mm0,mm3 /*12 Accumulate dxx3*/
298 pslld mm5,2 /*17 Shift dxy1*/
299 paddd mm0,[edi] /*13 Accumulate dxx4*/
300 paddd mm4,mm5 /*18 Accumulate dxy1*/
301 movq mm6,[ecx+512] /*19 Get dxy2*/
302 /*Stall*/
303 movq [esi],mm0 /*14 Store dxx sums*/
304 /***************dxy part 15-28*********************************/
305 pslld mm6,1 /*20 Shift dxy2 1*/
306 paddd mm4,mm6 /*21 Accumulate dxy2 1*/
307 pslld mm6,1 /*22 Shift dxy2 2*/
308 movq mm0,[eax+1024] /*29 Get dyy0*/
309 paddd mm4,mm6 /*23 Accumulate dxy2 2*/
310 movq mm7,[edx+512] /*24 Get dxy3*/
311 pslld mm7,2 /*25 Shift dxy3*/
312 movq mm1,[ebx+1024] /*30 Get dyy1*/
313 paddd mm4,mm7 /*26 Accumulate dxy3*/
314 paddd mm4,[edi+512] /*27 Accumulate dxy4*/
315 pslld mm1,2 /*31 Shift dyy1*/
316 movq mm2,[ecx+1024] /*33 Get dyy2*/
317 paddd mm0,mm1 /*32 Accumulate dyy1*/
318 movq [esi+512],mm4 /*28 Store dxy sums*/
319 pslld mm2,1 /*34 Shift dyy2 1*/
320 /***************dyy part 29-49*********************************/
321
322
323 movq mm3,[edx+1024] /*38 Get dyy3*/
324 paddd mm0,mm2 /*35 Accumulate dyy2 1*/
325 paddd mm0,[edi+1024] /*41 Accumulate dyy4*/
326 pslld mm2,1 /*36 Shift dyy2 2*/
327 paddd mm0,mm2 /*37 Accumulate dyy2 2*/
328 pslld mm3,2 /*39 Shift dyy3*/
329 paddd mm0,mm3 /*40 Accumulate dyy3*/
330 add eax,8 /*43*/
331 add ebx,8 /*44*/
332 add ecx,8 /*45*/
333 movq [esi+1024],mm0 /*42 Store dyy sums*/
334 /*Stall*/
335 add edx,8 /*46*/
336 add edi,8 /*47*/
337 add esi,8 /*48*/
338 dec c /*49*/
339 jnz loopstart
340
341 emms
342 }
343
344 #else
345 int c,dd;
346
347 for(c=0;c<nc;c++)
348 {
349 /*Filter vertically*/
350 dd=d2[c];
351 g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];
352
353 dd=d2[c+128];
354 g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];
355
356 dd=d2[c+256];
357 g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
358 }
359 #endif /*DB_USE_MMX*/
360 }
361
362 /*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j
363 and with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width
364 and starting at (i,j-2)*/
db_HarrisStrength_row_f(float ** s,float * gxx,float * gxy,float * gyy,int i,int j,int chunk_width)365 inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
366 {
367 float Gxx,Gxy,Gyy,det,trc;
368 int c;
369
370 for(c=0;c<chunk_width;c++)
371 {
372 Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
373 Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
374 Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];
375
376 det=Gxx*Gyy-Gxy*Gxy;
377 trc=Gxx+Gyy;
378 s[i][j+c]=det-0.06f*trc*trc;
379 }
380 }
381
382 /*Filter g of length 128 in place with 14641. Output is shifted two steps
383 and of length 124*/
db_Filter14641_128_i(int * g,int nc)384 inline void db_Filter14641_128_i(int *g,int nc)
385 {
386 #ifdef DB_USE_MMX
387 int mask;
388
389 mask=0xFFFFFFFF;
390 _asm
391 {
392 mov esi,31
393 mov eax,g
394
395 /*Get bitmask 00000000FFFFFFFF into mm7*/
396 movd mm7,mask
397
398 /*Warming iteration one 1-16********************/
399 movq mm6,[eax] /*1 Load new data*/
400 paddd mm0,mm6 /*2 Add 1* behind two steps*/
401 movq mm2,mm6 /*3 Start with 1* in front two steps*/
402 pslld mm6,1 /*4*/
403 paddd mm1,mm6 /*5 Add 2* same place*/
404 pslld mm6,1 /*6*/
405 paddd mm1,mm6 /*7 Add 4* same place*/
406 pshufw mm6,mm6,4Eh /*8 Swap the two double-words using bitmask 01001110=4Eh*/
407 paddd mm1,mm6 /*9 Add 4* swapped*/
408 movq mm5,mm6 /*10 Copy*/
409 pand mm6,mm7 /*11 Get low double-word only*/
410 paddd mm2,mm6 /*12 Add 4* in front one step*/
411 pxor mm6,mm5 /*13 Get high double-word only*/
412 paddd mm0,mm6 /*14 Add 4* behind one step*/
413 movq mm0,mm1 /*15 Shift along*/
414 movq mm1,mm2 /*16 Shift along*/
415 /*Warming iteration two 17-32********************/
416 movq mm4,[eax+8] /*17 Load new data*/
417 paddd mm0,mm4 /*18 Add 1* behind two steps*/
418 movq mm2,mm4 /*19 Start with 1* in front two steps*/
419 pslld mm4,1 /*20*/
420 paddd mm1,mm4 /*21 Add 2* same place*/
421 pslld mm4,1 /*22*/
422 paddd mm1,mm4 /*23 Add 4* same place*/
423 pshufw mm4,mm4,4Eh /*24 Swap the two double-words using bitmask 01001110=4Eh*/
424 paddd mm1,mm4 /*25 Add 4* swapped*/
425 movq mm3,mm4 /*26 Copy*/
426 pand mm4,mm7 /*27 Get low double-word only*/
427 paddd mm2,mm4 /*28 Add 4* in front one step*/
428 pxor mm4,mm3 /*29 Get high double-word only*/
429 paddd mm0,mm4 /*30 Add 4* behind one step*/
430 movq mm0,mm1 /*31 Shift along*/
431 movq mm1,mm2 /*32 Shift along*/
432
433 /*Loop********************/
434 loopstart:
435 /*First part of loop 33-47********/
436 movq mm6,[eax+16] /*33 Load new data*/
437 /*Stall*/
438 paddd mm0,mm6 /*34 Add 1* behind two steps*/
439 movq mm2,mm6 /*35 Start with 1* in front two steps*/
440 movq mm4,[eax+24] /*48 Load new data*/
441 pslld mm6,1 /*36*/
442 paddd mm1,mm6 /*37 Add 2* same place*/
443 pslld mm6,1 /*38*/
444 paddd mm1,mm6 /*39 Add 4* same place*/
445 pshufw mm6,mm6,4Eh /*40 Swap the two double-words using bitmask 01001110=4Eh*/
446 paddd mm1,mm4 /*49 Add 1* behind two steps*/
447 movq mm5,mm6 /*41 Copy*/
448 paddd mm1,mm6 /*42 Add 4* swapped*/
449 pand mm6,mm7 /*43 Get low double-word only*/
450 paddd mm2,mm6 /*44 Add 4* in front one step*/
451 pxor mm6,mm5 /*45 Get high double-word only*/
452 paddd mm0,mm6 /*46 Add 4* behind one step*/
453 movq mm6,mm4 /*50a Copy*/
454 pslld mm4,1 /*51*/
455 /*Stall*/
456 movq [eax],mm0 /*47 Store result two steps behind*/
457 /*Second part of loop 48-66********/
458 movq mm0,mm6 /*50b Start with 1* in front two steps*/
459 paddd mm2,mm4 /*52 Add 2* same place*/
460 pslld mm4,1 /*53*/
461 paddd mm2,mm4 /*54 Add 4* same place*/
462 pshufw mm4,mm4,4Eh /*55 Swap the two double-words using bitmask 01001110=4Eh*/
463 paddd mm2,mm4 /*56 Add 4* swapped*/
464 movq mm3,mm4 /*57 Copy*/
465 pand mm4,mm7 /*58 Get low double-word only*/
466 /*Stall*/
467 paddd mm0,mm4 /*59 Add 4* in front one step*/
468 pxor mm4,mm3 /*60 Get high double-word only*/
469 paddd mm1,mm4 /*61 Add 4* behind one step*/
470 add eax,16 /*65*/
471 dec esi /*66*/
472 /*Stall*/
473 movq [eax-8],mm1 /*62 Store result two steps behind*/
474 movq mm1,mm0 /*63 Shift along*/
475 movq mm0,mm2 /*64 Shift along*/
476 jnz loopstart
477
478 emms
479 }
480
481 #else
482 int c;
483
484 for(c=0;c<nc-4;c++)
485 {
486 g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4];
487 }
488 #endif /*DB_USE_MMX*/
489 }
490
491 /*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
492 of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
493 s should be 16 byte aligned*/
db_HarrisStrength_row_s(float * s,int * gxx,int * gxy,int * gyy,int nc)494 inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
495 {
496 float k;
497
498 k=0.06f;
499
500 db_Filter14641_128_i(gxx,nc);
501 db_Filter14641_128_i(gxy,nc);
502 db_Filter14641_128_i(gyy,nc);
503
504 #ifdef DB_USE_SIMD
505
506
507 _asm
508 {
509 mov esi,15
510 mov eax,gxx
511 mov ebx,gxy
512 mov ecx,gyy
513 mov edx,s
514
515 /*broadcast k to all positions of xmm7*/
516 movss xmm7,k
517 shufps xmm7,xmm7,0
518
519 /*****Warm up 1-10**************************************/
520 cvtpi2ps xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
521 /*Stall*/
522 cvtpi2ps xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
523 movlhps xmm0,xmm0 /*2 Move them to the high double-word*/
524 cvtpi2ps xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
525 movlhps xmm1,xmm1 /*5 Move them to the high double-word*/
526 cvtpi2ps xmm0,[eax] /*3 Convert two integers into floating point of low double-word*/
527 movlhps xmm2,xmm2 /*8 Move them to the high double-word*/
528 cvtpi2ps xmm1,[ebx] /*6 Convert two integers into floating point of low double-word*/
529 movaps xmm3,xmm0 /*10 Copy Cxx*/
530 cvtpi2ps xmm2,[ecx] /*9 Convert two integers into floating point of low double-word*/
531 /*Stall*/
532 loopstart:
533 /*****First part of loop 11-18***********************/
534 mulps xmm0,xmm2 /*11 Multiply to get Gxx*Gyy*/
535 addps xmm2,xmm3 /*12 Add to get Gxx+Gyy*/
536 cvtpi2ps xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
537 mulps xmm1,xmm1 /*13 Multiply to get Gxy*Gxy*/
538 mulps xmm2,xmm2 /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
539 movlhps xmm4,xmm4 /*20 Move them to the high double-word*/
540 cvtpi2ps xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
541 /*Stall*/
542 subps xmm0,xmm1 /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
543 mulps xmm2,xmm7 /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
544 cvtpi2ps xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
545 /*Stall*/
546 movlhps xmm5,xmm5 /*23 Move them to the high double-word*/
547 /*Stall*/
548 cvtpi2ps xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
549 subps xmm0,xmm2 /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
550 cvtpi2ps xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
551 /*Stall*/
552 movaps [edx],xmm0 /*18 Store*/
553 /*****Second part of loop 26-40***********************/
554 movlhps xmm6,xmm6 /*26 Move them to the high double-word*/
555 cvtpi2ps xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
556 movaps xmm3,xmm4 /*28 Copy Cxx*/
557 mulps xmm4,xmm6 /*29 Multiply to get Gxx*Gyy*/
558 addps xmm6,xmm3 /*30 Add to get Gxx+Gyy*/
559 cvtpi2ps xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
560 mulps xmm5,xmm5 /*31 Multiply to get Gxy*Gxy*/
561 cvtpi2ps xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
562 mulps xmm6,xmm6 /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
563 cvtpi2ps xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
564 movlhps xmm0,xmm0 /*(2 Next) Move them to the high double-word*/
565 subps xmm4,xmm5 /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
566 movlhps xmm1,xmm1 /*(5 Next) Move them to the high double-word*/
567 cvtpi2ps xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
568 mulps xmm6,xmm7 /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
569 cvtpi2ps xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
570 movlhps xmm2,xmm2 /*(8 Next) Move them to the high double-word*/
571 movaps xmm3,xmm0 /*(10 Next) Copy Cxx*/
572 add eax,32 /*37*/
573 subps xmm4,xmm6 /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
574 add ebx,32 /*38*/
575 cvtpi2ps xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
576 /*Stall*/
577 movaps [edx+16],xmm4 /*36 Store*/
578 /*Stall*/
579 add ecx,32 /*39*/
580 add edx,32 /*40*/
581 dec esi /*41*/
582 jnz loopstart
583
584 /****Cool down***************/
585 mulps xmm0,xmm2 /*Multiply to get Gxx*Gyy*/
586 addps xmm2,xmm3 /*Add to get Gxx+Gyy*/
587 mulps xmm1,xmm1 /*Multiply to get Gxy*Gxy*/
588 mulps xmm2,xmm2 /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
589 subps xmm0,xmm1 /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
590 mulps xmm2,xmm7 /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
591 subps xmm0,xmm2 /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
592 movaps [edx],xmm0 /*Store*/
593 }
594
595 #else
596 float Gxx,Gxy,Gyy,det,trc;
597 int c;
598
599 //for(c=0;c<124;c++)
600 for(c=0;c<nc-4;c++)
601 {
602 Gxx=(float)gxx[c];
603 Gxy=(float)gxy[c];
604 Gyy=(float)gyy[c];
605
606 det=Gxx*Gyy-Gxy*Gxy;
607 trc=Gxx+Gyy;
608 s[c]=det-k*trc*trc;
609 }
610 #endif /*DB_USE_SIMD*/
611 }
612
613 /*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
614 store it into the corresponding region of s. left and top have to be at least 3 and
615 right and bottom have to be at most width-4,height-4*/
db_HarrisStrengthChunk_f(float ** s,const float * const * img,int left,int top,int right,int bottom,float * temp)616 inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
617 /*temp should point to at least
618 13*(right-left+5) of allocated memory*/
619 float *temp)
620 {
621 float *Ix[5],*Iy[5];
622 float *gxx,*gxy,*gyy;
623 int i,chunk_width,chunk_width_p4;
624
625 chunk_width=right-left+1;
626 chunk_width_p4=chunk_width+4;
627 gxx=temp;
628 gxy=gxx+chunk_width_p4;
629 gyy=gxy+chunk_width_p4;
630 for(i=0;i<5;i++)
631 {
632 Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
633 Iy[i]=Ix[i]+chunk_width_p4;
634 }
635
636 /*Fill four rows of the wrap-around derivative buffers*/
637 for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
638
639 /*For each output row*/
640 for(i=top;i<=bottom;i++)
641 {
642 /*Step the derivative buffers*/
643 db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
644
645 /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
646 db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
647 Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
648 Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
649
650 /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
651 db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
652 }
653 }
654
655 /*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
656 store it into the corresponding region of s. left and top have to be at least 3 and
657 right and bottom have to be at most width-4,height-4. The left of the region in s should
658 be 16 byte aligned*/
db_HarrisStrengthChunk_u(float ** s,const unsigned char * const * img,int left,int top,int bottom,int * temp,int nc)659 inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
660 /*temp should point to at least
661 18*128 of allocated memory*/
662 int *temp, int nc)
663 {
664 int *Ixx[5],*Ixy[5],*Iyy[5];
665 int *gxx,*gxy,*gyy;
666 int i;
667
668 gxx=temp;
669 gxy=gxx+128;
670 gyy=gxy+128;
671 for(i=0;i<5;i++)
672 {
673 Ixx[i]=gyy+(3*i+1)*128;
674 Ixy[i]=gyy+(3*i+2)*128;
675 Iyy[i]=gyy+(3*i+3)*128;
676 }
677
678 /*Fill four rows of the wrap-around derivative buffers*/
679 for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
680
681 /*For each output row*/
682 for(i=top;i<=bottom;i++)
683 {
684 /*Step the derivative buffers*/
685 db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
686
687 /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
688 db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
689
690 /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
691 db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
692 }
693
694 }
695
696 /*Compute Harris corner strength of img. Strength is returned for the region
697 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
698 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
699 for a meaningful result*/
db_HarrisStrength_f(float ** s,const float * const * img,int w,int h,float * temp,int chunk_width)700 void db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
701 /*temp should point to at least
702 13*(chunk_width+4) of allocated memory*/
703 float *temp,
704 int chunk_width)
705 {
706 int x,next_x,last,right;
707
708 last=w-4;
709 for(x=3;x<=last;x=next_x)
710 {
711 next_x=x+chunk_width;
712 right=next_x-1;
713 if(right>last) right=last;
714 /*Compute the Harris strength of a chunk*/
715 db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
716 }
717 }
718
719 /*Compute Harris corner strength of img. Strength is returned for the region
720 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
721 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
722 for a meaningful result.Moreover, the image should be overallocated by 256 bytes.
723 s[i][3] should by 16 byte aligned for any i*/
db_HarrisStrength_u(float ** s,const unsigned char * const * img,int w,int h,int * temp)724 void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
725 /*temp should point to at least
726 18*128 of allocated memory*/
727 int *temp)
728 {
729 int x,next_x,last;
730 int nc;
731
732 last=w-4;
733 for(x=3;x<=last;x=next_x)
734 {
735 next_x=x+124;
736
737 // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
738 nc = db_mini(128,last-x+1);
739 //nc = 128;
740
741 /*Compute the Harris strength of a chunk*/
742 db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
743 }
744 }
745
db_Max_128Aligned16_f(float * v)746 inline float db_Max_128Aligned16_f(float *v)
747 {
748 #ifdef DB_USE_SIMD
749 float back;
750
751 _asm
752 {
753 mov eax,v
754
755 /*Chunk1*/
756 movaps xmm0,[eax]
757 movaps xmm1,[eax+16]
758 movaps xmm2,[eax+32]
759 movaps xmm3,[eax+48]
760 movaps xmm4,[eax+64]
761 movaps xmm5,[eax+80]
762 movaps xmm6,[eax+96]
763 movaps xmm7,[eax+112]
764
765 /*Chunk2*/
766 maxps xmm0,[eax+128]
767 maxps xmm1,[eax+144]
768 maxps xmm2,[eax+160]
769 maxps xmm3,[eax+176]
770 maxps xmm4,[eax+192]
771 maxps xmm5,[eax+208]
772 maxps xmm6,[eax+224]
773 maxps xmm7,[eax+240]
774
775 /*Chunk3*/
776 maxps xmm0,[eax+256]
777 maxps xmm1,[eax+272]
778 maxps xmm2,[eax+288]
779 maxps xmm3,[eax+304]
780 maxps xmm4,[eax+320]
781 maxps xmm5,[eax+336]
782 maxps xmm6,[eax+352]
783 maxps xmm7,[eax+368]
784
785 /*Chunk4*/
786 maxps xmm0,[eax+384]
787 maxps xmm1,[eax+400]
788 maxps xmm2,[eax+416]
789 maxps xmm3,[eax+432]
790 maxps xmm4,[eax+448]
791 maxps xmm5,[eax+464]
792 maxps xmm6,[eax+480]
793 maxps xmm7,[eax+496]
794
795 /*Collect*/
796 maxps xmm0,xmm1
797 maxps xmm2,xmm3
798 maxps xmm4,xmm5
799 maxps xmm6,xmm7
800 maxps xmm0,xmm2
801 maxps xmm4,xmm6
802 maxps xmm0,xmm4
803 movhlps xmm1,xmm0
804 maxps xmm0,xmm1
805 shufps xmm1,xmm0,1
806 maxps xmm0,xmm1
807 movss back,xmm0
808 }
809
810 return(back);
811 #else
812 float val,max_val;
813 float *p,*stop_p;
814 max_val=v[0];
815 for(p=v+1,stop_p=v+128;p!=stop_p;)
816 {
817 val= *p++;
818 if(val>max_val) max_val=val;
819 }
820 return(max_val);
821 #endif /*DB_USE_SIMD*/
822 }
823
db_Max_64Aligned16_f(float * v)824 inline float db_Max_64Aligned16_f(float *v)
825 {
826 #ifdef DB_USE_SIMD
827 float back;
828
829 _asm
830 {
831 mov eax,v
832
833 /*Chunk1*/
834 movaps xmm0,[eax]
835 movaps xmm1,[eax+16]
836 movaps xmm2,[eax+32]
837 movaps xmm3,[eax+48]
838 movaps xmm4,[eax+64]
839 movaps xmm5,[eax+80]
840 movaps xmm6,[eax+96]
841 movaps xmm7,[eax+112]
842
843 /*Chunk2*/
844 maxps xmm0,[eax+128]
845 maxps xmm1,[eax+144]
846 maxps xmm2,[eax+160]
847 maxps xmm3,[eax+176]
848 maxps xmm4,[eax+192]
849 maxps xmm5,[eax+208]
850 maxps xmm6,[eax+224]
851 maxps xmm7,[eax+240]
852
853 /*Collect*/
854 maxps xmm0,xmm1
855 maxps xmm2,xmm3
856 maxps xmm4,xmm5
857 maxps xmm6,xmm7
858 maxps xmm0,xmm2
859 maxps xmm4,xmm6
860 maxps xmm0,xmm4
861 movhlps xmm1,xmm0
862 maxps xmm0,xmm1
863 shufps xmm1,xmm0,1
864 maxps xmm0,xmm1
865 movss back,xmm0
866 }
867
868 return(back);
869 #else
870 float val,max_val;
871 float *p,*stop_p;
872 max_val=v[0];
873 for(p=v+1,stop_p=v+64;p!=stop_p;)
874 {
875 val= *p++;
876 if(val>max_val) max_val=val;
877 }
878 return(max_val);
879 #endif /*DB_USE_SIMD*/
880 }
881
db_Max_32Aligned16_f(float * v)882 inline float db_Max_32Aligned16_f(float *v)
883 {
884 #ifdef DB_USE_SIMD
885 float back;
886
887 _asm
888 {
889 mov eax,v
890
891 /*Chunk1*/
892 movaps xmm0,[eax]
893 movaps xmm1,[eax+16]
894 movaps xmm2,[eax+32]
895 movaps xmm3,[eax+48]
896 movaps xmm4,[eax+64]
897 movaps xmm5,[eax+80]
898 movaps xmm6,[eax+96]
899 movaps xmm7,[eax+112]
900
901 /*Collect*/
902 maxps xmm0,xmm1
903 maxps xmm2,xmm3
904 maxps xmm4,xmm5
905 maxps xmm6,xmm7
906 maxps xmm0,xmm2
907 maxps xmm4,xmm6
908 maxps xmm0,xmm4
909 movhlps xmm1,xmm0
910 maxps xmm0,xmm1
911 shufps xmm1,xmm0,1
912 maxps xmm0,xmm1
913 movss back,xmm0
914 }
915
916 return(back);
917 #else
918 float val,max_val;
919 float *p,*stop_p;
920 max_val=v[0];
921 for(p=v+1,stop_p=v+32;p!=stop_p;)
922 {
923 val= *p++;
924 if(val>max_val) max_val=val;
925 }
926 return(max_val);
927 #endif /*DB_USE_SIMD*/
928 }
929
db_Max_16Aligned16_f(float * v)930 inline float db_Max_16Aligned16_f(float *v)
931 {
932 #ifdef DB_USE_SIMD
933 float back;
934
935 _asm
936 {
937 mov eax,v
938
939 /*Chunk1*/
940 movaps xmm0,[eax]
941 movaps xmm1,[eax+16]
942 movaps xmm2,[eax+32]
943 movaps xmm3,[eax+48]
944
945 /*Collect*/
946 maxps xmm0,xmm1
947 maxps xmm2,xmm3
948 maxps xmm0,xmm2
949 movhlps xmm1,xmm0
950 maxps xmm0,xmm1
951 shufps xmm1,xmm0,1
952 maxps xmm0,xmm1
953 movss back,xmm0
954 }
955
956 return(back);
957 #else
958 float val,max_val;
959 float *p,*stop_p;
960 max_val=v[0];
961 for(p=v+1,stop_p=v+16;p!=stop_p;)
962 {
963 val= *p++;
964 if(val>max_val) max_val=val;
965 }
966 return(max_val);
967 #endif /*DB_USE_SIMD*/
968 }
969
db_Max_8Aligned16_f(float * v)970 inline float db_Max_8Aligned16_f(float *v)
971 {
972 #ifdef DB_USE_SIMD
973 float back;
974
975 _asm
976 {
977 mov eax,v
978
979 /*Chunk1*/
980 movaps xmm0,[eax]
981 movaps xmm1,[eax+16]
982
983 /*Collect*/
984 maxps xmm0,xmm1
985 movhlps xmm1,xmm0
986 maxps xmm0,xmm1
987 shufps xmm1,xmm0,1
988 maxps xmm0,xmm1
989 movss back,xmm0
990 }
991
992 return(back);
993 #else
994 float val,max_val;
995 float *p,*stop_p;
996 max_val=v[0];
997 for(p=v+1,stop_p=v+8;p!=stop_p;)
998 {
999 val= *p++;
1000 if(val>max_val) max_val=val;
1001 }
1002 return(max_val);
1003 #endif /*DB_USE_SIMD*/
1004 }
1005
db_Max_Aligned16_f(float * v,int size)1006 inline float db_Max_Aligned16_f(float *v,int size)
1007 {
1008 float val,max_val;
1009 float *stop_v;
1010
1011 max_val=v[0];
1012 for(;size>=128;size-=128)
1013 {
1014 val=db_Max_128Aligned16_f(v);
1015 v+=128;
1016 if(val>max_val) max_val=val;
1017 }
1018 if(size&64)
1019 {
1020 val=db_Max_64Aligned16_f(v);
1021 v+=64;
1022 if(val>max_val) max_val=val;
1023 }
1024 if(size&32)
1025 {
1026 val=db_Max_32Aligned16_f(v);
1027 v+=32;
1028 if(val>max_val) max_val=val;
1029 }
1030 if(size&16)
1031 {
1032 val=db_Max_16Aligned16_f(v);
1033 v+=16;
1034 if(val>max_val) max_val=val;
1035 }
1036 if(size&8)
1037 {
1038 val=db_Max_8Aligned16_f(v);
1039 v+=8;
1040 if(val>max_val) max_val=val;
1041 }
1042 if(size&7)
1043 {
1044 for(stop_v=v+(size&7);v!=stop_v;)
1045 {
1046 val= *v++;
1047 if(val>max_val) max_val=val;
1048 }
1049 }
1050
1051 return(max_val);
1052 }
1053
1054 /*Find maximum value of img in the region starting at (left,top)
1055 and with width w and height h. img[left] should be 16 byte aligned*/
db_MaxImage_Aligned16_f(float ** img,int left,int top,int w,int h)1056 float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
1057 {
1058 float val,max_val;
1059 int i,stop_i;
1060
1061 if(w && h)
1062 {
1063 stop_i=top+h;
1064 max_val=img[top][left];
1065
1066 for(i=top;i<stop_i;i++)
1067 {
1068 val=db_Max_Aligned16_f(img[i]+left,w);
1069 if(val>max_val) max_val=val;
1070 }
1071 return(max_val);
1072 }
1073 return(0.0);
1074 }
1075
db_MaxVector_128_Aligned16_f(float * m,float * v1,float * v2)1076 inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
1077 {
1078 #ifdef DB_USE_SIMD
1079 _asm
1080 {
1081 mov eax,v1
1082 mov ebx,v2
1083 mov ecx,m
1084
1085 /*Chunk1*/
1086 movaps xmm0,[eax]
1087 movaps xmm1,[eax+16]
1088 movaps xmm2,[eax+32]
1089 movaps xmm3,[eax+48]
1090 movaps xmm4,[eax+64]
1091 movaps xmm5,[eax+80]
1092 movaps xmm6,[eax+96]
1093 movaps xmm7,[eax+112]
1094 maxps xmm0,[ebx]
1095 maxps xmm1,[ebx+16]
1096 maxps xmm2,[ebx+32]
1097 maxps xmm3,[ebx+48]
1098 maxps xmm4,[ebx+64]
1099 maxps xmm5,[ebx+80]
1100 maxps xmm6,[ebx+96]
1101 maxps xmm7,[ebx+112]
1102 movaps [ecx],xmm0
1103 movaps [ecx+16],xmm1
1104 movaps [ecx+32],xmm2
1105 movaps [ecx+48],xmm3
1106 movaps [ecx+64],xmm4
1107 movaps [ecx+80],xmm5
1108 movaps [ecx+96],xmm6
1109 movaps [ecx+112],xmm7
1110
1111 /*Chunk2*/
1112 movaps xmm0,[eax+128]
1113 movaps xmm1,[eax+144]
1114 movaps xmm2,[eax+160]
1115 movaps xmm3,[eax+176]
1116 movaps xmm4,[eax+192]
1117 movaps xmm5,[eax+208]
1118 movaps xmm6,[eax+224]
1119 movaps xmm7,[eax+240]
1120 maxps xmm0,[ebx+128]
1121 maxps xmm1,[ebx+144]
1122 maxps xmm2,[ebx+160]
1123 maxps xmm3,[ebx+176]
1124 maxps xmm4,[ebx+192]
1125 maxps xmm5,[ebx+208]
1126 maxps xmm6,[ebx+224]
1127 maxps xmm7,[ebx+240]
1128 movaps [ecx+128],xmm0
1129 movaps [ecx+144],xmm1
1130 movaps [ecx+160],xmm2
1131 movaps [ecx+176],xmm3
1132 movaps [ecx+192],xmm4
1133 movaps [ecx+208],xmm5
1134 movaps [ecx+224],xmm6
1135 movaps [ecx+240],xmm7
1136
1137 /*Chunk3*/
1138 movaps xmm0,[eax+256]
1139 movaps xmm1,[eax+272]
1140 movaps xmm2,[eax+288]
1141 movaps xmm3,[eax+304]
1142 movaps xmm4,[eax+320]
1143 movaps xmm5,[eax+336]
1144 movaps xmm6,[eax+352]
1145 movaps xmm7,[eax+368]
1146 maxps xmm0,[ebx+256]
1147 maxps xmm1,[ebx+272]
1148 maxps xmm2,[ebx+288]
1149 maxps xmm3,[ebx+304]
1150 maxps xmm4,[ebx+320]
1151 maxps xmm5,[ebx+336]
1152 maxps xmm6,[ebx+352]
1153 maxps xmm7,[ebx+368]
1154 movaps [ecx+256],xmm0
1155 movaps [ecx+272],xmm1
1156 movaps [ecx+288],xmm2
1157 movaps [ecx+304],xmm3
1158 movaps [ecx+320],xmm4
1159 movaps [ecx+336],xmm5
1160 movaps [ecx+352],xmm6
1161 movaps [ecx+368],xmm7
1162
1163 /*Chunk4*/
1164 movaps xmm0,[eax+384]
1165 movaps xmm1,[eax+400]
1166 movaps xmm2,[eax+416]
1167 movaps xmm3,[eax+432]
1168 movaps xmm4,[eax+448]
1169 movaps xmm5,[eax+464]
1170 movaps xmm6,[eax+480]
1171 movaps xmm7,[eax+496]
1172 maxps xmm0,[ebx+384]
1173 maxps xmm1,[ebx+400]
1174 maxps xmm2,[ebx+416]
1175 maxps xmm3,[ebx+432]
1176 maxps xmm4,[ebx+448]
1177 maxps xmm5,[ebx+464]
1178 maxps xmm6,[ebx+480]
1179 maxps xmm7,[ebx+496]
1180 movaps [ecx+384],xmm0
1181 movaps [ecx+400],xmm1
1182 movaps [ecx+416],xmm2
1183 movaps [ecx+432],xmm3
1184 movaps [ecx+448],xmm4
1185 movaps [ecx+464],xmm5
1186 movaps [ecx+480],xmm6
1187 movaps [ecx+496],xmm7
1188 }
1189 #else
1190 int i;
1191 float a,b;
1192 for(i=0;i<128;i++)
1193 {
1194 a=v1[i];
1195 b=v2[i];
1196 if(a>=b) m[i]=a;
1197 else m[i]=b;
1198 }
1199 #endif /*DB_USE_SIMD*/
1200 }
1201
db_MaxVector_128_SecondSourceDestAligned16_f(float * m,float * v1,float * v2)1202 inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
1203 {
1204 #ifdef DB_USE_SIMD
1205 _asm
1206 {
1207 mov eax,v1
1208 mov ebx,v2
1209 mov ecx,m
1210
1211 /*Chunk1*/
1212 movups xmm0,[eax]
1213 movups xmm1,[eax+16]
1214 movups xmm2,[eax+32]
1215 movups xmm3,[eax+48]
1216 movups xmm4,[eax+64]
1217 movups xmm5,[eax+80]
1218 movups xmm6,[eax+96]
1219 movups xmm7,[eax+112]
1220 maxps xmm0,[ebx]
1221 maxps xmm1,[ebx+16]
1222 maxps xmm2,[ebx+32]
1223 maxps xmm3,[ebx+48]
1224 maxps xmm4,[ebx+64]
1225 maxps xmm5,[ebx+80]
1226 maxps xmm6,[ebx+96]
1227 maxps xmm7,[ebx+112]
1228 movaps [ecx],xmm0
1229 movaps [ecx+16],xmm1
1230 movaps [ecx+32],xmm2
1231 movaps [ecx+48],xmm3
1232 movaps [ecx+64],xmm4
1233 movaps [ecx+80],xmm5
1234 movaps [ecx+96],xmm6
1235 movaps [ecx+112],xmm7
1236
1237 /*Chunk2*/
1238 movups xmm0,[eax+128]
1239 movups xmm1,[eax+144]
1240 movups xmm2,[eax+160]
1241 movups xmm3,[eax+176]
1242 movups xmm4,[eax+192]
1243 movups xmm5,[eax+208]
1244 movups xmm6,[eax+224]
1245 movups xmm7,[eax+240]
1246 maxps xmm0,[ebx+128]
1247 maxps xmm1,[ebx+144]
1248 maxps xmm2,[ebx+160]
1249 maxps xmm3,[ebx+176]
1250 maxps xmm4,[ebx+192]
1251 maxps xmm5,[ebx+208]
1252 maxps xmm6,[ebx+224]
1253 maxps xmm7,[ebx+240]
1254 movaps [ecx+128],xmm0
1255 movaps [ecx+144],xmm1
1256 movaps [ecx+160],xmm2
1257 movaps [ecx+176],xmm3
1258 movaps [ecx+192],xmm4
1259 movaps [ecx+208],xmm5
1260 movaps [ecx+224],xmm6
1261 movaps [ecx+240],xmm7
1262
1263 /*Chunk3*/
1264 movups xmm0,[eax+256]
1265 movups xmm1,[eax+272]
1266 movups xmm2,[eax+288]
1267 movups xmm3,[eax+304]
1268 movups xmm4,[eax+320]
1269 movups xmm5,[eax+336]
1270 movups xmm6,[eax+352]
1271 movups xmm7,[eax+368]
1272 maxps xmm0,[ebx+256]
1273 maxps xmm1,[ebx+272]
1274 maxps xmm2,[ebx+288]
1275 maxps xmm3,[ebx+304]
1276 maxps xmm4,[ebx+320]
1277 maxps xmm5,[ebx+336]
1278 maxps xmm6,[ebx+352]
1279 maxps xmm7,[ebx+368]
1280 movaps [ecx+256],xmm0
1281 movaps [ecx+272],xmm1
1282 movaps [ecx+288],xmm2
1283 movaps [ecx+304],xmm3
1284 movaps [ecx+320],xmm4
1285 movaps [ecx+336],xmm5
1286 movaps [ecx+352],xmm6
1287 movaps [ecx+368],xmm7
1288
1289 /*Chunk4*/
1290 movups xmm0,[eax+384]
1291 movups xmm1,[eax+400]
1292 movups xmm2,[eax+416]
1293 movups xmm3,[eax+432]
1294 movups xmm4,[eax+448]
1295 movups xmm5,[eax+464]
1296 movups xmm6,[eax+480]
1297 movups xmm7,[eax+496]
1298 maxps xmm0,[ebx+384]
1299 maxps xmm1,[ebx+400]
1300 maxps xmm2,[ebx+416]
1301 maxps xmm3,[ebx+432]
1302 maxps xmm4,[ebx+448]
1303 maxps xmm5,[ebx+464]
1304 maxps xmm6,[ebx+480]
1305 maxps xmm7,[ebx+496]
1306 movaps [ecx+384],xmm0
1307 movaps [ecx+400],xmm1
1308 movaps [ecx+416],xmm2
1309 movaps [ecx+432],xmm3
1310 movaps [ecx+448],xmm4
1311 movaps [ecx+464],xmm5
1312 movaps [ecx+480],xmm6
1313 movaps [ecx+496],xmm7
1314 }
1315 #else
1316 int i;
1317 float a,b;
1318 for(i=0;i<128;i++)
1319 {
1320 a=v1[i];
1321 b=v2[i];
1322 if(a>=b) m[i]=a;
1323 else m[i]=b;
1324 }
1325 #endif /*DB_USE_SIMD*/
1326 }
1327
1328 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and
1329 stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row.
1330 The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
1331 s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/
db_MaxSuppressFilterChunk_5x5_Aligned16_f(float ** sf,float ** s,int left,int top,int bottom,float * temp)1332 inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
1333 /*temp should point to at least
1334 6*132 floats of 16-byte-aligned allocated memory*/
1335 float *temp)
1336 {
1337 #ifdef DB_USE_SIMD
1338 int i,lm2;
1339 float *two[4];
1340 float *four,*five;
1341
1342 lm2=left-2;
1343
1344 /*Set pointers to pre-allocated memory*/
1345 four=temp;
1346 five=four+132;
1347 for(i=0;i<4;i++)
1348 {
1349 two[i]=five+(i+1)*132;
1350 }
1351
1352 /*Set rests of four and five to zero to avoid
1353 floating point exceptions*/
1354 for(i=129;i<132;i++)
1355 {
1356 four[i]=0.0;
1357 five[i]=0.0;
1358 }
1359
1360 /*Fill three rows of the wrap-around max buffers*/
1361 for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
1362
1363 /*For each output row*/
1364 for(;i<=bottom;i++)
1365 {
1366 /*Compute max of the lowest pair of rows in the five row window*/
1367 db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
1368 /*Compute max of the lowest and highest pair of rows in the five row window*/
1369 db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
1370 /*Compute max of all rows*/
1371 db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
1372 /*Compute max of 2x5 chunks*/
1373 db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
1374 /*Compute max of pairs of 2x5 chunks*/
1375 db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
1376 /*Compute max of pairs of 5x5 except middle*/
1377 db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
1378 }
1379
1380 #else
1381 int i,j,right;
1382 float sv;
1383
1384 right=left+128;
1385 for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
1386 {
1387 sv=s[i][j];
1388
1389 if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
1390 sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
1391 sv>s[ i][j-2] && sv>s[ i][j-1] && sv>s[ i][j+1] && sv>s[ i][j+2] &&
1392 sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
1393 sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
1394 {
1395 sf[i][j-2]=0.0;
1396 }
1397 else sf[i][j-2]=sv;
1398 }
1399 #endif /*DB_USE_SIMD*/
1400 }
1401
1402 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
1403 stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
1404 outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
1405 Top must be at least 3. Reading and writing from and to the input and output images is done
1406 as if the region had a width equal to a multiple of 124. If this is not the case, the images
1407 should be over-allocated and the input cleared for a sufficient region*/
db_MaxSuppressFilter_5x5_Aligned16_f(float ** sf,float ** s,int left,int top,int right,int bottom,float * temp)1408 void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
1409 /*temp should point to at least
1410 6*132 floats of 16-byte-aligned allocated memory*/
1411 float *temp)
1412 {
1413 int x,next_x;
1414
1415 for(x=left;x<=right;x=next_x)
1416 {
1417 next_x=x+124;
1418 db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
1419 }
1420 }
1421
1422 /*Extract corners from the chunk (left,top) to (right,bottom). Store in x_temp,y_temp and s_temp
1423 which should point to space of at least as many positions as there are pixels in the chunk*/
db_CornersFromChunk(float ** strength,int left,int top,int right,int bottom,float threshold,double * x_temp,double * y_temp,double * s_temp)1424 inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
1425 {
1426 int i,j,nr;
1427 float s;
1428
1429 nr=0;
1430 for(i=top;i<=bottom;i++) for(j=left;j<=right;j++)
1431 {
1432 s=strength[i][j];
1433
1434 if(s>=threshold &&
1435 s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
1436 s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
1437 s>strength[ i][j-2] && s>strength[ i][j-1] && s>strength[ i][j+1] && s>strength[ i][j+2] &&
1438 s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
1439 s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
1440 {
1441 x_temp[nr]=(double) j;
1442 y_temp[nr]=(double) i;
1443 s_temp[nr]=(double) s;
1444 nr++;
1445 }
1446 }
1447 return(nr);
1448 }
1449
1450
1451 //Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
db_SubPixel(float ** strength,const double xd,const double yd,double & xs,double & ys)1452 inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
1453 {
1454 int x = (int) xd;
1455 int y = (int) yd;
1456
1457 float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
1458 float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
1459 float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
1460
1461 float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
1462
1463 xs = xd;
1464 ys = yd;
1465
1466 if ( db_absf(denom) <= FLT_EPSILON )
1467 {
1468 return;
1469 }
1470 else
1471 {
1472 float fx = strength[y][x+1] - strength[y][x-1];
1473 float fy = strength[y+1][x] - strength[y-1][x];
1474
1475 float dx = (fyy * fx - fxy * fy) / denom;
1476 float dy = (fxx * fy - fxy * fx) / denom;
1477
1478 if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
1479 {
1480 return;
1481 }
1482 else
1483 {
1484 xs -= dx;
1485 ys -= dy;
1486 }
1487 }
1488
1489 return;
1490 }
1491
1492 /*Extract corners from the image part from (left,top) to (right,bottom).
1493 Store in x and y, extracting at most satnr corners in each block of size (bw,bh).
1494 The pointer temp_d should point to at least 5*bw*bh positions.
1495 area_factor holds how many corners max to extract per 10000 pixels*/
db_ExtractCornersSaturated(float ** strength,int left,int top,int right,int bottom,int bw,int bh,unsigned long area_factor,float threshold,double * temp_d,double * x_coord,double * y_coord,int * nr_corners)1496 void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
1497 int bw,int bh,unsigned long area_factor,
1498 float threshold,double *temp_d,
1499 double *x_coord,double *y_coord,int *nr_corners)
1500 {
1501 double *x_temp,*y_temp,*s_temp,*select_temp;
1502 double loc_thresh;
1503 unsigned long bwbh,area,saturation;
1504 int x,next_x,last_x;
1505 int y,next_y,last_y;
1506 int nr,nr_points,i,stop;
1507
1508 bwbh=bw*bh;
1509 x_temp=temp_d;
1510 y_temp=x_temp+bwbh;
1511 s_temp=y_temp+bwbh;
1512 select_temp=s_temp+bwbh;
1513
1514 #ifdef DB_SUB_PIXEL
1515 // subpixel processing may sometimes push the corner ourside the real border
1516 // increasing border size:
1517 left++;
1518 top++;
1519 bottom--;
1520 right--;
1521 #endif /*DB_SUB_PIXEL*/
1522
1523 nr_points=0;
1524 for(y=top;y<=bottom;y=next_y)
1525 {
1526 next_y=y+bh;
1527 last_y=next_y-1;
1528 if(last_y>bottom) last_y=bottom;
1529 for(x=left;x<=right;x=next_x)
1530 {
1531 next_x=x+bw;
1532 last_x=next_x-1;
1533 if(last_x>right) last_x=right;
1534
1535 area=(last_x-x+1)*(last_y-y+1);
1536 saturation=(area*area_factor)/10000;
1537 nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
1538 if(nr)
1539 {
1540 if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
1541 else loc_thresh=threshold;
1542
1543 stop=nr_points+saturation;
1544 for(i=0;(i<nr)&&(nr_points<stop);i++)
1545 {
1546 if(s_temp[i]>=loc_thresh)
1547 {
1548 #ifdef DB_SUB_PIXEL
1549 db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
1550 #else
1551 x_coord[nr_points]=x_temp[i];
1552 y_coord[nr_points]=y_temp[i];
1553 #endif
1554
1555 nr_points++;
1556 }
1557 }
1558 }
1559 }
1560 }
1561 *nr_corners=nr_points;
1562 }
1563
db_CornerDetector_f()1564 db_CornerDetector_f::db_CornerDetector_f()
1565 {
1566 m_w=0; m_h=0;
1567 }
1568
~db_CornerDetector_f()1569 db_CornerDetector_f::~db_CornerDetector_f()
1570 {
1571 Clean();
1572 }
1573
Clean()1574 void db_CornerDetector_f::Clean()
1575 {
1576 if(m_w!=0)
1577 {
1578 delete [] m_temp_f;
1579 delete [] m_temp_d;
1580 db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1581 }
1582 m_w=0; m_h=0;
1583 }
1584
Init(int im_width,int im_height,int target_nr_corners,int nr_horizontal_blocks,int nr_vertical_blocks,double absolute_threshold,double relative_threshold)1585 unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
1586 int nr_horizontal_blocks,int nr_vertical_blocks,
1587 double absolute_threshold,double relative_threshold)
1588 {
1589 int chunkwidth=208;
1590 int block_width,block_height;
1591 unsigned long area_factor;
1592 int active_width,active_height;
1593
1594 active_width=db_maxi(1,im_width-10);
1595 active_height=db_maxi(1,im_height-10);
1596 block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1597 block_height=db_maxi(1,active_height/nr_vertical_blocks);
1598
1599 area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1600 (((double)active_width)*((double)active_height)))));
1601
1602 return(Start(im_width,im_height,block_width,block_height,area_factor,
1603 absolute_threshold,relative_threshold,chunkwidth));
1604 }
1605
Start(int im_width,int im_height,int block_width,int block_height,unsigned long area_factor,double absolute_threshold,double relative_threshold,int chunkwidth)1606 unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
1607 int block_width,int block_height,unsigned long area_factor,
1608 double absolute_threshold,double relative_threshold,int chunkwidth)
1609 {
1610 Clean();
1611
1612 m_w=im_width;
1613 m_h=im_height;
1614 m_cw=chunkwidth;
1615 m_bw=block_width;
1616 m_bh=block_height;
1617 m_area_factor=area_factor;
1618 m_r_thresh=relative_threshold;
1619 m_a_thresh=absolute_threshold;
1620 m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1621
1622 m_temp_f=new float[13*(m_cw+4)];
1623 m_temp_d=new double[5*m_bw*m_bh];
1624 m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1625
1626 return(m_max_nr);
1627 }
1628
DetectCorners(const float * const * img,double * x_coord,double * y_coord,int * nr_corners) const1629 void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
1630 {
1631 float max_val,threshold;
1632
1633 db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
1634
1635 if(m_r_thresh)
1636 {
1637 max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1638 threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1639 }
1640 else threshold= (float) m_a_thresh;
1641
1642 db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1643 m_temp_d,x_coord,y_coord,nr_corners);
1644 }
1645
db_CornerDetector_u()1646 db_CornerDetector_u::db_CornerDetector_u()
1647 {
1648 m_w=0; m_h=0;
1649 }
1650
~db_CornerDetector_u()1651 db_CornerDetector_u::~db_CornerDetector_u()
1652 {
1653 Clean();
1654 }
1655
db_CornerDetector_u(const db_CornerDetector_u & cd)1656 db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
1657 {
1658 Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1659 cd.m_a_thresh, cd.m_r_thresh);
1660 }
1661
operator =(const db_CornerDetector_u & cd)1662 db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
1663 {
1664 if ( this == &cd ) return *this;
1665
1666 Clean();
1667
1668 Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1669 cd.m_a_thresh, cd.m_r_thresh);
1670
1671 return *this;
1672 }
1673
Clean()1674 void db_CornerDetector_u::Clean()
1675 {
1676 if(m_w!=0)
1677 {
1678 delete [] m_temp_i;
1679 delete [] m_temp_d;
1680 db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1681 }
1682 m_w=0; m_h=0;
1683 }
1684
Init(int im_width,int im_height,int target_nr_corners,int nr_horizontal_blocks,int nr_vertical_blocks,double absolute_threshold,double relative_threshold)1685 unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
1686 int nr_horizontal_blocks,int nr_vertical_blocks,
1687 double absolute_threshold,double relative_threshold)
1688 {
1689 int block_width,block_height;
1690 unsigned long area_factor;
1691 int active_width,active_height;
1692
1693 active_width=db_maxi(1,im_width-10);
1694 active_height=db_maxi(1,im_height-10);
1695 block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1696 block_height=db_maxi(1,active_height/nr_vertical_blocks);
1697
1698 area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1699 (((double)active_width)*((double)active_height)))));
1700
1701 return(Start(im_width,im_height,block_width,block_height,area_factor,
1702 16.0*absolute_threshold,relative_threshold));
1703 }
1704
Start(int im_width,int im_height,int block_width,int block_height,unsigned long area_factor,double absolute_threshold,double relative_threshold)1705 unsigned long db_CornerDetector_u::Start(int im_width,int im_height,
1706 int block_width,int block_height,unsigned long area_factor,
1707 double absolute_threshold,double relative_threshold)
1708 {
1709 Clean();
1710
1711 m_w=im_width;
1712 m_h=im_height;
1713 m_bw=block_width;
1714 m_bh=block_height;
1715 m_area_factor=area_factor;
1716 m_r_thresh=relative_threshold;
1717 m_a_thresh=absolute_threshold;
1718 m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1719
1720 m_temp_i=new int[18*128];
1721 m_temp_d=new double[5*m_bw*m_bh];
1722 m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1723
1724 return(m_max_nr);
1725 }
1726
DetectCorners(const unsigned char * const * img,double * x_coord,double * y_coord,int * nr_corners,const unsigned char * const * msk,unsigned char fgnd) const1727 void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
1728 const unsigned char * const *msk, unsigned char fgnd) const
1729 {
1730 float max_val,threshold;
1731
1732 db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
1733
1734
1735 if(m_r_thresh)
1736 {
1737 max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1738 threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1739 }
1740 else threshold= (float) m_a_thresh;
1741
1742 db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1743 m_temp_d,x_coord,y_coord,nr_corners);
1744
1745
1746 if ( msk )
1747 {
1748 int nr_corners_mask=0;
1749
1750 for ( int i = 0; i < *nr_corners; ++i)
1751 {
1752 int cor_x = db_roundi(*(x_coord+i));
1753 int cor_y = db_roundi(*(y_coord+i));
1754 if ( msk[cor_y][cor_x] == fgnd )
1755 {
1756 x_coord[nr_corners_mask] = x_coord[i];
1757 y_coord[nr_corners_mask] = y_coord[i];
1758 nr_corners_mask++;
1759 }
1760 }
1761 *nr_corners = nr_corners_mask;
1762 }
1763 }
1764
ExtractCorners(float ** strength,double * x_coord,double * y_coord,int * nr_corners)1765 void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
1766 if ( m_w!=0 )
1767 db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
1768 m_temp_d,x_coord,y_coord,nr_corners);
1769 }
1770
1771