/***********************************************************************
 * Software License Agreement (BSD License)
 *
 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
 *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/ 30 31 #ifndef OPENCV_FLANN_KDTREE_INDEX_H_ 32 #define OPENCV_FLANN_KDTREE_INDEX_H_ 33 34 #include <algorithm> 35 #include <map> 36 #include <cassert> 37 #include <cstring> 38 39 #include "general.h" 40 #include "nn_index.h" 41 #include "dynamic_bitset.h" 42 #include "matrix.h" 43 #include "result_set.h" 44 #include "heap.h" 45 #include "allocator.h" 46 #include "random.h" 47 #include "saving.h" 48 49 50 namespace cvflann 51 { 52 53 struct KDTreeIndexParams : public IndexParams 54 { 55 KDTreeIndexParams(int trees = 4) 56 { 57 (*this)["algorithm"] = FLANN_INDEX_KDTREE; 58 (*this)["trees"] = trees; 59 } 60 }; 61 62 63 /** 64 * Randomized kd-tree index 65 * 66 * Contains the k-d trees and other information for indexing a set of points 67 * for nearest-neighbor matching. 68 */ 69 template <typename Distance> 70 class KDTreeIndex : public NNIndex<Distance> 71 { 72 public: 73 typedef typename Distance::ElementType ElementType; 74 typedef typename Distance::ResultType DistanceType; 75 76 77 /** 78 * KDTree constructor 79 * 80 * Params: 81 * inputData = dataset with the input features 82 * params = parameters passed to the kdtree algorithm 83 */ 84 KDTreeIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeIndexParams(), 85 Distance d = Distance() ) : dataset_(inputData)86 dataset_(inputData), index_params_(params), distance_(d) 87 { 88 size_ = dataset_.rows; 89 veclen_ = dataset_.cols; 90 91 trees_ = get_param(index_params_,"trees",4); 92 tree_roots_ = new NodePtr[trees_]; 93 94 // Create a permutable array of indices to the input vectors. 
95 vind_.resize(size_); 96 for (size_t i = 0; i < size_; ++i) { 97 vind_[i] = int(i); 98 } 99 100 mean_ = new DistanceType[veclen_]; 101 var_ = new DistanceType[veclen_]; 102 } 103 104 105 KDTreeIndex(const KDTreeIndex&); 106 KDTreeIndex& operator=(const KDTreeIndex&); 107 108 /** 109 * Standard destructor 110 */ ~KDTreeIndex()111 ~KDTreeIndex() 112 { 113 if (tree_roots_!=NULL) { 114 delete[] tree_roots_; 115 } 116 delete[] mean_; 117 delete[] var_; 118 } 119 120 /** 121 * Builds the index 122 */ buildIndex()123 void buildIndex() 124 { 125 /* Construct the randomized trees. */ 126 for (int i = 0; i < trees_; i++) { 127 /* Randomize the order of vectors to allow for unbiased sampling. */ 128 std::random_shuffle(vind_.begin(), vind_.end()); 129 tree_roots_[i] = divideTree(&vind_[0], int(size_) ); 130 } 131 } 132 133 getType()134 flann_algorithm_t getType() const 135 { 136 return FLANN_INDEX_KDTREE; 137 } 138 139 saveIndex(FILE * stream)140 void saveIndex(FILE* stream) 141 { 142 save_value(stream, trees_); 143 for (int i=0; i<trees_; ++i) { 144 save_tree(stream, tree_roots_[i]); 145 } 146 } 147 148 149 loadIndex(FILE * stream)150 void loadIndex(FILE* stream) 151 { 152 load_value(stream, trees_); 153 if (tree_roots_!=NULL) { 154 delete[] tree_roots_; 155 } 156 tree_roots_ = new NodePtr[trees_]; 157 for (int i=0; i<trees_; ++i) { 158 load_tree(stream,tree_roots_[i]); 159 } 160 161 index_params_["algorithm"] = getType(); 162 index_params_["trees"] = tree_roots_; 163 } 164 165 /** 166 * Returns size of index. 167 */ size()168 size_t size() const 169 { 170 return size_; 171 } 172 173 /** 174 * Returns the length of an index feature. 
175 */ veclen()176 size_t veclen() const 177 { 178 return veclen_; 179 } 180 181 /** 182 * Computes the inde memory usage 183 * Returns: memory used by the index 184 */ usedMemory()185 int usedMemory() const 186 { 187 return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory 188 } 189 190 /** 191 * Find set of nearest neighbors to vec. Their indices are stored inside 192 * the result object. 193 * 194 * Params: 195 * result = the result object in which the indices of the nearest-neighbors are stored 196 * vec = the vector for which to search the nearest neighbors 197 * maxCheck = the maximum number of restarts (in a best-bin-first manner) 198 */ findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)199 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) 200 { 201 int maxChecks = get_param(searchParams,"checks", 32); 202 float epsError = 1+get_param(searchParams,"eps",0.0f); 203 204 if (maxChecks==FLANN_CHECKS_UNLIMITED) { 205 getExactNeighbors(result, vec, epsError); 206 } 207 else { 208 getNeighbors(result, vec, maxChecks, epsError); 209 } 210 } 211 getParameters()212 IndexParams getParameters() const 213 { 214 return index_params_; 215 } 216 217 private: 218 219 220 /*--------------------- Internal Data Structures --------------------------*/ 221 struct Node 222 { 223 /** 224 * Dimension used for subdivision. 225 */ 226 int divfeat; 227 /** 228 * The values used for subdivision. 229 */ 230 DistanceType divval; 231 /** 232 * The child nodes. 
233 */ 234 Node* child1, * child2; 235 }; 236 typedef Node* NodePtr; 237 typedef BranchStruct<NodePtr, DistanceType> BranchSt; 238 typedef BranchSt* Branch; 239 240 241 save_tree(FILE * stream,NodePtr tree)242 void save_tree(FILE* stream, NodePtr tree) 243 { 244 save_value(stream, *tree); 245 if (tree->child1!=NULL) { 246 save_tree(stream, tree->child1); 247 } 248 if (tree->child2!=NULL) { 249 save_tree(stream, tree->child2); 250 } 251 } 252 253 load_tree(FILE * stream,NodePtr & tree)254 void load_tree(FILE* stream, NodePtr& tree) 255 { 256 tree = pool_.allocate<Node>(); 257 load_value(stream, *tree); 258 if (tree->child1!=NULL) { 259 load_tree(stream, tree->child1); 260 } 261 if (tree->child2!=NULL) { 262 load_tree(stream, tree->child2); 263 } 264 } 265 266 267 /** 268 * Create a tree node that subdivides the list of vecs from vind[first] 269 * to vind[last]. The routine is called recursively on each sublist. 270 * Place a pointer to this new tree node in the location pTree. 271 * 272 * Params: pTree = the new node to create 273 * first = index of the first vector 274 * last = index of the last vector 275 */ divideTree(int * ind,int count)276 NodePtr divideTree(int* ind, int count) 277 { 278 NodePtr node = pool_.allocate<Node>(); // allocate memory 279 280 /* If too few exemplars remain, then make this a leaf node. */ 281 if ( count == 1) { 282 node->child1 = node->child2 = NULL; /* Mark as leaf node. */ 283 node->divfeat = *ind; /* Store index of this vec. */ 284 } 285 else { 286 int idx; 287 int cutfeat; 288 DistanceType cutval; 289 meanSplit(ind, count, idx, cutfeat, cutval); 290 291 node->divfeat = cutfeat; 292 node->divval = cutval; 293 node->child1 = divideTree(ind, idx); 294 node->child2 = divideTree(ind+idx, count-idx); 295 } 296 297 return node; 298 } 299 300 301 /** 302 * Choose which feature to use in order to subdivide this set of vectors. 
303 * Make a random choice among those with the highest variance, and use 304 * its variance as the threshold value. 305 */ meanSplit(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval)306 void meanSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval) 307 { 308 memset(mean_,0,veclen_*sizeof(DistanceType)); 309 memset(var_,0,veclen_*sizeof(DistanceType)); 310 311 /* Compute mean values. Only the first SAMPLE_MEAN values need to be 312 sampled to get a good estimate. 313 */ 314 int cnt = std::min((int)SAMPLE_MEAN+1, count); 315 for (int j = 0; j < cnt; ++j) { 316 ElementType* v = dataset_[ind[j]]; 317 for (size_t k=0; k<veclen_; ++k) { 318 mean_[k] += v[k]; 319 } 320 } 321 for (size_t k=0; k<veclen_; ++k) { 322 mean_[k] /= cnt; 323 } 324 325 /* Compute variances (no need to divide by count). */ 326 for (int j = 0; j < cnt; ++j) { 327 ElementType* v = dataset_[ind[j]]; 328 for (size_t k=0; k<veclen_; ++k) { 329 DistanceType dist = v[k] - mean_[k]; 330 var_[k] += dist * dist; 331 } 332 } 333 /* Select one of the highest variance indices at random. */ 334 cutfeat = selectDivision(var_); 335 cutval = mean_[cutfeat]; 336 337 int lim1, lim2; 338 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 339 340 if (lim1>count/2) index = lim1; 341 else if (lim2<count/2) index = lim2; 342 else index = count/2; 343 344 /* If either list is empty, it means that all remaining features 345 * are identical. Split in the middle to maintain a balanced tree. 346 */ 347 if ((lim1==count)||(lim2==0)) index = count/2; 348 } 349 350 351 /** 352 * Select the top RAND_DIM largest values from v and return the index of 353 * one of these selected at random. 354 */ selectDivision(DistanceType * v)355 int selectDivision(DistanceType* v) 356 { 357 int num = 0; 358 size_t topind[RAND_DIM]; 359 360 /* Create a list of the indices of the top RAND_DIM values. 
*/ 361 for (size_t i = 0; i < veclen_; ++i) { 362 if ((num < RAND_DIM)||(v[i] > v[topind[num-1]])) { 363 /* Put this element at end of topind. */ 364 if (num < RAND_DIM) { 365 topind[num++] = i; /* Add to list. */ 366 } 367 else { 368 topind[num-1] = i; /* Replace last element. */ 369 } 370 /* Bubble end value down to right location by repeated swapping. */ 371 int j = num - 1; 372 while (j > 0 && v[topind[j]] > v[topind[j-1]]) { 373 std::swap(topind[j], topind[j-1]); 374 --j; 375 } 376 } 377 } 378 /* Select a random integer in range [0,num-1], and return that index. */ 379 int rnd = rand_int(num); 380 return (int)topind[rnd]; 381 } 382 383 384 /** 385 * Subdivide the list of points by a plane perpendicular on axe corresponding 386 * to the 'cutfeat' dimension at 'cutval' position. 387 * 388 * On return: 389 * dataset[ind[0..lim1-1]][cutfeat]<cutval 390 * dataset[ind[lim1..lim2-1]][cutfeat]==cutval 391 * dataset[ind[lim2..count]][cutfeat]>cutval 392 */ planeSplit(int * ind,int count,int cutfeat,DistanceType cutval,int & lim1,int & lim2)393 void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2) 394 { 395 /* Move vector indices for left subtree to front of list. */ 396 int left = 0; 397 int right = count-1; 398 for (;; ) { 399 while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left; 400 while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right; 401 if (left>right) break; 402 std::swap(ind[left], ind[right]); ++left; --right; 403 } 404 lim1 = left; 405 right = count-1; 406 for (;; ) { 407 while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left; 408 while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right; 409 if (left>right) break; 410 std::swap(ind[left], ind[right]); ++left; --right; 411 } 412 lim2 = left; 413 } 414 415 /** 416 * Performs an exact nearest neighbor search. The exact search performs a full 417 * traversal of the tree. 
418 */ getExactNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,float epsError)419 void getExactNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, float epsError) 420 { 421 // checkID -= 1; /* Set a different unique ID for each search. */ 422 423 if (trees_ > 1) { 424 fprintf(stderr,"It doesn't make any sense to use more than one tree for exact search"); 425 } 426 if (trees_>0) { 427 searchLevelExact(result, vec, tree_roots_[0], 0.0, epsError); 428 } 429 assert(result.full()); 430 } 431 432 /** 433 * Performs the approximate nearest-neighbor search. The search is approximate 434 * because the tree traversal is abandoned after a given number of descends in 435 * the tree. 436 */ getNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,int maxCheck,float epsError)437 void getNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, int maxCheck, float epsError) 438 { 439 int i; 440 BranchSt branch; 441 442 int checkCount = 0; 443 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); 444 DynamicBitset checked(size_); 445 446 /* Search once through each tree down to root. */ 447 for (i = 0; i < trees_; ++i) { 448 searchLevel(result, vec, tree_roots_[i], 0, checkCount, maxCheck, epsError, heap, checked); 449 } 450 451 /* Keep searching other branches from heap until finished. */ 452 while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) { 453 searchLevel(result, vec, branch.node, branch.mindist, checkCount, maxCheck, epsError, heap, checked); 454 } 455 456 delete heap; 457 458 assert(result.full()); 459 } 460 461 462 /** 463 * Search starting from a given node of the tree. Based on any mismatches at 464 * higher levels, all exemplars below this level must have a distance of 465 * at least "mindistsq". 
466 */ searchLevel(ResultSet<DistanceType> & result_set,const ElementType * vec,NodePtr node,DistanceType mindist,int & checkCount,int maxCheck,float epsError,Heap<BranchSt> * heap,DynamicBitset & checked)467 void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, NodePtr node, DistanceType mindist, int& checkCount, int maxCheck, 468 float epsError, Heap<BranchSt>* heap, DynamicBitset& checked) 469 { 470 if (result_set.worstDist()<mindist) { 471 // printf("Ignoring branch, too far\n"); 472 return; 473 } 474 475 /* If this is a leaf node, then do check and return. */ 476 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 477 /* Do not check same node more than once when searching multiple trees. 478 Once a vector is checked, we set its location in vind to the 479 current checkID. 480 */ 481 int index = node->divfeat; 482 if ( checked.test(index) || ((checkCount>=maxCheck)&& result_set.full()) ) return; 483 checked.set(index); 484 checkCount++; 485 486 DistanceType dist = distance_(dataset_[index], vec, veclen_); 487 result_set.addPoint(dist,index); 488 489 return; 490 } 491 492 /* Which child branch should be taken first? */ 493 ElementType val = vec[node->divfeat]; 494 DistanceType diff = val - node->divval; 495 NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; 496 NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; 497 498 /* Create a branch record for the branch not taken. Add distance 499 of this feature boundary (we don't attempt to correct for any 500 use of this feature in a parent node, which is unlikely to 501 happen and would have only a small effect). Don't bother 502 adding more branches to heap after halfway point, as cost of 503 adding exceeds their value. 
504 */ 505 506 DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); 507 // if (2 * checkCount < maxCheck || !result.full()) { 508 if ((new_distsq*epsError < result_set.worstDist())|| !result_set.full()) { 509 heap->insert( BranchSt(otherChild, new_distsq) ); 510 } 511 512 /* Call recursively to search next level down. */ 513 searchLevel(result_set, vec, bestChild, mindist, checkCount, maxCheck, epsError, heap, checked); 514 } 515 516 /** 517 * Performs an exact search in the tree starting from a node. 518 */ searchLevelExact(ResultSet<DistanceType> & result_set,const ElementType * vec,const NodePtr node,DistanceType mindist,const float epsError)519 void searchLevelExact(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, const float epsError) 520 { 521 /* If this is a leaf node, then do check and return. */ 522 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 523 int index = node->divfeat; 524 DistanceType dist = distance_(dataset_[index], vec, veclen_); 525 result_set.addPoint(dist,index); 526 return; 527 } 528 529 /* Which child branch should be taken first? */ 530 ElementType val = vec[node->divfeat]; 531 DistanceType diff = val - node->divval; 532 NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; 533 NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; 534 535 /* Create a branch record for the branch not taken. Add distance 536 of this feature boundary (we don't attempt to correct for any 537 use of this feature in a parent node, which is unlikely to 538 happen and would have only a small effect). Don't bother 539 adding more branches to heap after halfway point, as cost of 540 adding exceeds their value. 541 */ 542 543 DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); 544 545 /* Call recursively to search next level down. 
*/ 546 searchLevelExact(result_set, vec, bestChild, mindist, epsError); 547 548 if (new_distsq*epsError<=result_set.worstDist()) { 549 searchLevelExact(result_set, vec, otherChild, new_distsq, epsError); 550 } 551 } 552 553 554 private: 555 556 enum 557 { 558 /** 559 * To improve efficiency, only SAMPLE_MEAN random values are used to 560 * compute the mean and variance at each level when building a tree. 561 * A value of 100 seems to perform as well as using all values. 562 */ 563 SAMPLE_MEAN = 100, 564 /** 565 * Top random dimensions to consider 566 * 567 * When creating random trees, the dimension on which to subdivide is 568 * selected at random from among the top RAND_DIM dimensions with the 569 * highest variance. A value of 5 works well. 570 */ 571 RAND_DIM=5 572 }; 573 574 575 /** 576 * Number of randomized trees that are used 577 */ 578 int trees_; 579 580 /** 581 * Array of indices to vectors in the dataset. 582 */ 583 std::vector<int> vind_; 584 585 /** 586 * The dataset used by this index 587 */ 588 const Matrix<ElementType> dataset_; 589 590 IndexParams index_params_; 591 592 size_t size_; 593 size_t veclen_; 594 595 596 DistanceType* mean_; 597 DistanceType* var_; 598 599 600 /** 601 * Array of k-d trees used to find neighbours. 602 */ 603 NodePtr* tree_roots_; 604 605 /** 606 * Pooled memory allocator. 607 * 608 * Using a pooled memory allocator is more efficient 609 * than allocating memory directly when there is a large 610 * number small of memory allocations. 611 */ 612 PooledAllocator pool_; 613 614 Distance distance_; 615 616 617 }; // class KDTreeForest 618 619 } 620 621 #endif //OPENCV_FLANN_KDTREE_INDEX_H_ 622