1 /*********************************************************************** 2 * Software License Agreement (BSD License) 3 * 4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 6 * 7 * THE BSD LICENSE 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 *************************************************************************/ 30 31 #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ 32 #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ 33 34 #include <algorithm> 35 #include <map> 36 #include <cassert> 37 #include <cstring> 38 39 #include "general.h" 40 #include "nn_index.h" 41 #include "matrix.h" 42 #include "result_set.h" 43 #include "heap.h" 44 #include "allocator.h" 45 #include "random.h" 46 #include "saving.h" 47 48 namespace cvflann 49 { 50 51 struct KDTreeSingleIndexParams : public IndexParams 52 { 53 KDTreeSingleIndexParams(int leaf_max_size = 10, bool reorder = true, int dim = -1) 54 { 55 (*this)["algorithm"] = FLANN_INDEX_KDTREE_SINGLE; 56 (*this)["leaf_max_size"] = leaf_max_size; 57 (*this)["reorder"] = reorder; 58 (*this)["dim"] = dim; 59 } 60 }; 61 62 63 /** 64 * Randomized kd-tree index 65 * 66 * Contains the k-d trees and other information for indexing a set of points 67 * for nearest-neighbor matching. 68 */ 69 template <typename Distance> 70 class KDTreeSingleIndex : public NNIndex<Distance> 71 { 72 public: 73 typedef typename Distance::ElementType ElementType; 74 typedef typename Distance::ResultType DistanceType; 75 76 77 /** 78 * KDTree constructor 79 * 80 * Params: 81 * inputData = dataset with the input features 82 * params = parameters passed to the kdtree algorithm 83 */ 84 KDTreeSingleIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeSingleIndexParams(), 85 Distance d = Distance() ) : dataset_(inputData)86 dataset_(inputData), index_params_(params), distance_(d) 87 { 88 size_ = dataset_.rows; 89 dim_ = dataset_.cols; 90 int dim_param = get_param(params,"dim",-1); 91 if (dim_param>0) dim_ = dim_param; 92 leaf_max_size_ = get_param(params,"leaf_max_size",10); 93 reorder_ = get_param(params,"reorder",true); 94 95 // Create a permutable array of indices to the input vectors. 96 vind_.resize(size_); 97 for (size_t i = 0; i < size_; i++) { 98 vind_[i] = (int)i; 99 } 100 } 101 102 KDTreeSingleIndex(const KDTreeSingleIndex&); 103 KDTreeSingleIndex& operator=(const KDTreeSingleIndex&); 104 105 /** 106 * Standard destructor 107 */ ~KDTreeSingleIndex()108 ~KDTreeSingleIndex() 109 { 110 if (reorder_) delete[] data_.data; 111 } 112 113 /** 114 * Builds the index 115 */ buildIndex()116 void buildIndex() 117 { 118 computeBoundingBox(root_bbox_); 119 root_node_ = divideTree(0, (int)size_, root_bbox_ ); // construct the tree 120 121 if (reorder_) { 122 delete[] data_.data; 123 data_ = cvflann::Matrix<ElementType>(new ElementType[size_*dim_], size_, dim_); 124 for (size_t i=0; i<size_; ++i) { 125 for (size_t j=0; j<dim_; ++j) { 126 data_[i][j] = dataset_[vind_[i]][j]; 127 } 128 } 129 } 130 else { 131 data_ = dataset_; 132 } 133 } 134 getType()135 flann_algorithm_t getType() const 136 { 137 return FLANN_INDEX_KDTREE_SINGLE; 138 } 139 140 saveIndex(FILE * stream)141 void saveIndex(FILE* stream) 142 { 143 save_value(stream, size_); 144 save_value(stream, dim_); 145 save_value(stream, root_bbox_); 146 save_value(stream, reorder_); 147 save_value(stream, leaf_max_size_); 148 save_value(stream, vind_); 149 if (reorder_) { 150 save_value(stream, data_); 151 } 152 save_tree(stream, root_node_); 153 } 154 155 loadIndex(FILE * stream)156 void loadIndex(FILE* stream) 157 { 158 load_value(stream, size_); 159 load_value(stream, dim_); 160 load_value(stream, root_bbox_); 161 load_value(stream, reorder_); 162 load_value(stream, leaf_max_size_); 163 load_value(stream, vind_); 164 if (reorder_) { 165 load_value(stream, data_); 166 } 167 else { 168 data_ = dataset_; 169 } 170 load_tree(stream, root_node_); 171 172 173 index_params_["algorithm"] = getType(); 174 index_params_["leaf_max_size"] = leaf_max_size_; 175 index_params_["reorder"] = reorder_; 176 } 177 178 /** 179 * Returns size of index. 180 */ size()181 size_t size() const 182 { 183 return size_; 184 } 185 186 /** 187 * Returns the length of an index feature. 188 */ veclen()189 size_t veclen() const 190 { 191 return dim_; 192 } 193 194 /** 195 * Computes the inde memory usage 196 * Returns: memory used by the index 197 */ usedMemory()198 int usedMemory() const 199 { 200 return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory 201 } 202 203 204 /** 205 * \brief Perform k-nearest neighbor search 206 * \param[in] queries The query points for which to find the nearest neighbors 207 * \param[out] indices The indices of the nearest neighbors found 208 * \param[out] dists Distances to the nearest neighbors found 209 * \param[in] knn Number of nearest neighbors to return 210 * \param[in] params Search parameters 211 */ knnSearch(const Matrix<ElementType> & queries,Matrix<int> & indices,Matrix<DistanceType> & dists,int knn,const SearchParams & params)212 void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) 213 { 214 assert(queries.cols == veclen()); 215 assert(indices.rows >= queries.rows); 216 assert(dists.rows >= queries.rows); 217 assert(int(indices.cols) >= knn); 218 assert(int(dists.cols) >= knn); 219 220 KNNSimpleResultSet<DistanceType> resultSet(knn); 221 for (size_t i = 0; i < queries.rows; i++) { 222 resultSet.init(indices[i], dists[i]); 223 findNeighbors(resultSet, queries[i], params); 224 } 225 } 226 getParameters()227 IndexParams getParameters() const 228 { 229 return index_params_; 230 } 231 232 /** 233 * Find set of nearest neighbors to vec. Their indices are stored inside 234 * the result object. 235 * 236 * Params: 237 * result = the result object in which the indices of the nearest-neighbors are stored 238 * vec = the vector for which to search the nearest neighbors 239 * maxCheck = the maximum number of restarts (in a best-bin-first manner) 240 */ findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)241 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) 242 { 243 float epsError = 1+get_param(searchParams,"eps",0.0f); 244 245 std::vector<DistanceType> dists(dim_,0); 246 DistanceType distsq = computeInitialDistances(vec, dists); 247 searchLevel(result, vec, root_node_, distsq, dists, epsError); 248 } 249 250 private: 251 252 253 /*--------------------- Internal Data Structures --------------------------*/ 254 struct Node 255 { 256 /** 257 * Indices of points in leaf node 258 */ 259 int left, right; 260 /** 261 * Dimension used for subdivision. 262 */ 263 int divfeat; 264 /** 265 * The values used for subdivision. 266 */ 267 DistanceType divlow, divhigh; 268 /** 269 * The child nodes. 270 */ 271 Node* child1, * child2; 272 }; 273 typedef Node* NodePtr; 274 275 276 struct Interval 277 { 278 DistanceType low, high; 279 }; 280 281 typedef std::vector<Interval> BoundingBox; 282 283 typedef BranchStruct<NodePtr, DistanceType> BranchSt; 284 typedef BranchSt* Branch; 285 286 287 288 save_tree(FILE * stream,NodePtr tree)289 void save_tree(FILE* stream, NodePtr tree) 290 { 291 save_value(stream, *tree); 292 if (tree->child1!=NULL) { 293 save_tree(stream, tree->child1); 294 } 295 if (tree->child2!=NULL) { 296 save_tree(stream, tree->child2); 297 } 298 } 299 300 load_tree(FILE * stream,NodePtr & tree)301 void load_tree(FILE* stream, NodePtr& tree) 302 { 303 tree = pool_.allocate<Node>(); 304 load_value(stream, *tree); 305 if (tree->child1!=NULL) { 306 load_tree(stream, tree->child1); 307 } 308 if (tree->child2!=NULL) { 309 load_tree(stream, tree->child2); 310 } 311 } 312 313 computeBoundingBox(BoundingBox & bbox)314 void computeBoundingBox(BoundingBox& bbox) 315 { 316 bbox.resize(dim_); 317 for (size_t i=0; i<dim_; ++i) { 318 bbox[i].low = (DistanceType)dataset_[0][i]; 319 bbox[i].high = (DistanceType)dataset_[0][i]; 320 } 321 for (size_t k=1; k<dataset_.rows; ++k) { 322 for (size_t i=0; i<dim_; ++i) { 323 if (dataset_[k][i]<bbox[i].low) bbox[i].low = (DistanceType)dataset_[k][i]; 324 if (dataset_[k][i]>bbox[i].high) bbox[i].high = (DistanceType)dataset_[k][i]; 325 } 326 } 327 } 328 329 330 /** 331 * Create a tree node that subdivides the list of vecs from vind[first] 332 * to vind[last]. The routine is called recursively on each sublist. 333 * Place a pointer to this new tree node in the location pTree. 334 * 335 * Params: pTree = the new node to create 336 * first = index of the first vector 337 * last = index of the last vector 338 */ divideTree(int left,int right,BoundingBox & bbox)339 NodePtr divideTree(int left, int right, BoundingBox& bbox) 340 { 341 NodePtr node = pool_.allocate<Node>(); // allocate memory 342 343 /* If too few exemplars remain, then make this a leaf node. */ 344 if ( (right-left) <= leaf_max_size_) { 345 node->child1 = node->child2 = NULL; /* Mark as leaf node. */ 346 node->left = left; 347 node->right = right; 348 349 // compute bounding-box of leaf points 350 for (size_t i=0; i<dim_; ++i) { 351 bbox[i].low = (DistanceType)dataset_[vind_[left]][i]; 352 bbox[i].high = (DistanceType)dataset_[vind_[left]][i]; 353 } 354 for (int k=left+1; k<right; ++k) { 355 for (size_t i=0; i<dim_; ++i) { 356 if (bbox[i].low>dataset_[vind_[k]][i]) bbox[i].low=(DistanceType)dataset_[vind_[k]][i]; 357 if (bbox[i].high<dataset_[vind_[k]][i]) bbox[i].high=(DistanceType)dataset_[vind_[k]][i]; 358 } 359 } 360 } 361 else { 362 int idx; 363 int cutfeat; 364 DistanceType cutval; 365 middleSplit_(&vind_[0]+left, right-left, idx, cutfeat, cutval, bbox); 366 367 node->divfeat = cutfeat; 368 369 BoundingBox left_bbox(bbox); 370 left_bbox[cutfeat].high = cutval; 371 node->child1 = divideTree(left, left+idx, left_bbox); 372 373 BoundingBox right_bbox(bbox); 374 right_bbox[cutfeat].low = cutval; 375 node->child2 = divideTree(left+idx, right, right_bbox); 376 377 node->divlow = left_bbox[cutfeat].high; 378 node->divhigh = right_bbox[cutfeat].low; 379 380 for (size_t i=0; i<dim_; ++i) { 381 bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); 382 bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); 383 } 384 } 385 386 return node; 387 } 388 computeMinMax(int * ind,int count,int dim,ElementType & min_elem,ElementType & max_elem)389 void computeMinMax(int* ind, int count, int dim, ElementType& min_elem, ElementType& max_elem) 390 { 391 min_elem = dataset_[ind[0]][dim]; 392 max_elem = dataset_[ind[0]][dim]; 393 for (int i=1; i<count; ++i) { 394 ElementType val = dataset_[ind[i]][dim]; 395 if (val<min_elem) min_elem = val; 396 if (val>max_elem) max_elem = val; 397 } 398 } 399 middleSplit(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)400 void middleSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) 401 { 402 // find the largest span from the approximate bounding box 403 ElementType max_span = bbox[0].high-bbox[0].low; 404 cutfeat = 0; 405 cutval = (bbox[0].high+bbox[0].low)/2; 406 for (size_t i=1; i<dim_; ++i) { 407 ElementType span = bbox[i].high-bbox[i].low; 408 if (span>max_span) { 409 max_span = span; 410 cutfeat = i; 411 cutval = (bbox[i].high+bbox[i].low)/2; 412 } 413 } 414 415 // compute exact span on the found dimension 416 ElementType min_elem, max_elem; 417 computeMinMax(ind, count, cutfeat, min_elem, max_elem); 418 cutval = (min_elem+max_elem)/2; 419 max_span = max_elem - min_elem; 420 421 // check if a dimension of a largest span exists 422 size_t k = cutfeat; 423 for (size_t i=0; i<dim_; ++i) { 424 if (i==k) continue; 425 ElementType span = bbox[i].high-bbox[i].low; 426 if (span>max_span) { 427 computeMinMax(ind, count, i, min_elem, max_elem); 428 span = max_elem - min_elem; 429 if (span>max_span) { 430 max_span = span; 431 cutfeat = i; 432 cutval = (min_elem+max_elem)/2; 433 } 434 } 435 } 436 int lim1, lim2; 437 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 438 439 if (lim1>count/2) index = lim1; 440 else if (lim2<count/2) index = lim2; 441 else index = count/2; 442 } 443 444 middleSplit_(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)445 void middleSplit_(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) 446 { 447 const float EPS=0.00001f; 448 DistanceType max_span = bbox[0].high-bbox[0].low; 449 for (size_t i=1; i<dim_; ++i) { 450 DistanceType span = bbox[i].high-bbox[i].low; 451 if (span>max_span) { 452 max_span = span; 453 } 454 } 455 DistanceType max_spread = -1; 456 cutfeat = 0; 457 for (size_t i=0; i<dim_; ++i) { 458 DistanceType span = bbox[i].high-bbox[i].low; 459 if (span>(DistanceType)((1-EPS)*max_span)) { 460 ElementType min_elem, max_elem; 461 computeMinMax(ind, count, cutfeat, min_elem, max_elem); 462 DistanceType spread = (DistanceType)(max_elem-min_elem); 463 if (spread>max_spread) { 464 cutfeat = (int)i; 465 max_spread = spread; 466 } 467 } 468 } 469 // split in the middle 470 DistanceType split_val = (bbox[cutfeat].low+bbox[cutfeat].high)/2; 471 ElementType min_elem, max_elem; 472 computeMinMax(ind, count, cutfeat, min_elem, max_elem); 473 474 if (split_val<min_elem) cutval = (DistanceType)min_elem; 475 else if (split_val>max_elem) cutval = (DistanceType)max_elem; 476 else cutval = split_val; 477 478 int lim1, lim2; 479 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 480 481 if (lim1>count/2) index = lim1; 482 else if (lim2<count/2) index = lim2; 483 else index = count/2; 484 } 485 486 487 /** 488 * Subdivide the list of points by a plane perpendicular on axe corresponding 489 * to the 'cutfeat' dimension at 'cutval' position. 490 * 491 * On return: 492 * dataset[ind[0..lim1-1]][cutfeat]<cutval 493 * dataset[ind[lim1..lim2-1]][cutfeat]==cutval 494 * dataset[ind[lim2..count]][cutfeat]>cutval 495 */ planeSplit(int * ind,int count,int cutfeat,DistanceType cutval,int & lim1,int & lim2)496 void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2) 497 { 498 /* Move vector indices for left subtree to front of list. */ 499 int left = 0; 500 int right = count-1; 501 for (;; ) { 502 while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left; 503 while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right; 504 if (left>right) break; 505 std::swap(ind[left], ind[right]); ++left; --right; 506 } 507 /* If either list is empty, it means that all remaining features 508 * are identical. Split in the middle to maintain a balanced tree. 509 */ 510 lim1 = left; 511 right = count-1; 512 for (;; ) { 513 while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left; 514 while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right; 515 if (left>right) break; 516 std::swap(ind[left], ind[right]); ++left; --right; 517 } 518 lim2 = left; 519 } 520 computeInitialDistances(const ElementType * vec,std::vector<DistanceType> & dists)521 DistanceType computeInitialDistances(const ElementType* vec, std::vector<DistanceType>& dists) 522 { 523 DistanceType distsq = 0.0; 524 525 for (size_t i = 0; i < dim_; ++i) { 526 if (vec[i] < root_bbox_[i].low) { 527 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].low, (int)i); 528 distsq += dists[i]; 529 } 530 if (vec[i] > root_bbox_[i].high) { 531 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].high, (int)i); 532 distsq += dists[i]; 533 } 534 } 535 536 return distsq; 537 } 538 539 /** 540 * Performs an exact search in the tree starting from a node. 541 */ searchLevel(ResultSet<DistanceType> & result_set,const ElementType * vec,const NodePtr node,DistanceType mindistsq,std::vector<DistanceType> & dists,const float epsError)542 void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, 543 std::vector<DistanceType>& dists, const float epsError) 544 { 545 /* If this is a leaf node, then do check and return. */ 546 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 547 DistanceType worst_dist = result_set.worstDist(); 548 for (int i=node->left; i<node->right; ++i) { 549 int index = reorder_ ? i : vind_[i]; 550 DistanceType dist = distance_(vec, data_[index], dim_, worst_dist); 551 if (dist<worst_dist) { 552 result_set.addPoint(dist,vind_[i]); 553 } 554 } 555 return; 556 } 557 558 /* Which child branch should be taken first? */ 559 int idx = node->divfeat; 560 ElementType val = vec[idx]; 561 DistanceType diff1 = val - node->divlow; 562 DistanceType diff2 = val - node->divhigh; 563 564 NodePtr bestChild; 565 NodePtr otherChild; 566 DistanceType cut_dist; 567 if ((diff1+diff2)<0) { 568 bestChild = node->child1; 569 otherChild = node->child2; 570 cut_dist = distance_.accum_dist(val, node->divhigh, idx); 571 } 572 else { 573 bestChild = node->child2; 574 otherChild = node->child1; 575 cut_dist = distance_.accum_dist( val, node->divlow, idx); 576 } 577 578 /* Call recursively to search next level down. */ 579 searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError); 580 581 DistanceType dst = dists[idx]; 582 mindistsq = mindistsq + cut_dist - dst; 583 dists[idx] = cut_dist; 584 if (mindistsq*epsError<=result_set.worstDist()) { 585 searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError); 586 } 587 dists[idx] = dst; 588 } 589 590 private: 591 592 /** 593 * The dataset used by this index 594 */ 595 const Matrix<ElementType> dataset_; 596 597 IndexParams index_params_; 598 599 int leaf_max_size_; 600 bool reorder_; 601 602 603 /** 604 * Array of indices to vectors in the dataset. 605 */ 606 std::vector<int> vind_; 607 608 Matrix<ElementType> data_; 609 610 size_t size_; 611 size_t dim_; 612 613 /** 614 * Array of k-d trees used to find neighbours. 615 */ 616 NodePtr root_node_; 617 618 BoundingBox root_bbox_; 619 620 /** 621 * Pooled memory allocator. 622 * 623 * Using a pooled memory allocator is more efficient 624 * than allocating memory directly when there is a large 625 * number small of memory allocations. 626 */ 627 PooledAllocator pool_; 628 629 Distance distance_; 630 }; // class KDTree 631 632 } 633 634 #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ 635