1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
32 #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
33 
34 #include <algorithm>
35 #include <map>
36 #include <cassert>
37 #include <cstring>
38 
39 #include "general.h"
40 #include "nn_index.h"
41 #include "matrix.h"
42 #include "result_set.h"
43 #include "heap.h"
44 #include "allocator.h"
45 #include "random.h"
46 #include "saving.h"
47 
48 namespace cvflann
49 {
50 
51 struct KDTreeSingleIndexParams : public IndexParams
52 {
53     KDTreeSingleIndexParams(int leaf_max_size = 10, bool reorder = true, int dim = -1)
54     {
55         (*this)["algorithm"] = FLANN_INDEX_KDTREE_SINGLE;
56         (*this)["leaf_max_size"] = leaf_max_size;
57         (*this)["reorder"] = reorder;
58         (*this)["dim"] = dim;
59     }
60 };
61 
62 
63 /**
64  * Randomized kd-tree index
65  *
66  * Contains the k-d trees and other information for indexing a set of points
67  * for nearest-neighbor matching.
68  */
69 template <typename Distance>
70 class KDTreeSingleIndex : public NNIndex<Distance>
71 {
72 public:
73     typedef typename Distance::ElementType ElementType;
74     typedef typename Distance::ResultType DistanceType;
75 
76 
77     /**
78      * KDTree constructor
79      *
80      * Params:
81      *          inputData = dataset with the input features
82      *          params = parameters passed to the kdtree algorithm
83      */
84     KDTreeSingleIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeSingleIndexParams(),
85                       Distance d = Distance() ) :
dataset_(inputData)86         dataset_(inputData), index_params_(params), distance_(d)
87     {
88         size_ = dataset_.rows;
89         dim_ = dataset_.cols;
90         int dim_param = get_param(params,"dim",-1);
91         if (dim_param>0) dim_ = dim_param;
92         leaf_max_size_ = get_param(params,"leaf_max_size",10);
93         reorder_ = get_param(params,"reorder",true);
94 
95         // Create a permutable array of indices to the input vectors.
96         vind_.resize(size_);
97         for (size_t i = 0; i < size_; i++) {
98             vind_[i] = (int)i;
99         }
100     }
101 
102     KDTreeSingleIndex(const KDTreeSingleIndex&);
103     KDTreeSingleIndex& operator=(const KDTreeSingleIndex&);
104 
105     /**
106      * Standard destructor
107      */
~KDTreeSingleIndex()108     ~KDTreeSingleIndex()
109     {
110         if (reorder_) delete[] data_.data;
111     }
112 
113     /**
114      * Builds the index
115      */
buildIndex()116     void buildIndex()
117     {
118         computeBoundingBox(root_bbox_);
119         root_node_ = divideTree(0, (int)size_, root_bbox_ );   // construct the tree
120 
121         if (reorder_) {
122             delete[] data_.data;
123             data_ = cvflann::Matrix<ElementType>(new ElementType[size_*dim_], size_, dim_);
124             for (size_t i=0; i<size_; ++i) {
125                 for (size_t j=0; j<dim_; ++j) {
126                     data_[i][j] = dataset_[vind_[i]][j];
127                 }
128             }
129         }
130         else {
131             data_ = dataset_;
132         }
133     }
134 
getType()135     flann_algorithm_t getType() const
136     {
137         return FLANN_INDEX_KDTREE_SINGLE;
138     }
139 
140 
saveIndex(FILE * stream)141     void saveIndex(FILE* stream)
142     {
143         save_value(stream, size_);
144         save_value(stream, dim_);
145         save_value(stream, root_bbox_);
146         save_value(stream, reorder_);
147         save_value(stream, leaf_max_size_);
148         save_value(stream, vind_);
149         if (reorder_) {
150             save_value(stream, data_);
151         }
152         save_tree(stream, root_node_);
153     }
154 
155 
loadIndex(FILE * stream)156     void loadIndex(FILE* stream)
157     {
158         load_value(stream, size_);
159         load_value(stream, dim_);
160         load_value(stream, root_bbox_);
161         load_value(stream, reorder_);
162         load_value(stream, leaf_max_size_);
163         load_value(stream, vind_);
164         if (reorder_) {
165             load_value(stream, data_);
166         }
167         else {
168             data_ = dataset_;
169         }
170         load_tree(stream, root_node_);
171 
172 
173         index_params_["algorithm"] = getType();
174         index_params_["leaf_max_size"] = leaf_max_size_;
175         index_params_["reorder"] = reorder_;
176     }
177 
178     /**
179      *  Returns size of index.
180      */
size()181     size_t size() const
182     {
183         return size_;
184     }
185 
186     /**
187      * Returns the length of an index feature.
188      */
veclen()189     size_t veclen() const
190     {
191         return dim_;
192     }
193 
194     /**
195      * Computes the inde memory usage
196      * Returns: memory used by the index
197      */
usedMemory()198     int usedMemory() const
199     {
200         return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int));  // pool memory and vind array memory
201     }
202 
203 
204     /**
205      * \brief Perform k-nearest neighbor search
206      * \param[in] queries The query points for which to find the nearest neighbors
207      * \param[out] indices The indices of the nearest neighbors found
208      * \param[out] dists Distances to the nearest neighbors found
209      * \param[in] knn Number of nearest neighbors to return
210      * \param[in] params Search parameters
211      */
knnSearch(const Matrix<ElementType> & queries,Matrix<int> & indices,Matrix<DistanceType> & dists,int knn,const SearchParams & params)212     void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
213     {
214         assert(queries.cols == veclen());
215         assert(indices.rows >= queries.rows);
216         assert(dists.rows >= queries.rows);
217         assert(int(indices.cols) >= knn);
218         assert(int(dists.cols) >= knn);
219 
220         KNNSimpleResultSet<DistanceType> resultSet(knn);
221         for (size_t i = 0; i < queries.rows; i++) {
222             resultSet.init(indices[i], dists[i]);
223             findNeighbors(resultSet, queries[i], params);
224         }
225     }
226 
getParameters()227     IndexParams getParameters() const
228     {
229         return index_params_;
230     }
231 
232     /**
233      * Find set of nearest neighbors to vec. Their indices are stored inside
234      * the result object.
235      *
236      * Params:
237      *     result = the result object in which the indices of the nearest-neighbors are stored
238      *     vec = the vector for which to search the nearest neighbors
239      *     maxCheck = the maximum number of restarts (in a best-bin-first manner)
240      */
findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)241     void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
242     {
243         float epsError = 1+get_param(searchParams,"eps",0.0f);
244 
245         std::vector<DistanceType> dists(dim_,0);
246         DistanceType distsq = computeInitialDistances(vec, dists);
247         searchLevel(result, vec, root_node_, distsq, dists, epsError);
248     }
249 
250 private:
251 
252 
253     /*--------------------- Internal Data Structures --------------------------*/
254     struct Node
255     {
256         /**
257          * Indices of points in leaf node
258          */
259         int left, right;
260         /**
261          * Dimension used for subdivision.
262          */
263         int divfeat;
264         /**
265          * The values used for subdivision.
266          */
267         DistanceType divlow, divhigh;
268         /**
269          * The child nodes.
270          */
271         Node* child1, * child2;
272     };
273     typedef Node* NodePtr;
274 
275 
276     struct Interval
277     {
278         DistanceType low, high;
279     };
280 
281     typedef std::vector<Interval> BoundingBox;
282 
283     typedef BranchStruct<NodePtr, DistanceType> BranchSt;
284     typedef BranchSt* Branch;
285 
286 
287 
288 
save_tree(FILE * stream,NodePtr tree)289     void save_tree(FILE* stream, NodePtr tree)
290     {
291         save_value(stream, *tree);
292         if (tree->child1!=NULL) {
293             save_tree(stream, tree->child1);
294         }
295         if (tree->child2!=NULL) {
296             save_tree(stream, tree->child2);
297         }
298     }
299 
300 
load_tree(FILE * stream,NodePtr & tree)301     void load_tree(FILE* stream, NodePtr& tree)
302     {
303         tree = pool_.allocate<Node>();
304         load_value(stream, *tree);
305         if (tree->child1!=NULL) {
306             load_tree(stream, tree->child1);
307         }
308         if (tree->child2!=NULL) {
309             load_tree(stream, tree->child2);
310         }
311     }
312 
313 
computeBoundingBox(BoundingBox & bbox)314     void computeBoundingBox(BoundingBox& bbox)
315     {
316         bbox.resize(dim_);
317         for (size_t i=0; i<dim_; ++i) {
318             bbox[i].low = (DistanceType)dataset_[0][i];
319             bbox[i].high = (DistanceType)dataset_[0][i];
320         }
321         for (size_t k=1; k<dataset_.rows; ++k) {
322             for (size_t i=0; i<dim_; ++i) {
323                 if (dataset_[k][i]<bbox[i].low) bbox[i].low = (DistanceType)dataset_[k][i];
324                 if (dataset_[k][i]>bbox[i].high) bbox[i].high = (DistanceType)dataset_[k][i];
325             }
326         }
327     }
328 
329 
330     /**
331      * Create a tree node that subdivides the list of vecs from vind[first]
332      * to vind[last].  The routine is called recursively on each sublist.
333      * Place a pointer to this new tree node in the location pTree.
334      *
335      * Params: pTree = the new node to create
336      *                  first = index of the first vector
337      *                  last = index of the last vector
338      */
divideTree(int left,int right,BoundingBox & bbox)339     NodePtr divideTree(int left, int right, BoundingBox& bbox)
340     {
341         NodePtr node = pool_.allocate<Node>(); // allocate memory
342 
343         /* If too few exemplars remain, then make this a leaf node. */
344         if ( (right-left) <= leaf_max_size_) {
345             node->child1 = node->child2 = NULL;    /* Mark as leaf node. */
346             node->left = left;
347             node->right = right;
348 
349             // compute bounding-box of leaf points
350             for (size_t i=0; i<dim_; ++i) {
351                 bbox[i].low = (DistanceType)dataset_[vind_[left]][i];
352                 bbox[i].high = (DistanceType)dataset_[vind_[left]][i];
353             }
354             for (int k=left+1; k<right; ++k) {
355                 for (size_t i=0; i<dim_; ++i) {
356                     if (bbox[i].low>dataset_[vind_[k]][i]) bbox[i].low=(DistanceType)dataset_[vind_[k]][i];
357                     if (bbox[i].high<dataset_[vind_[k]][i]) bbox[i].high=(DistanceType)dataset_[vind_[k]][i];
358                 }
359             }
360         }
361         else {
362             int idx;
363             int cutfeat;
364             DistanceType cutval;
365             middleSplit_(&vind_[0]+left, right-left, idx, cutfeat, cutval, bbox);
366 
367             node->divfeat = cutfeat;
368 
369             BoundingBox left_bbox(bbox);
370             left_bbox[cutfeat].high = cutval;
371             node->child1 = divideTree(left, left+idx, left_bbox);
372 
373             BoundingBox right_bbox(bbox);
374             right_bbox[cutfeat].low = cutval;
375             node->child2 = divideTree(left+idx, right, right_bbox);
376 
377             node->divlow = left_bbox[cutfeat].high;
378             node->divhigh = right_bbox[cutfeat].low;
379 
380             for (size_t i=0; i<dim_; ++i) {
381                 bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low);
382                 bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
383             }
384         }
385 
386         return node;
387     }
388 
computeMinMax(int * ind,int count,int dim,ElementType & min_elem,ElementType & max_elem)389     void computeMinMax(int* ind, int count, int dim, ElementType& min_elem, ElementType& max_elem)
390     {
391         min_elem = dataset_[ind[0]][dim];
392         max_elem = dataset_[ind[0]][dim];
393         for (int i=1; i<count; ++i) {
394             ElementType val = dataset_[ind[i]][dim];
395             if (val<min_elem) min_elem = val;
396             if (val>max_elem) max_elem = val;
397         }
398     }
399 
middleSplit(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)400     void middleSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
401     {
402         // find the largest span from the approximate bounding box
403         ElementType max_span = bbox[0].high-bbox[0].low;
404         cutfeat = 0;
405         cutval = (bbox[0].high+bbox[0].low)/2;
406         for (size_t i=1; i<dim_; ++i) {
407             ElementType span = bbox[i].high-bbox[i].low;
408             if (span>max_span) {
409                 max_span = span;
410                 cutfeat = i;
411                 cutval = (bbox[i].high+bbox[i].low)/2;
412             }
413         }
414 
415         // compute exact span on the found dimension
416         ElementType min_elem, max_elem;
417         computeMinMax(ind, count, cutfeat, min_elem, max_elem);
418         cutval = (min_elem+max_elem)/2;
419         max_span = max_elem - min_elem;
420 
421         // check if a dimension of a largest span exists
422         size_t k = cutfeat;
423         for (size_t i=0; i<dim_; ++i) {
424             if (i==k) continue;
425             ElementType span = bbox[i].high-bbox[i].low;
426             if (span>max_span) {
427                 computeMinMax(ind, count, i, min_elem, max_elem);
428                 span = max_elem - min_elem;
429                 if (span>max_span) {
430                     max_span = span;
431                     cutfeat = i;
432                     cutval = (min_elem+max_elem)/2;
433                 }
434             }
435         }
436         int lim1, lim2;
437         planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
438 
439         if (lim1>count/2) index = lim1;
440         else if (lim2<count/2) index = lim2;
441         else index = count/2;
442     }
443 
444 
middleSplit_(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)445     void middleSplit_(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
446     {
447         const float EPS=0.00001f;
448         DistanceType max_span = bbox[0].high-bbox[0].low;
449         for (size_t i=1; i<dim_; ++i) {
450             DistanceType span = bbox[i].high-bbox[i].low;
451             if (span>max_span) {
452                 max_span = span;
453             }
454         }
455         DistanceType max_spread = -1;
456         cutfeat = 0;
457         for (size_t i=0; i<dim_; ++i) {
458             DistanceType span = bbox[i].high-bbox[i].low;
459             if (span>(DistanceType)((1-EPS)*max_span)) {
460                 ElementType min_elem, max_elem;
461                 computeMinMax(ind, count, cutfeat, min_elem, max_elem);
462                 DistanceType spread = (DistanceType)(max_elem-min_elem);
463                 if (spread>max_spread) {
464                     cutfeat = (int)i;
465                     max_spread = spread;
466                 }
467             }
468         }
469         // split in the middle
470         DistanceType split_val = (bbox[cutfeat].low+bbox[cutfeat].high)/2;
471         ElementType min_elem, max_elem;
472         computeMinMax(ind, count, cutfeat, min_elem, max_elem);
473 
474         if (split_val<min_elem) cutval = (DistanceType)min_elem;
475         else if (split_val>max_elem) cutval = (DistanceType)max_elem;
476         else cutval = split_val;
477 
478         int lim1, lim2;
479         planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
480 
481         if (lim1>count/2) index = lim1;
482         else if (lim2<count/2) index = lim2;
483         else index = count/2;
484     }
485 
486 
487     /**
488      *  Subdivide the list of points by a plane perpendicular on axe corresponding
489      *  to the 'cutfeat' dimension at 'cutval' position.
490      *
491      *  On return:
492      *  dataset[ind[0..lim1-1]][cutfeat]<cutval
493      *  dataset[ind[lim1..lim2-1]][cutfeat]==cutval
494      *  dataset[ind[lim2..count]][cutfeat]>cutval
495      */
planeSplit(int * ind,int count,int cutfeat,DistanceType cutval,int & lim1,int & lim2)496     void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2)
497     {
498         /* Move vector indices for left subtree to front of list. */
499         int left = 0;
500         int right = count-1;
501         for (;; ) {
502             while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left;
503             while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right;
504             if (left>right) break;
505             std::swap(ind[left], ind[right]); ++left; --right;
506         }
507         /* If either list is empty, it means that all remaining features
508          * are identical. Split in the middle to maintain a balanced tree.
509          */
510         lim1 = left;
511         right = count-1;
512         for (;; ) {
513             while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left;
514             while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right;
515             if (left>right) break;
516             std::swap(ind[left], ind[right]); ++left; --right;
517         }
518         lim2 = left;
519     }
520 
computeInitialDistances(const ElementType * vec,std::vector<DistanceType> & dists)521     DistanceType computeInitialDistances(const ElementType* vec, std::vector<DistanceType>& dists)
522     {
523         DistanceType distsq = 0.0;
524 
525         for (size_t i = 0; i < dim_; ++i) {
526             if (vec[i] < root_bbox_[i].low) {
527                 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].low, (int)i);
528                 distsq += dists[i];
529             }
530             if (vec[i] > root_bbox_[i].high) {
531                 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].high, (int)i);
532                 distsq += dists[i];
533             }
534         }
535 
536         return distsq;
537     }
538 
539     /**
540      * Performs an exact search in the tree starting from a node.
541      */
searchLevel(ResultSet<DistanceType> & result_set,const ElementType * vec,const NodePtr node,DistanceType mindistsq,std::vector<DistanceType> & dists,const float epsError)542     void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq,
543                      std::vector<DistanceType>& dists, const float epsError)
544     {
545         /* If this is a leaf node, then do check and return. */
546         if ((node->child1 == NULL)&&(node->child2 == NULL)) {
547             DistanceType worst_dist = result_set.worstDist();
548             for (int i=node->left; i<node->right; ++i) {
549                 int index = reorder_ ? i : vind_[i];
550                 DistanceType dist = distance_(vec, data_[index], dim_, worst_dist);
551                 if (dist<worst_dist) {
552                     result_set.addPoint(dist,vind_[i]);
553                 }
554             }
555             return;
556         }
557 
558         /* Which child branch should be taken first? */
559         int idx = node->divfeat;
560         ElementType val = vec[idx];
561         DistanceType diff1 = val - node->divlow;
562         DistanceType diff2 = val - node->divhigh;
563 
564         NodePtr bestChild;
565         NodePtr otherChild;
566         DistanceType cut_dist;
567         if ((diff1+diff2)<0) {
568             bestChild = node->child1;
569             otherChild = node->child2;
570             cut_dist = distance_.accum_dist(val, node->divhigh, idx);
571         }
572         else {
573             bestChild = node->child2;
574             otherChild = node->child1;
575             cut_dist = distance_.accum_dist( val, node->divlow, idx);
576         }
577 
578         /* Call recursively to search next level down. */
579         searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError);
580 
581         DistanceType dst = dists[idx];
582         mindistsq = mindistsq + cut_dist - dst;
583         dists[idx] = cut_dist;
584         if (mindistsq*epsError<=result_set.worstDist()) {
585             searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError);
586         }
587         dists[idx] = dst;
588     }
589 
590 private:
591 
592     /**
593      * The dataset used by this index
594      */
595     const Matrix<ElementType> dataset_;
596 
597     IndexParams index_params_;
598 
599     int leaf_max_size_;
600     bool reorder_;
601 
602 
603     /**
604      *  Array of indices to vectors in the dataset.
605      */
606     std::vector<int> vind_;
607 
608     Matrix<ElementType> data_;
609 
610     size_t size_;
611     size_t dim_;
612 
613     /**
614      * Array of k-d trees used to find neighbours.
615      */
616     NodePtr root_node_;
617 
618     BoundingBox root_bbox_;
619 
620     /**
621      * Pooled memory allocator.
622      *
623      * Using a pooled memory allocator is more efficient
624      * than allocating memory directly when there is a large
625      * number small of memory allocations.
626      */
627     PooledAllocator pool_;
628 
629     Distance distance_;
630 };   // class KDTree
631 
632 }
633 
634 #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
635