1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // This simple class finds the top n elements of an incrementally provided set
17 // of elements which you push one at a time.  If the number of elements exceeds
18 // n, the lowest elements are incrementally dropped.  At the end you get
19 // a vector of the top elements sorted in descending order (through Extract() or
20 // ExtractNondestructive()), or a vector of the top elements but not sorted
21 // (through ExtractUnsorted() or ExtractUnsortedNondestructive()).
22 //
23 // The value n is specified in the constructor.  If there are p elements pushed
24 // altogether:
25 //   The total storage requirements are O(min(n, p)) elements
26 //   The running time is O(p * log(min(n, p))) comparisons
27 // If n is a constant, the total storage required is a constant and the running
28 // time is linear in p.
29 //
30 // NOTE(zhifengc): There is a way to do this in O(min(n, p)) storage and O(p)
31 // runtime. The basic idea is to repeatedly fill up a buffer of 2 * n elements,
32 // discarding the lowest n elements whenever the buffer is full using a linear-
33 // time median algorithm. This may have better performance when the input
34 // sequence is partially sorted.
35 //
36 // NOTE(zhifengc): This class should be redesigned to avoid reallocating a
37 // vector for each Extract.
38 
39 // Copied from tensorflow/core/lib/gtl/top_n.h
40 // TODO(b/111524997): Remove this file.
41 #ifndef TENSORFLOW_LITE_EXPERIMENTAL_KERNELS_TOP_N_H_
42 #define TENSORFLOW_LITE_EXPERIMENTAL_KERNELS_TOP_N_H_
43 
44 #include <stddef.h>
45 #include <algorithm>
46 #include <functional>
47 #include <string>
48 #include <vector>
49 
50 #include "tensorflow/lite/kernels/internal/compatibility.h"
51 
52 namespace tflite {
53 namespace gtl {
54 
55 // Cmp is an stl binary predicate.  Note that Cmp is the "greater" predicate,
56 // not the more commonly used "less" predicate.
57 //
58 // If you use a "less" predicate here, the TopN will pick out the bottom N
59 // elements out of the ones passed to it, and it will return them sorted in
60 // ascending order.
61 //
62 // TopN is rule-of-zero copyable and movable if its members are.
63 template <class T, class Cmp = std::greater<T> >
64 class TopN {
65  public:
66   // The TopN is in one of the three states:
67   //
68   //  o UNORDERED: this is the state an instance is originally in,
69   //    where the elements are completely orderless.
70   //
71   //  o BOTTOM_KNOWN: in this state, we keep the invariant that there
72   //    is at least one element in it, and the lowest element is at
73   //    position 0. The elements in other positions remain
74   //    unsorted. This state is reached if the state was originally
75   //    UNORDERED and a peek_bottom() function call is invoked.
76   //
77   //  o HEAP_SORTED: in this state, the array is kept as a heap and
78   //    there are exactly (limit_+1) elements in the array. This
79   //    state is reached when at least (limit_+1) elements are
80   //    pushed in.
81   //
82   //  The state transition graph is at follows:
83   //
84   //             peek_bottom()                (limit_+1) elements
85   //  UNORDERED --------------> BOTTOM_KNOWN --------------------> HEAP_SORTED
86   //      |                                                           ^
87   //      |                      (limit_+1) elements                  |
88   //      +-----------------------------------------------------------+
89 
90   enum State { UNORDERED, BOTTOM_KNOWN, HEAP_SORTED };
91   using UnsortedIterator = typename std::vector<T>::const_iterator;
92 
93   // 'limit' is the maximum number of top results to return.
TopN(size_t limit)94   explicit TopN(size_t limit) : TopN(limit, Cmp()) {}
TopN(size_t limit,const Cmp & cmp)95   TopN(size_t limit, const Cmp &cmp) : limit_(limit), cmp_(cmp) {}
96 
limit()97   size_t limit() const { return limit_; }
98 
99   // Number of elements currently held by this TopN object.  This
100   // will be no greater than 'limit' passed to the constructor.
size()101   size_t size() const { return std::min(elements_.size(), limit_); }
102 
empty()103   bool empty() const { return size() == 0; }
104 
105   // If you know how many elements you will push at the time you create the
106   // TopN object, you can call reserve to preallocate the memory that TopN
107   // will need to process all 'n' pushes.  Calling this method is optional.
reserve(size_t n)108   void reserve(size_t n) { elements_.reserve(std::min(n, limit_ + 1)); }
109 
110   // Push 'v'.  If the maximum number of elements was exceeded, drop the
111   // lowest element and return it in 'dropped' (if given). If the maximum is not
112   // exceeded, 'dropped' will remain unchanged. 'dropped' may be omitted or
113   // nullptr, in which case it is not filled in.
114   // Requires: T is CopyAssignable, Swappable
push(const T & v)115   void push(const T &v) { push(v, nullptr); }
push(const T & v,T * dropped)116   void push(const T &v, T *dropped) { PushInternal(v, dropped); }
117 
118   // Move overloads of push.
119   // Requires: T is MoveAssignable, Swappable
push(T && v)120   void push(T &&v) {  // NOLINT(build/c++11)
121     push(std::move(v), nullptr);
122   }
push(T && v,T * dropped)123   void push(T &&v, T *dropped) {  // NOLINT(build/c++11)
124     PushInternal(std::move(v), dropped);
125   }
126 
127   // Peeks the bottom result without calling Extract()
128   const T &peek_bottom();
129 
130   // Extract the elements as a vector sorted in descending order.  The caller
131   // assumes ownership of the vector and must delete it when done.  This is a
132   // destructive operation.  The only method that can be called immediately
133   // after Extract() is Reset().
134   std::vector<T> *Extract();
135 
136   // Similar to Extract(), but makes no guarantees the elements are in sorted
137   // order.  As with Extract(), the caller assumes ownership of the vector and
138   // must delete it when done.  This is a destructive operation.  The only
139   // method that can be called immediately after ExtractUnsorted() is Reset().
140   std::vector<T> *ExtractUnsorted();
141 
142   // A non-destructive version of Extract(). Copy the elements in a new vector
143   // sorted in descending order and return it.  The caller assumes ownership of
144   // the new vector and must delete it when done.  After calling
145   // ExtractNondestructive(), the caller can continue to push() new elements.
146   std::vector<T> *ExtractNondestructive() const;
147 
148   // A non-destructive version of Extract(). Copy the elements to a given
149   // vector sorted in descending order. After calling
150   // ExtractNondestructive(), the caller can continue to push() new elements.
151   // Note:
152   //  1. The given argument must to be allocated.
153   //  2. Any data contained in the vector prior to the call will be deleted
154   //     from it. After the call the vector will contain only the elements
155   //     from the data structure.
156   void ExtractNondestructive(std::vector<T> *output) const;
157 
158   // A non-destructive version of ExtractUnsorted(). Copy the elements in a new
159   // vector and return it, with no guarantees the elements are in sorted order.
160   // The caller assumes ownership of the new vector and must delete it when
161   // done.  After calling ExtractUnsortedNondestructive(), the caller can
162   // continue to push() new elements.
163   std::vector<T> *ExtractUnsortedNondestructive() const;
164 
165   // A non-destructive version of ExtractUnsorted(). Copy the elements into
166   // a given vector, with no guarantees the elements are in sorted order.
167   // After calling ExtractUnsortedNondestructive(), the caller can continue
168   // to push() new elements.
169   // Note:
170   //  1. The given argument must to be allocated.
171   //  2. Any data contained in the vector prior to the call will be deleted
172   //     from it. After the call the vector will contain only the elements
173   //     from the data structure.
174   void ExtractUnsortedNondestructive(std::vector<T> *output) const;
175 
176   // Return an iterator to the beginning (end) of the container,
177   // with no guarantees about the order of iteration. These iterators are
178   // invalidated by mutation of the data structure.
unsorted_begin()179   UnsortedIterator unsorted_begin() const { return elements_.begin(); }
unsorted_end()180   UnsortedIterator unsorted_end() const { return elements_.begin() + size(); }
181 
182   // Accessor for comparator template argument.
comparator()183   Cmp *comparator() { return &cmp_; }
184 
185   // This removes all elements.  If Extract() or ExtractUnsorted() have been
186   // called, this will put it back in an empty but useable state.
187   void Reset();
188 
189  private:
190   template <typename U>
191   void PushInternal(U &&v, T *dropped);  // NOLINT(build/c++11)
192 
193   // elements_ can be in one of two states:
194   //   elements_.size() <= limit_:  elements_ is an unsorted vector of elements
195   //      pushed so far.
196   //   elements_.size() > limit_:  The last element of elements_ is unused;
197   //      the other elements of elements_ are an stl heap whose size is exactly
198   //      limit_.  In this case elements_.size() is exactly one greater than
199   //      limit_, but don't use "elements_.size() == limit_ + 1" to check for
200   //      that because you'll get a false positive if limit_ == size_t(-1).
201   std::vector<T> elements_;
202   size_t limit_;  // Maximum number of elements to find
203   Cmp cmp_;       // Greater-than comparison function
204   State state_ = UNORDERED;
205 };
206 
207 // ----------------------------------------------------------------------
208 // Implementations of non-inline functions
209 
210 template <class T, class Cmp>
211 template <typename U>
PushInternal(U && v,T * dropped)212 void TopN<T, Cmp>::PushInternal(U &&v, T *dropped) {  // NOLINT(build/c++11)
213   if (limit_ == 0) {
214     if (dropped) *dropped = std::forward<U>(v);  // NOLINT(build/c++11)
215     return;
216   }
217   if (state_ != HEAP_SORTED) {
218     elements_.push_back(std::forward<U>(v));  // NOLINT(build/c++11)
219     if (state_ == UNORDERED || cmp_(elements_.back(), elements_.front())) {
220       // Easy case: we just pushed the new element back
221     } else {
222       // To maintain the BOTTOM_KNOWN state, we need to make sure that
223       // the element at position 0 is always the smallest. So we put
224       // the new element at position 0 and push the original bottom
225       // element in the back.
226       // Warning: this code is subtle.
227       using std::swap;
228       swap(elements_.front(), elements_.back());
229     }
230     if (elements_.size() == limit_ + 1) {
231       // Transition from unsorted vector to a heap.
232       std::make_heap(elements_.begin(), elements_.end(), cmp_);
233       if (dropped) *dropped = std::move(elements_.front());
234       std::pop_heap(elements_.begin(), elements_.end(), cmp_);
235       state_ = HEAP_SORTED;
236     }
237   } else {
238     // Only insert the new element if it is greater than the least element.
239     if (cmp_(v, elements_.front())) {
240       elements_.back() = std::forward<U>(v);  // NOLINT(build/c++11)
241       std::push_heap(elements_.begin(), elements_.end(), cmp_);
242       if (dropped) *dropped = std::move(elements_.front());
243       std::pop_heap(elements_.begin(), elements_.end(), cmp_);
244     } else {
245       if (dropped) *dropped = std::forward<U>(v);  // NOLINT(build/c++11)
246     }
247   }
248 }
249 
250 template <class T, class Cmp>
peek_bottom()251 const T &TopN<T, Cmp>::peek_bottom() {
252   TFLITE_DCHECK(!empty());
253   if (state_ == UNORDERED) {
254     // We need to do a linear scan to find out the bottom element
255     int min_candidate = 0;
256     for (size_t i = 1; i < elements_.size(); ++i) {
257       if (cmp_(elements_[min_candidate], elements_[i])) {
258         min_candidate = i;
259       }
260     }
261     // By swapping the element at position 0 and the minimal
262     // element, we transition to the BOTTOM_KNOWN state
263     if (min_candidate != 0) {
264       using std::swap;
265       swap(elements_[0], elements_[min_candidate]);
266     }
267     state_ = BOTTOM_KNOWN;
268   }
269   return elements_.front();
270 }
271 
272 template <class T, class Cmp>
Extract()273 std::vector<T> *TopN<T, Cmp>::Extract() {
274   auto out = new std::vector<T>;
275   out->swap(elements_);
276   if (state_ != HEAP_SORTED) {
277     std::sort(out->begin(), out->end(), cmp_);
278   } else {
279     out->pop_back();
280     std::sort_heap(out->begin(), out->end(), cmp_);
281   }
282   return out;
283 }
284 
285 template <class T, class Cmp>
ExtractUnsorted()286 std::vector<T> *TopN<T, Cmp>::ExtractUnsorted() {
287   auto out = new std::vector<T>;
288   out->swap(elements_);
289   if (state_ == HEAP_SORTED) {
290     // Remove the limit_+1'th element.
291     out->pop_back();
292   }
293   return out;
294 }
295 
296 template <class T, class Cmp>
ExtractNondestructive()297 std::vector<T> *TopN<T, Cmp>::ExtractNondestructive() const {
298   auto out = new std::vector<T>;
299   ExtractNondestructive(out);
300   return out;
301 }
302 
303 template <class T, class Cmp>
ExtractNondestructive(std::vector<T> * output)304 void TopN<T, Cmp>::ExtractNondestructive(std::vector<T> *output) const {
305   TFLITE_DCHECK(output);
306   *output = elements_;
307   if (state_ != HEAP_SORTED) {
308     std::sort(output->begin(), output->end(), cmp_);
309   } else {
310     output->pop_back();
311     std::sort_heap(output->begin(), output->end(), cmp_);
312   }
313 }
314 
315 template <class T, class Cmp>
ExtractUnsortedNondestructive()316 std::vector<T> *TopN<T, Cmp>::ExtractUnsortedNondestructive() const {
317   auto elements = new std::vector<T>;
318   ExtractUnsortedNondestructive(elements);
319   return elements;
320 }
321 
322 template <class T, class Cmp>
ExtractUnsortedNondestructive(std::vector<T> * output)323 void TopN<T, Cmp>::ExtractUnsortedNondestructive(std::vector<T> *output) const {
324   TFLITE_DCHECK(output);
325   *output = elements_;
326   if (state_ == HEAP_SORTED) {
327     // Remove the limit_+1'th element.
328     output->pop_back();
329   }
330 }
331 
332 template <class T, class Cmp>
Reset()333 void TopN<T, Cmp>::Reset() {
334   elements_.clear();
335   state_ = UNORDERED;
336 }
337 
338 }  // namespace gtl
339 }  // namespace tflite
340 
341 #endif  // TENSORFLOW_LITE_EXPERIMENTAL_KERNELS_TOP_N_H_
342