1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
17 #define TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
18 
19 // clang-format off
20 // Required for IS_MOBILE_PLATFORM
21 #include "tensorflow/core/platform/platform.h"
22 // clang-format on
23 
24 // We replace this implementation with a null implementation for mobile
25 // platforms.
26 #ifdef IS_MOBILE_PLATFORM
27 #define TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H  // prevent accidental use
28                                                        // of
29 // mobile_percentile_sampler.h
30 #include "tensorflow/core/lib/monitoring/mobile_percentile_sampler.h"
31 #undef TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H
32 #else
33 
34 #include <cmath>
35 #include <map>
36 
37 #include "tensorflow/core/lib/core/status.h"
38 #include "tensorflow/core/lib/monitoring/collection_registry.h"
39 #include "tensorflow/core/lib/monitoring/metric_def.h"
40 #include "tensorflow/core/lib/monitoring/types.h"
41 #include "tensorflow/core/platform/macros.h"
42 #include "tensorflow/core/platform/mutex.h"
43 #include "tensorflow/core/platform/thread_annotations.h"
44 
45 namespace tensorflow {
46 namespace monitoring {
47 
// PercentileSamplerCell stores each value of a PercentileSampler.
49 // The class uses a circular buffer to maintain a window of samples.
50 //
51 // This class is thread-safe.
52 class PercentileSamplerCell {
53  public:
PercentileSamplerCell(UnitOfMeasure unit_of_measure,std::vector<double> percentiles,size_t max_samples)54   PercentileSamplerCell(UnitOfMeasure unit_of_measure,
55                         std::vector<double> percentiles, size_t max_samples)
56       : unit_of_measure_(unit_of_measure),
57         percentiles_(std::move(percentiles)),
58         samples_(max_samples),
59         num_samples_(0),
60         next_position_(0),
61         total_samples_(0),
62         accumulator_(0.0) {}
63 
64   // Atomically adds a sample.
65   void Add(double sample);
66 
67   Percentiles value() const;
68 
69  private:
70   struct Sample {
71     bool operator<(const Sample& rhs) const { return value < rhs.value; }
72 
73     uint64 nstime = 0;
74     double value = NAN;
75   };
76 
77   std::vector<Sample> GetSamples(size_t* total_samples,
78                                  long double* accumulator) const;
79 
80   mutable mutex mu_;
81   UnitOfMeasure unit_of_measure_;
82   const std::vector<double> percentiles_;
83   std::vector<Sample> samples_ TF_GUARDED_BY(mu_);
84   size_t num_samples_ TF_GUARDED_BY(mu_);
85   size_t next_position_ TF_GUARDED_BY(mu_);
86   size_t total_samples_ TF_GUARDED_BY(mu_);
87   long double accumulator_ TF_GUARDED_BY(mu_);
88 
89   TF_DISALLOW_COPY_AND_ASSIGN(PercentileSamplerCell);
90 };
91 
92 // A stateful class for updating a cumulative percentile sampled metric.
93 //
94 // This class stores, in each cell, up to max_samples values in a circular
95 // buffer, and returns the percentiles information as cell value.
96 //
97 // PercentileSampler allocates storage and maintains a cell for each value. You
98 // can retrieve an individual cell using a label-tuple and update it separately.
99 // This improves performance since operations related to retrieval, like
100 // map-indexing and locking, are avoided.
101 //
102 // This class is thread-safe.
103 template <int NumLabels>
104 class PercentileSampler {
105  public:
~PercentileSampler()106   ~PercentileSampler() {
107     // Deleted here, before the metric_def is destroyed.
108     registration_handle_.reset();
109   }
110 
111   // Creates the metric based on the metric-definition arguments and buckets.
112   //
113   // Example;
114   // auto* sampler_with_label =
115   // PercentileSampler<1>::New({"/tensorflow/sampler",
116   //   "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024,
117   //   UnitOfMeasure::kTime);
118   static PercentileSampler* New(
119       const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
120           metric_def,
121       std::vector<double> percentiles, size_t max_samples,
122       UnitOfMeasure unit_of_measure);
123 
124   // Retrieves the cell for the specified labels, creating it on demand if
125   // not already present.
126   template <typename... Labels>
127   PercentileSamplerCell* GetCell(const Labels&... labels)
128       TF_LOCKS_EXCLUDED(mu_);
129 
GetStatus()130   Status GetStatus() { return status_; }
131 
132  private:
133   friend class PercentileSamplerCell;
134 
PercentileSampler(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> & metric_def,std::vector<double> percentiles,size_t max_samples,UnitOfMeasure unit_of_measure)135   PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles,
136                                     NumLabels>& metric_def,
137                     std::vector<double> percentiles, size_t max_samples,
138                     UnitOfMeasure unit_of_measure)
139       : metric_def_(metric_def),
140         unit_of_measure_(unit_of_measure),
141         percentiles_(std::move(percentiles)),
142         max_samples_(max_samples),
143         registration_handle_(CollectionRegistry::Default()->Register(
144             &metric_def_, [&](MetricCollectorGetter getter) {
145               auto metric_collector = getter.Get(&metric_def_);
146               mutex_lock l(mu_);
147               for (const auto& cell : cells_) {
148                 metric_collector.CollectValue(cell.first, cell.second.value());
149               }
150             })) {
151     if (registration_handle_) {
152       for (size_t i = 0; i < percentiles_.size(); ++i) {
153         if (percentiles_[i] < 0.0 || percentiles_[i] > 100.0) {
154           status_ = Status(tensorflow::error::Code::INVALID_ARGUMENT,
155                            "Percentile values must be in [0, 100] range.");
156           break;
157         }
158         if (i + 1 < percentiles_.size() &&
159             percentiles_[i] >= percentiles_[i + 1]) {
160           status_ =
161               Status(tensorflow::error::Code::INVALID_ARGUMENT,
162                      "Percentile values must be in strictly ascending order.");
163           break;
164         }
165       }
166     } else {
167       status_ = Status(tensorflow::error::Code::ALREADY_EXISTS,
168                        "Another metric with the same name already exists.");
169     }
170   }
171 
172   mutable mutex mu_;
173 
174   Status status_;
175 
176   // The metric definition. This will be used to identify the metric when we
177   // register it for collection.
178   const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_;
179 
180   UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber;
181 
182   // The percentiles samples required for this metric.
183   const std::vector<double> percentiles_;
184 
185   // The maximum size of the samples colected by the PercentileSamplerCell cell.
186   const size_t max_samples_ = 0;
187 
188   // Registration handle with the CollectionRegistry.
189   std::unique_ptr<CollectionRegistry::RegistrationHandle> registration_handle_;
190 
191   using LabelArray = std::array<string, NumLabels>;
192   // we need a container here that guarantees pointer stability of the value,
193   // namely, the pointer of the value should remain valid even after more cells
194   // are inserted.
195   std::map<LabelArray, PercentileSamplerCell> cells_ TF_GUARDED_BY(mu_);
196 
197   TF_DISALLOW_COPY_AND_ASSIGN(PercentileSampler);
198 };
199 
200 template <int NumLabels>
New(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> & metric_def,std::vector<double> percentiles,size_t max_samples,UnitOfMeasure unit_of_measure)201 PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
202     const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
203         metric_def,
204     std::vector<double> percentiles, size_t max_samples,
205     UnitOfMeasure unit_of_measure) {
206   return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles),
207                                           max_samples, unit_of_measure);
208 }
209 
210 template <int NumLabels>
211 template <typename... Labels>
GetCell(const Labels &...labels)212 PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell(
213     const Labels&... labels) TF_LOCKS_EXCLUDED(mu_) {
214   // Provides a more informative error message than the one during array
215   // construction below.
216   static_assert(
217       sizeof...(Labels) == NumLabels,
218       "Mismatch between PercentileSampler<NumLabels> and number of labels "
219       "provided in GetCell(...).");
220 
221   const LabelArray& label_array = {{labels...}};
222   mutex_lock l(mu_);
223   const auto found_it = cells_.find(label_array);
224   if (found_it != cells_.end()) {
225     return &(found_it->second);
226   }
227   return &(cells_
228                .emplace(std::piecewise_construct,
229                         std::forward_as_tuple(label_array),
230                         std::forward_as_tuple(unit_of_measure_, percentiles_,
231                                               max_samples_))
232                .first->second);
233 }
234 
235 }  // namespace monitoring
236 }  // namespace tensorflow
237 
238 #endif  // IS_MOBILE_PLATFORM
239 #endif  // TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
240