1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_LIB_MONITORING_COLLECTION_REGISTRY_H_
17 #define TENSORFLOW_CORE_LIB_MONITORING_COLLECTION_REGISTRY_H_
18 
19 #include <map>
20 #include <memory>
21 
22 #include "tensorflow/core/framework/summary.pb.h"
23 #include "tensorflow/core/lib/monitoring/collected_metrics.h"
24 #include "tensorflow/core/lib/monitoring/metric_def.h"
25 #include "tensorflow/core/lib/monitoring/types.h"
26 #include "tensorflow/core/platform/env.h"
27 #include "tensorflow/core/platform/logging.h"
28 #include "tensorflow/core/platform/macros.h"
29 #include "tensorflow/core/platform/mutex.h"
30 #include "tensorflow/core/platform/stringpiece.h"
31 #include "tensorflow/core/platform/thread_annotations.h"
32 #include "tensorflow/core/platform/types.h"
33 
34 namespace tensorflow {
35 namespace monitoring {
36 
// Forward declaration only. Collector is defined further down in this header,
// but MetricCollector and MetricCollectorGetter name it as a friend before
// that point.
namespace internal {
class Collector;
}  // namespace internal

// Forward declaration only. CollectionRegistry names this accessor as a
// friend.
namespace test_util {
class CollectionRegistryTestAccess;
}  // namespace test_util
44 
45 // Metric implementations would get an instance of this class using the
46 // MetricCollectorGetter in the collection-function lambda, so that their values
47 // can be collected.
48 //
49 // Read the documentation on CollectionRegistry::Register() for more details.
50 //
51 // For example:
52 //   auto metric_collector = metric_collector_getter->Get(&metric_def);
53 //   metric_collector.CollectValue(some_labels, some_value);
54 //   metric_collector.CollectValue(others_labels, other_value);
55 //
56 // This class is NOT thread-safe.
57 template <MetricKind metric_kind, typename Value, int NumLabels>
58 class MetricCollector {
59  public:
60   ~MetricCollector() = default;
61 
62   // Collects the value with these labels.
63   void CollectValue(const std::array<string, NumLabels>& labels, Value value);
64 
65  private:
66   friend class internal::Collector;
67 
MetricCollector(const MetricDef<metric_kind,Value,NumLabels> * const metric_def,const uint64 registration_time_millis,internal::Collector * const collector,PointSet * const point_set)68   MetricCollector(
69       const MetricDef<metric_kind, Value, NumLabels>* const metric_def,
70       const uint64 registration_time_millis,
71       internal::Collector* const collector, PointSet* const point_set)
72       : metric_def_(metric_def),
73         registration_time_millis_(registration_time_millis),
74         collector_(collector),
75         point_set_(point_set) {
76     point_set_->metric_name = string(metric_def->name());
77   }
78 
79   const MetricDef<metric_kind, Value, NumLabels>* const metric_def_;
80   const uint64 registration_time_millis_;
81   internal::Collector* const collector_;
82   PointSet* const point_set_;
83 
84   // This is made copyable because we can't hand out references of this class
85   // from MetricCollectorGetter because this class is templatized, and we need
86   // MetricCollectorGetter not to be templatized and hence MetricCollectorGetter
87   // can't own an instance of this class.
88 };
89 
90 // Returns a MetricCollector with the same template parameters as the
91 // metric-definition, so that the values of a metric can be collected.
92 //
93 // The collection-function defined by a metric takes this as a parameter.
94 //
95 // Read the documentation on CollectionRegistry::Register() for more details.
96 class MetricCollectorGetter {
97  public:
98   // Returns the MetricCollector with the same template parameters as the
99   // metric_def.
100   template <MetricKind metric_kind, typename Value, int NumLabels>
101   MetricCollector<metric_kind, Value, NumLabels> Get(
102       const MetricDef<metric_kind, Value, NumLabels>* const metric_def);
103 
104  private:
105   friend class internal::Collector;
106 
MetricCollectorGetter(internal::Collector * const collector,const AbstractMetricDef * const allowed_metric_def,const uint64 registration_time_millis)107   MetricCollectorGetter(internal::Collector* const collector,
108                         const AbstractMetricDef* const allowed_metric_def,
109                         const uint64 registration_time_millis)
110       : collector_(collector),
111         allowed_metric_def_(allowed_metric_def),
112         registration_time_millis_(registration_time_millis) {}
113 
114   internal::Collector* const collector_;
115   const AbstractMetricDef* const allowed_metric_def_;
116   const uint64 registration_time_millis_;
117 };
118 
119 // A collection registry for metrics.
120 //
121 // Metrics are registered here so that their state can be collected later and
122 // exported.
123 //
124 // This class is thread-safe.
125 class CollectionRegistry {
126  public:
127   ~CollectionRegistry() = default;
128 
129   // Returns the default registry for the process.
130   //
131   // This registry belongs to this library and should never be deleted.
132   static CollectionRegistry* Default();
133 
134   using CollectionFunction = std::function<void(MetricCollectorGetter getter)>;
135 
136   // Registers the metric and the collection-function which can be used to
137   // collect its values. Returns a Registration object, which when upon
138   // destruction would cause the metric to be unregistered from this registry.
139   //
140   // IMPORTANT: Delete the handle before the metric-def is deleted.
141   //
142   // Example usage;
143   // CollectionRegistry::Default()->Register(
144   //   &metric_def,
145   //   [&](MetricCollectorGetter getter) {
146   //     auto metric_collector = getter.Get(&metric_def);
147   //     for (const auto& cell : cells) {
148   //       metric_collector.CollectValue(cell.labels(), cell.value());
149   //     }
150   //   });
151   class RegistrationHandle;
152   std::unique_ptr<RegistrationHandle> Register(
153       const AbstractMetricDef* metric_def,
154       const CollectionFunction& collection_function)
155       TF_LOCKS_EXCLUDED(mu_) TF_MUST_USE_RESULT;
156 
157   // Options for collecting metrics.
158   struct CollectMetricsOptions {
CollectMetricsOptionsCollectMetricsOptions159     CollectMetricsOptions() {}
160     bool collect_metric_descriptors = true;
161   };
162   // Goes through all the registered metrics, collects their definitions
163   // (optionally) and current values and returns them in a standard format.
164   std::unique_ptr<CollectedMetrics> CollectMetrics(
165       const CollectMetricsOptions& options) const;
166 
167  private:
168   friend class test_util::CollectionRegistryTestAccess;
169   friend class internal::Collector;
170 
171   CollectionRegistry(Env* env);
172 
173   // Unregisters the metric from this registry. This is private because the
174   // public interface provides a Registration handle which automatically calls
175   // this upon destruction.
176   void Unregister(const AbstractMetricDef* metric_def) TF_LOCKS_EXCLUDED(mu_);
177 
178   // TF environment, mainly used for timestamping.
179   Env* const env_;
180 
181   mutable mutex mu_;
182 
183   // Information required for collection.
184   struct CollectionInfo {
185     const AbstractMetricDef* const metric_def;
186     CollectionFunction collection_function;
187     uint64 registration_time_millis;
188   };
189   std::map<StringPiece, CollectionInfo> registry_ TF_GUARDED_BY(mu_);
190 
191   TF_DISALLOW_COPY_AND_ASSIGN(CollectionRegistry);
192 };
193 
194 ////
195 // Implementation details follow. API readers may skip.
196 ////
197 
198 class CollectionRegistry::RegistrationHandle {
199  public:
RegistrationHandle(CollectionRegistry * const export_registry,const AbstractMetricDef * const metric_def)200   RegistrationHandle(CollectionRegistry* const export_registry,
201                      const AbstractMetricDef* const metric_def)
202       : export_registry_(export_registry), metric_def_(metric_def) {}
203 
~RegistrationHandle()204   ~RegistrationHandle() { export_registry_->Unregister(metric_def_); }
205 
206  private:
207   CollectionRegistry* const export_registry_;
208   const AbstractMetricDef* const metric_def_;
209 };
210 
211 namespace internal {
212 
213 template <typename Value>
214 void CollectValue(Value value, Point* point);
215 
216 template <>
CollectValue(int64 value,Point * const point)217 inline void CollectValue(int64 value, Point* const point) {
218   point->value_type = ValueType::kInt64;
219   point->int64_value = value;
220 }
221 
222 template <>
CollectValue(string value,Point * const point)223 inline void CollectValue(string value, Point* const point) {
224   point->value_type = ValueType::kString;
225   point->string_value = std::move(value);
226 }
227 
228 template <>
CollectValue(bool value,Point * const point)229 inline void CollectValue(bool value, Point* const point) {
230   point->value_type = ValueType::kBool;
231   point->bool_value = value;
232 }
233 
234 template <>
CollectValue(HistogramProto value,Point * const point)235 inline void CollectValue(HistogramProto value, Point* const point) {
236   point->value_type = ValueType::kHistogram;
237   // This is inefficient. If and when we hit snags, we can change the API to do
238   // this more efficiently.
239   point->histogram_value = std::move(value);
240 }
241 
242 template <>
CollectValue(Percentiles value,Point * const point)243 inline void CollectValue(Percentiles value, Point* const point) {
244   point->value_type = ValueType::kPercentiles;
245   point->percentiles_value = std::move(value);
246 }
247 
248 // Used by the CollectionRegistry class to collect all the values of all the
249 // metrics in the registry. This is an implementation detail of the
250 // CollectionRegistry class, please do not depend on this.
251 //
252 // This cannot be a private nested class because we need to forward declare this
253 // so that the MetricCollector and MetricCollectorGetter classes can be friends
254 // with it.
255 //
256 // This class is thread-safe.
257 class Collector {
258  public:
Collector(const uint64 collection_time_millis)259   Collector(const uint64 collection_time_millis)
260       : collected_metrics_(new CollectedMetrics()),
261         collection_time_millis_(collection_time_millis) {}
262 
263   template <MetricKind metric_kind, typename Value, int NumLabels>
GetMetricCollector(const MetricDef<metric_kind,Value,NumLabels> * const metric_def,const uint64 registration_time_millis,internal::Collector * const collector)264   MetricCollector<metric_kind, Value, NumLabels> GetMetricCollector(
265       const MetricDef<metric_kind, Value, NumLabels>* const metric_def,
266       const uint64 registration_time_millis,
267       internal::Collector* const collector) TF_LOCKS_EXCLUDED(mu_) {
268     auto* const point_set = [&]() {
269       mutex_lock l(mu_);
270       return collected_metrics_->point_set_map
271           .insert(std::make_pair(string(metric_def->name()),
272                                  std::unique_ptr<PointSet>(new PointSet())))
273           .first->second.get();
274     }();
275     return MetricCollector<metric_kind, Value, NumLabels>(
276         metric_def, registration_time_millis, collector, point_set);
277   }
278 
collection_time_millis()279   uint64 collection_time_millis() const { return collection_time_millis_; }
280 
281   void CollectMetricDescriptor(const AbstractMetricDef* const metric_def)
282       TF_LOCKS_EXCLUDED(mu_);
283 
284   void CollectMetricValues(
285       const CollectionRegistry::CollectionInfo& collection_info);
286 
287   std::unique_ptr<CollectedMetrics> ConsumeCollectedMetrics()
288       TF_LOCKS_EXCLUDED(mu_);
289 
290  private:
291   mutable mutex mu_;
292   std::unique_ptr<CollectedMetrics> collected_metrics_ TF_GUARDED_BY(mu_);
293   const uint64 collection_time_millis_;
294 
295   TF_DISALLOW_COPY_AND_ASSIGN(Collector);
296 };
297 
298 // Write the timestamps for the point based on the MetricKind.
299 //
300 // Gauge metrics will have start and end timestamps set to the collection time.
301 //
302 // Cumulative metrics will have the start timestamp set to the time when the
303 // collection function was registered, while the end timestamp will be set to
304 // the collection time.
305 template <MetricKind kind>
306 void WriteTimestamps(const uint64 registration_time_millis,
307                      const uint64 collection_time_millis, Point* const point);
308 
309 template <>
310 inline void WriteTimestamps<MetricKind::kGauge>(
311     const uint64 registration_time_millis, const uint64 collection_time_millis,
312     Point* const point) {
313   point->start_timestamp_millis = collection_time_millis;
314   point->end_timestamp_millis = collection_time_millis;
315 }
316 
317 template <>
318 inline void WriteTimestamps<MetricKind::kCumulative>(
319     const uint64 registration_time_millis, const uint64 collection_time_millis,
320     Point* const point) {
321   point->start_timestamp_millis = registration_time_millis;
322   // There's a chance that the clock goes backwards on the same machine, so we
323   // protect ourselves against that.
324   point->end_timestamp_millis =
325       registration_time_millis < collection_time_millis
326           ? collection_time_millis
327           : registration_time_millis;
328 }
329 
330 }  // namespace internal
331 
332 template <MetricKind metric_kind, typename Value, int NumLabels>
CollectValue(const std::array<string,NumLabels> & labels,Value value)333 void MetricCollector<metric_kind, Value, NumLabels>::CollectValue(
334     const std::array<string, NumLabels>& labels, Value value) {
335   point_set_->points.emplace_back(new Point());
336   auto* const point = point_set_->points.back().get();
337   const std::vector<string> label_descriptions =
338       metric_def_->label_descriptions();
339   point->labels.reserve(NumLabels);
340   for (int i = 0; i < NumLabels; ++i) {
341     point->labels.push_back({});
342     auto* const label = &point->labels.back();
343     label->name = label_descriptions[i];
344     label->value = labels[i];
345   }
346   internal::CollectValue(std::move(value), point);
347   internal::WriteTimestamps<metric_kind>(
348       registration_time_millis_, collector_->collection_time_millis(), point);
349 }
350 
351 template <MetricKind metric_kind, typename Value, int NumLabels>
Get(const MetricDef<metric_kind,Value,NumLabels> * const metric_def)352 MetricCollector<metric_kind, Value, NumLabels> MetricCollectorGetter::Get(
353     const MetricDef<metric_kind, Value, NumLabels>* const metric_def) {
354   if (allowed_metric_def_ != metric_def) {
355     LOG(FATAL) << "Expected collection for: " << allowed_metric_def_->name()
356                << " but instead got: " << metric_def->name();
357   }
358 
359   return collector_->GetMetricCollector(metric_def, registration_time_millis_,
360                                         collector_);
361 }
362 
// Interface implemented by metrics exporters.
//
// PeriodicallyExportMetrics() is called once on the exporter when it is
// wrapped in an exporter_registration::ExporterRegistration. What the two
// methods export, and where to, is up to the implementation.
class Exporter {
 public:
  virtual ~Exporter() = default;
  virtual void PeriodicallyExportMetrics() = 0;
  virtual void ExportMetrics() = 0;
};
369 
370 namespace exporter_registration {
371 
372 class ExporterRegistration {
373  public:
ExporterRegistration(Exporter * exporter)374   explicit ExporterRegistration(Exporter* exporter) : exporter_(exporter) {
375     exporter_->PeriodicallyExportMetrics();
376   }
377 
378  private:
379   Exporter* exporter_;
380 };
381 
382 }  // namespace exporter_registration
383 
// Innermost step: defines a static ExporterRegistration named
// exporter_registration_<ctr>, constructed from a newly allocated `exporter`.
#define REGISTER_TF_METRICS_EXPORTER_UNIQ(ctr, exporter)                       \
  static ::tensorflow::monitoring::exporter_registration::ExporterRegistration \
      exporter_registration_##ctr(new exporter())

// Extra level of indirection so that `ctr` (__COUNTER__) is expanded to its
// value before it is pasted into the variable name.
#define REGISTER_TF_METRICS_EXPORTER_UNIQ_HELPER(ctr, exporter) \
  REGISTER_TF_METRICS_EXPORTER_UNIQ(ctr, exporter)

// Entry point. `exporter` is the name of a default-constructible class derived
// from Exporter, e.g. REGISTER_TF_METRICS_EXPORTER(MyExporter);
#define REGISTER_TF_METRICS_EXPORTER(exporter) \
  REGISTER_TF_METRICS_EXPORTER_UNIQ_HELPER(__COUNTER__, exporter)
393 
394 }  // namespace monitoring
395 }  // namespace tensorflow
396 
397 #endif  // TENSORFLOW_CORE_LIB_MONITORING_COLLECTION_REGISTRY_H_
398