1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Histogram is an object that aggregates statistics, and can summarize them in
6 // various forms, including ASCII graphical, HTML, and numerically (as a
7 // vector of numbers corresponding to each of the aggregating buckets).
8 
9 // It supports calls to accumulate either time intervals (which are processed
10 // as integral number of milliseconds), or arbitrary integral units.
11 
12 // For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
13 // the minimum for a declared range is 1 (instead of 0), while the maximum is
14 // (HistogramBase::kSampleType_MAX - 1). However, there will always be underflow
15 // and overflow buckets added automatically, so a 0 bucket will always exist
16 // even when a minimum value of 1 is specified.
17 
18 // Each use of a histogram with the same name will reference the same underlying
19 // data, so it is safe to record to the same histogram from multiple locations
20 // in the code. It is a runtime error if all uses of the same histogram do not
21 // agree exactly in type, bucket size and range.
22 
23 // For Histogram and LinearHistogram, the maximum for a declared range should
24 // always be strictly larger than the minimum. Zero and
25 // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
26 // so the smallest legal bucket_count is 3. However, CustomHistogram can have
27 // a bucket count of 2 (when you give a custom ranges vector containing only 1
28 // range).
29 // For these 3 kinds of histograms, the max bucket count is always
30 // (Histogram::kBucketCount_MAX - 1).
31 
32 // The buckets layout of class Histogram is exponential. For example, buckets
33 // might contain (sequentially) the count of values in the following intervals:
34 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
35 // That bucket allocation would actually result from construction of a histogram
36 // for values between 1 and 64, with 8 buckets, such as:
37 // Histogram count("some name", 1, 64, 8);
38 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
39 // are also counted by the constructor in the user supplied "bucket_count"
40 // argument.
41 // The above example has an exponential ratio of 2 (doubling the bucket width
42 // in each consecutive bucket).  The Histogram class automatically calculates
43 // the smallest ratio that it can use to construct the number of buckets
44 // selected in the constructor.  As another example, if you had 50 buckets,
45 // and millisecond time values from 1 to 10000, then the ratio between
46 // consecutive bucket widths will be approximately the 50th
47 // root of 10000.  This approach provides very fine grain (narrow) buckets
48 // at the low end of the histogram scale, but allows the histogram to cover a
49 // gigantic range with the addition of very few buckets.
50 
51 // Usually we use macros to define and use a histogram, which are defined in
52 // base/metrics/histogram_macros.h. Note: Callers should include that header
53 // directly if they only access the histogram APIs through macros.
54 //
55 // Macros use a pattern involving a function static variable, that is a pointer
56 // to a histogram.  This static is explicitly initialized on any thread
57 // that detects an uninitialized (NULL) pointer.  The potentially racy
58 // initialization is not a problem as it is always set to point to the same
59 // value (i.e., the FactoryGet always returns the same value).  FactoryGet
60 // is also completely thread safe, which results in a completely thread safe,
61 // and relatively fast, set of counters.  To avoid races at shutdown, the static
62 // pointer is NOT deleted, and we leak the histograms at process termination.
63 
64 #ifndef BASE_METRICS_HISTOGRAM_H_
65 #define BASE_METRICS_HISTOGRAM_H_
66 
67 #include <stddef.h>
68 #include <stdint.h>
69 
70 #include <map>
71 #include <memory>
72 #include <string>
73 #include <vector>
74 
75 #include "base/base_export.h"
76 #include "base/compiler_specific.h"
77 #include "base/gtest_prod_util.h"
78 #include "base/logging.h"
79 #include "base/macros.h"
80 #include "base/metrics/bucket_ranges.h"
81 #include "base/metrics/histogram_base.h"
82 #include "base/metrics/histogram_samples.h"
83 #include "base/time/time.h"
84 
85 namespace base {
86 
// Histogram flavors that are fully declared later in this header.
class Histogram;
class LinearHistogram;
class BooleanHistogram;
class CustomHistogram;

// Types this header only refers to through pointers, references or
// std::unique_ptr.
class Pickle;
class PickleIterator;
class SampleVector;
94 
95 class BASE_EXPORT Histogram : public HistogramBase {
96  public:
97   // Initialize maximum number of buckets in histograms as 16,384.
98   static const uint32_t kBucketCount_MAX;
99 
100   typedef std::vector<Count> Counts;
101 
102   ~Histogram() override;
103 
104   //----------------------------------------------------------------------------
105   // For a valid histogram, input should follow these restrictions:
106   // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
107   //              normalized up to 1)
108   // maximum > minimum
109   // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
110   // Additionally,
111   // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
112   // more buckets than the range of numbers; having more buckets than 1 per
113   // value in the range would be nonsensical.
114   static HistogramBase* FactoryGet(const std::string& name,
115                                    Sample minimum,
116                                    Sample maximum,
117                                    uint32_t bucket_count,
118                                    int32_t flags);
119   static HistogramBase* FactoryTimeGet(const std::string& name,
120                                        base::TimeDelta minimum,
121                                        base::TimeDelta maximum,
122                                        uint32_t bucket_count,
123                                        int32_t flags);
124 
125   // Overloads of the above two functions that take a const char* |name| param,
126   // to avoid code bloat from the std::string constructor being inlined into
127   // call sites.
128   static HistogramBase* FactoryGet(const char* name,
129                                    Sample minimum,
130                                    Sample maximum,
131                                    uint32_t bucket_count,
132                                    int32_t flags);
133   static HistogramBase* FactoryTimeGet(const char* name,
134                                        base::TimeDelta minimum,
135                                        base::TimeDelta maximum,
136                                        uint32_t bucket_count,
137                                        int32_t flags);
138 
139   // Create a histogram using data in persistent storage.
140   static std::unique_ptr<HistogramBase> PersistentCreate(
141       const std::string& name,
142       Sample minimum,
143       Sample maximum,
144       const BucketRanges* ranges,
145       HistogramBase::AtomicCount* counts,
146       HistogramBase::AtomicCount* logged_counts,
147       uint32_t counts_size,
148       HistogramSamples::Metadata* meta,
149       HistogramSamples::Metadata* logged_meta);
150 
151   static void InitializeBucketRanges(Sample minimum,
152                                      Sample maximum,
153                                      BucketRanges* ranges);
154 
155   // This constant if for FindCorruption. Since snapshots of histograms are
156   // taken asynchronously relative to sampling, and our counting code currently
157   // does not prevent race conditions, it is pretty likely that we'll catch a
158   // redundant count that doesn't match the sample count.  We allow for a
159   // certain amount of slop before flagging this as an inconsistency. Even with
160   // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
161   // so we'll eventually get the data, if it was not the result of a corruption.
162   static const int kCommonRaceBasedCountMismatch;
163 
164   // Check to see if bucket ranges, counts and tallies in the snapshot are
165   // consistent with the bucket ranges and checksums in our histogram.  This can
166   // produce a false-alarm if a race occurred in the reading of the data during
167   // a SnapShot process, but should otherwise be false at all times (unless we
168   // have memory over-writes, or DRAM failures). Flag definitions are located
169   // under "enum Inconsistency" in base/metrics/histogram_base.h.
170   uint32_t FindCorruption(const HistogramSamples& samples) const override;
171 
172   //----------------------------------------------------------------------------
173   // Accessors for factory construction, serialization and testing.
174   //----------------------------------------------------------------------------
declared_min()175   Sample declared_min() const { return declared_min_; }
declared_max()176   Sample declared_max() const { return declared_max_; }
177   virtual Sample ranges(uint32_t i) const;
178   virtual uint32_t bucket_count() const;
bucket_ranges()179   const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
180 
181   // This function validates histogram construction arguments. It returns false
182   // if some of the arguments are totally bad.
183   // Note. Currently it allow some bad input, e.g. 0 as minimum, but silently
184   // converts it to good input: 1.
185   // TODO(kaiwang): Be more restrict and return false for any bad input, and
186   // make this a readonly validating function.
187   static bool InspectConstructionArguments(const std::string& name,
188                                            Sample* minimum,
189                                            Sample* maximum,
190                                            uint32_t* bucket_count);
191 
192   // HistogramBase implementation:
193   uint64_t name_hash() const override;
194   HistogramType GetHistogramType() const override;
195   bool HasConstructionArguments(Sample expected_minimum,
196                                 Sample expected_maximum,
197                                 uint32_t expected_bucket_count) const override;
198   void Add(Sample value) override;
199   void AddCount(Sample value, int count) override;
200   std::unique_ptr<HistogramSamples> SnapshotSamples() const override;
201   std::unique_ptr<HistogramSamples> SnapshotDelta() override;
202   std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override;
203   void AddSamples(const HistogramSamples& samples) override;
204   bool AddSamplesFromPickle(base::PickleIterator* iter) override;
205   void WriteHTMLGraph(std::string* output) const override;
206   void WriteAscii(std::string* output) const override;
207 
208  protected:
209   // This class, defined entirely within the .cc file, contains all the
210   // common logic for building a Histogram and can be overridden by more
211   // specific types to alter details of how the creation is done. It is
212   // defined as an embedded class (rather than an anonymous one) so it
213   // can access the protected constructors.
214   class Factory;
215 
216   // |ranges| should contain the underflow and overflow buckets. See top
217   // comments for example.
218   Histogram(const std::string& name,
219             Sample minimum,
220             Sample maximum,
221             const BucketRanges* ranges);
222 
223   // Traditionally, histograms allocate their own memory for the bucket
224   // vector but "shared" histograms use memory regions allocated from a
225   // special memory segment that is passed in here.  It is assumed that
226   // the life of this memory is managed externally and exceeds the lifetime
227   // of this object. Practically, this memory is never released until the
228   // process exits and the OS cleans it up.
229   Histogram(const std::string& name,
230             Sample minimum,
231             Sample maximum,
232             const BucketRanges* ranges,
233             HistogramBase::AtomicCount* counts,
234             HistogramBase::AtomicCount* logged_counts,
235             uint32_t counts_size,
236             HistogramSamples::Metadata* meta,
237             HistogramSamples::Metadata* logged_meta);
238 
239   // HistogramBase implementation:
240   bool SerializeInfoImpl(base::Pickle* pickle) const override;
241 
242   // Method to override to skip the display of the i'th bucket if it's empty.
243   virtual bool PrintEmptyBucket(uint32_t index) const;
244 
245   // Get normalized size, relative to the ranges(i).
246   virtual double GetBucketSize(Count current, uint32_t i) const;
247 
248   // Return a string description of what goes in a given bucket.
249   // Most commonly this is the numeric value, but in derived classes it may
250   // be a name (or string description) given to the bucket.
251   virtual const std::string GetAsciiBucketRange(uint32_t it) const;
252 
253  private:
254   // Allow tests to corrupt our innards for testing purposes.
255   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
256   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
257   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
258 
259   friend class StatisticsRecorder;  // To allow it to delete duplicates.
260   friend class StatisticsRecorderTest;
261 
262   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
263       base::PickleIterator* iter);
264   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
265 
266   // Implementation of SnapshotSamples function.
267   std::unique_ptr<SampleVector> SnapshotSampleVector() const;
268 
269   //----------------------------------------------------------------------------
270   // Helpers for emitting Ascii graphic.  Each method appends data to output.
271 
272   void WriteAsciiImpl(bool graph_it,
273                       const std::string& newline,
274                       std::string* output) const;
275 
276   // Find out how large (graphically) the largest bucket will appear to be.
277   double GetPeakBucketSize(const SampleVector& samples) const;
278 
279   // Write a common header message describing this histogram.
280   void WriteAsciiHeader(const SampleVector& samples,
281                         Count sample_count,
282                         std::string* output) const;
283 
284   // Write information about previous, current, and next buckets.
285   // Information such as cumulative percentage, etc.
286   void WriteAsciiBucketContext(const int64_t past,
287                                const Count current,
288                                const int64_t remaining,
289                                const uint32_t i,
290                                std::string* output) const;
291 
292   // WriteJSON calls these.
293   void GetParameters(DictionaryValue* params) const override;
294 
295   void GetCountAndBucketData(Count* count,
296                              int64_t* sum,
297                              ListValue* buckets) const override;
298 
299   // Does not own this object. Should get from StatisticsRecorder.
300   const BucketRanges* bucket_ranges_;
301 
302   Sample declared_min_;  // Less than this goes into the first bucket.
303   Sample declared_max_;  // Over this goes into the last bucket.
304 
305   // Finally, provide the state that changes with the addition of each new
306   // sample.
307   std::unique_ptr<SampleVector> samples_;
308 
309   // Also keep a previous uploaded state for calculating deltas.
310   std::unique_ptr<HistogramSamples> logged_samples_;
311 
312   // Flag to indicate if PrepareFinalDelta has been previously called. It is
313   // used to DCHECK that a final delta is not created multiple times.
314   mutable bool final_delta_created_ = false;
315 
316   DISALLOW_COPY_AND_ASSIGN(Histogram);
317 };
318 
319 //------------------------------------------------------------------------------
320 
321 // LinearHistogram is a more traditional histogram, with evenly spaced
322 // buckets.
323 class BASE_EXPORT LinearHistogram : public Histogram {
324  public:
325   ~LinearHistogram() override;
326 
327   /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit
328      default underflow bucket. */
329   static HistogramBase* FactoryGet(const std::string& name,
330                                    Sample minimum,
331                                    Sample maximum,
332                                    uint32_t bucket_count,
333                                    int32_t flags);
334   static HistogramBase* FactoryTimeGet(const std::string& name,
335                                        TimeDelta minimum,
336                                        TimeDelta maximum,
337                                        uint32_t bucket_count,
338                                        int32_t flags);
339 
340   // Overloads of the above two functions that take a const char* |name| param,
341   // to avoid code bloat from the std::string constructor being inlined into
342   // call sites.
343   static HistogramBase* FactoryGet(const char* name,
344                                    Sample minimum,
345                                    Sample maximum,
346                                    uint32_t bucket_count,
347                                    int32_t flags);
348   static HistogramBase* FactoryTimeGet(const char* name,
349                                        TimeDelta minimum,
350                                        TimeDelta maximum,
351                                        uint32_t bucket_count,
352                                        int32_t flags);
353 
354   // Create a histogram using data in persistent storage.
355   static std::unique_ptr<HistogramBase> PersistentCreate(
356       const std::string& name,
357       Sample minimum,
358       Sample maximum,
359       const BucketRanges* ranges,
360       HistogramBase::AtomicCount* counts,
361       HistogramBase::AtomicCount* logged_counts,
362       uint32_t counts_size,
363       HistogramSamples::Metadata* meta,
364       HistogramSamples::Metadata* logged_meta);
365 
366   struct DescriptionPair {
367     Sample sample;
368     const char* description;  // Null means end of a list of pairs.
369   };
370 
371   // Create a LinearHistogram and store a list of number/text values for use in
372   // writing the histogram graph.
373   // |descriptions| can be NULL, which means no special descriptions to set. If
374   // it's not NULL, the last element in the array must has a NULL in its
375   // "description" field.
376   static HistogramBase* FactoryGetWithRangeDescription(
377       const std::string& name,
378       Sample minimum,
379       Sample maximum,
380       uint32_t bucket_count,
381       int32_t flags,
382       const DescriptionPair descriptions[]);
383 
384   static void InitializeBucketRanges(Sample minimum,
385                                      Sample maximum,
386                                      BucketRanges* ranges);
387 
388   // Overridden from Histogram:
389   HistogramType GetHistogramType() const override;
390 
391  protected:
392   class Factory;
393 
394   LinearHistogram(const std::string& name,
395                   Sample minimum,
396                   Sample maximum,
397                   const BucketRanges* ranges);
398 
399   LinearHistogram(const std::string& name,
400                   Sample minimum,
401                   Sample maximum,
402                   const BucketRanges* ranges,
403                   HistogramBase::AtomicCount* counts,
404                   HistogramBase::AtomicCount* logged_counts,
405                   uint32_t counts_size,
406                   HistogramSamples::Metadata* meta,
407                   HistogramSamples::Metadata* logged_meta);
408 
409   double GetBucketSize(Count current, uint32_t i) const override;
410 
411   // If we have a description for a bucket, then return that.  Otherwise
412   // let parent class provide a (numeric) description.
413   const std::string GetAsciiBucketRange(uint32_t i) const override;
414 
415   // Skip printing of name for numeric range if we have a name (and if this is
416   // an empty bucket).
417   bool PrintEmptyBucket(uint32_t index) const override;
418 
419  private:
420   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
421       base::PickleIterator* iter);
422   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
423 
424   // For some ranges, we store a printable description of a bucket range.
425   // If there is no description, then GetAsciiBucketRange() uses parent class
426   // to provide a description.
427   typedef std::map<Sample, std::string> BucketDescriptionMap;
428   BucketDescriptionMap bucket_description_;
429 
430   DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
431 };
432 
433 //------------------------------------------------------------------------------
434 
435 // BooleanHistogram is a histogram for booleans.
436 class BASE_EXPORT BooleanHistogram : public LinearHistogram {
437  public:
438   static HistogramBase* FactoryGet(const std::string& name, int32_t flags);
439 
440   // Overload of the above function that takes a const char* |name| param,
441   // to avoid code bloat from the std::string constructor being inlined into
442   // call sites.
443   static HistogramBase* FactoryGet(const char* name, int32_t flags);
444 
445   // Create a histogram using data in persistent storage.
446   static std::unique_ptr<HistogramBase> PersistentCreate(
447       const std::string& name,
448       const BucketRanges* ranges,
449       HistogramBase::AtomicCount* counts,
450       HistogramBase::AtomicCount* logged_counts,
451       HistogramSamples::Metadata* meta,
452       HistogramSamples::Metadata* logged_meta);
453 
454   HistogramType GetHistogramType() const override;
455 
456  protected:
457   class Factory;
458 
459  private:
460   BooleanHistogram(const std::string& name, const BucketRanges* ranges);
461   BooleanHistogram(const std::string& name,
462                    const BucketRanges* ranges,
463                    HistogramBase::AtomicCount* counts,
464                    HistogramBase::AtomicCount* logged_counts,
465                    HistogramSamples::Metadata* meta,
466                    HistogramSamples::Metadata* logged_meta);
467 
468   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
469       base::PickleIterator* iter);
470   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
471 
472   DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
473 };
474 
475 //------------------------------------------------------------------------------
476 
477 // CustomHistogram is a histogram for a set of custom integers.
478 class BASE_EXPORT CustomHistogram : public Histogram {
479  public:
480   // |custom_ranges| contains a vector of limits on ranges. Each limit should be
481   // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
482   // compatibility). The limits can be unordered or contain duplication, but
483   // client should not depend on this.
484   static HistogramBase* FactoryGet(const std::string& name,
485                                    const std::vector<Sample>& custom_ranges,
486                                    int32_t flags);
487 
488   // Overload of the above function that takes a const char* |name| param,
489   // to avoid code bloat from the std::string constructor being inlined into
490   // call sites.
491   static HistogramBase* FactoryGet(const char* name,
492                                    const std::vector<Sample>& custom_ranges,
493                                    int32_t flags);
494 
495   // Create a histogram using data in persistent storage.
496   static std::unique_ptr<HistogramBase> PersistentCreate(
497       const std::string& name,
498       const BucketRanges* ranges,
499       HistogramBase::AtomicCount* counts,
500       HistogramBase::AtomicCount* logged_counts,
501       uint32_t counts_size,
502       HistogramSamples::Metadata* meta,
503       HistogramSamples::Metadata* logged_meta);
504 
505   // Overridden from Histogram:
506   HistogramType GetHistogramType() const override;
507 
508   // Helper method for transforming an array of valid enumeration values
509   // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
510   // This function ensures that a guard bucket exists right after any
511   // valid sample value (unless the next higher sample is also a valid value),
512   // so that invalid samples never fall into the same bucket as valid samples.
513   // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
514   static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
515                                                  uint32_t num_values);
516  protected:
517   class Factory;
518 
519   CustomHistogram(const std::string& name,
520                   const BucketRanges* ranges);
521 
522   CustomHistogram(const std::string& name,
523                   const BucketRanges* ranges,
524                   HistogramBase::AtomicCount* counts,
525                   HistogramBase::AtomicCount* logged_counts,
526                   uint32_t counts_size,
527                   HistogramSamples::Metadata* meta,
528                   HistogramSamples::Metadata* logged_meta);
529 
530   // HistogramBase implementation:
531   bool SerializeInfoImpl(base::Pickle* pickle) const override;
532 
533   double GetBucketSize(Count current, uint32_t i) const override;
534 
535  private:
536   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
537       base::PickleIterator* iter);
538   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
539 
540   static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
541 
542   DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
543 };
544 
545 }  // namespace base
546 
547 #endif  // BASE_METRICS_HISTOGRAM_H_
548