1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef ART_RUNTIME_BASE_HISTOGRAM_H_
17 #define ART_RUNTIME_BASE_HISTOGRAM_H_
18 
19 #include <vector>
20 #include <string>
21 
22 #include "base/logging.h"
23 #include "utils.h"
24 
25 namespace art {
26 
27 // Creates a data histogram  for a better understanding of statistical data.
28 // Histogram analysis goes beyond simple mean and standard deviation to provide
29 // percentiles values, describing where the $% of the input data lies.
30 // Designed to be simple and used with timing logger in art.
31 
32 template <class Value> class Histogram {
33   const double kAdjust;
34   const size_t kInitialBucketCount;
35 
36  public:
37   class CumulativeData {
38     friend class Histogram<Value>;
39     std::vector<uint64_t> freq_;
40     std::vector<double> perc_;
41   };
42 
43   // Used by the cumulative timing logger to search the histogram set using for an existing split
44   // with the same name using CumulativeLogger::HistogramComparator.
45   explicit Histogram(const char* name);
46   // This is the expected constructor when creating new Histograms.
47   Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
48   void AddValue(Value);
49   // Builds the cumulative distribution function from the frequency data.
50   // Accumulative summation of frequencies.
51   // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
52   // Accumulative summation of percentiles; which is the frequency / SampleSize
53   // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
54   void CreateHistogram(CumulativeData* data) const;
55   // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
56   void Reset();
57   double Mean() const;
58   double Variance() const;
59   double Percentile(double per, const CumulativeData& data) const;
60   void PrintConfidenceIntervals(std::ostream& os, double interval,
61                                 const CumulativeData& data) const;
62   void PrintBins(std::ostream& os, const CumulativeData& data) const;
63   Value GetRange(size_t bucket_idx) const;
64   size_t GetBucketCount() const;
65 
SampleSize()66   uint64_t SampleSize() const {
67     return sample_size_;
68   }
69 
Sum()70   Value Sum() const {
71     return sum_;
72   }
73 
AdjustedSum()74   Value AdjustedSum() const {
75     return sum_ * kAdjust;
76   }
77 
Min()78   Value Min() const {
79     return min_value_added_;
80   }
81 
Max()82   Value Max() const {
83     return max_value_added_;
84   }
85 
Name()86   const std::string& Name() const {
87     return name_;
88   }
89 
90  private:
91   void Initialize();
92   size_t FindBucket(Value val) const;
93   void BucketiseValue(Value val);
94   // Add more buckets to the histogram to fill in a new value that exceeded
95   // the max_read_value_.
96   void GrowBuckets(Value val);
97   std::string name_;
98   // Maximum number of buckets.
99   const size_t max_buckets_;
100   // Number of samples placed in histogram.
101   size_t sample_size_;
102   // Width of the bucket range. The lower the value is the more accurate
103   // histogram percentiles are. Grows adaptively when we hit max buckets.
104   Value bucket_width_;
105   // How many occurrences of values fall within a bucket at index i where i covers the range
106   // starting at  min_ + i * bucket_width_ with size bucket_size_.
107   std::vector<uint32_t> frequency_;
108   // Summation of all the elements inputed by the user.
109   Value sum_;
110   // Minimum value that can fit in the histogram. Fixed to zero for now.
111   Value min_;
112   // Maximum value that can fit in the histogram, grows adaptively.
113   Value max_;
114   // Summation of the values entered. Used to calculate variance.
115   Value sum_of_squares_;
116   // Maximum value entered in the histogram.
117   Value min_value_added_;
118   // Minimum value entered in the histogram.
119   Value max_value_added_;
120 
121   DISALLOW_COPY_AND_ASSIGN(Histogram);
122 };
123 }  // namespace art
124 
125 #endif  // ART_RUNTIME_BASE_HISTOGRAM_H_
126