1 //===-- Clustering.h --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// Utilities to compute benchmark result clusters.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
17 
18 #include "BenchmarkResult.h"
19 #include "llvm/Support/Error.h"
20 #include <vector>
21 
22 namespace exegesis {
23 
24 class InstructionBenchmarkClustering {
25 public:
26   // Clusters `Points` using DBSCAN with the given parameters. See the cc file
27   // for more explanations on the algorithm.
28   static llvm::Expected<InstructionBenchmarkClustering>
29   create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
30          double Epsilon);
31 
32   class ClusterId {
33   public:
noise()34     static ClusterId noise() { return ClusterId(kNoise); }
error()35     static ClusterId error() { return ClusterId(kError); }
makeValid(size_t Id)36     static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
ClusterId()37     ClusterId() : Id_(kUndef) {}
38     bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
39     bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
40 
isValid()41     bool isValid() const { return Id_ <= kMaxValid; }
isUndef()42     bool isUndef() const { return Id_ == kUndef; }
isNoise()43     bool isNoise() const { return Id_ == kNoise; }
isError()44     bool isError() const { return Id_ == kError; }
45 
46     // Precondition: isValid().
getId()47     size_t getId() const {
48       assert(isValid());
49       return Id_;
50     }
51 
52   private:
ClusterId(size_t Id)53     explicit ClusterId(size_t Id) : Id_(Id) {}
54     static constexpr const size_t kMaxValid =
55         std::numeric_limits<size_t>::max() - 4;
56     static constexpr const size_t kNoise = kMaxValid + 1;
57     static constexpr const size_t kError = kMaxValid + 2;
58     static constexpr const size_t kUndef = kMaxValid + 3;
59     size_t Id_;
60   };
61 
62   struct Cluster {
63     Cluster() = delete;
ClusterCluster64     explicit Cluster(const ClusterId &Id) : Id(Id) {}
65 
66     const ClusterId Id;
67     // Indices of benchmarks within the cluster.
68     std::vector<int> PointIndices;
69   };
70 
getClusterIdForPoint(size_t P)71   ClusterId getClusterIdForPoint(size_t P) const {
72     return ClusterIdForPoint_[P];
73   }
74 
getPoints()75   const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
76 
getCluster(ClusterId Id)77   const Cluster &getCluster(ClusterId Id) const {
78     assert(!Id.isUndef() && "unlabeled cluster");
79     if (Id.isNoise()) {
80       return NoiseCluster_;
81     }
82     if (Id.isError()) {
83       return ErrorCluster_;
84     }
85     return Clusters_[Id.getId()];
86   }
87 
getValidClusters()88   const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
89 
90   // Returns true if the given point is within a distance Epsilon of each other.
91   bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
92                    const std::vector<BenchmarkMeasure> &Q) const;
93 
94 private:
95   InstructionBenchmarkClustering(
96       const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
97   llvm::Error validateAndSetup();
98   void dbScan(size_t MinPts);
99   std::vector<size_t> rangeQuery(size_t Q) const;
100 
101   const std::vector<InstructionBenchmark> &Points_;
102   const double EpsilonSquared_;
103   int NumDimensions_ = 0;
104   // ClusterForPoint_[P] is the cluster id for Points[P].
105   std::vector<ClusterId> ClusterIdForPoint_;
106   std::vector<Cluster> Clusters_;
107   Cluster NoiseCluster_;
108   Cluster ErrorCluster_;
109 };
110 
111 } // namespace exegesis
112 
113 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
114