aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@yandex-team.com>2023-06-29 10:00:50 +0300
committervitalyisaev <vitalyisaev@yandex-team.com>2023-06-29 10:00:50 +0300
commit6ffe9e53658409f212834330e13564e4952558f6 (patch)
tree85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h
parent726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff)
downloadydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz
YQ Connector: support managed ClickHouse
Со стороны dqrun можно обратиться к инстансу коннектора, который работает на streaming стенде, и извлечь данные из облачного CH.
Diffstat (limited to 'contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h')
-rw-r--r--contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h171
1 files changed, 171 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h
new file mode 100644
index 0000000000..a4da3af774
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h
@@ -0,0 +1,171 @@
+//===-- Clustering.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Utilities to compute benchmark result clusters.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
+
+#include "BenchmarkResult.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Error.h"
+#include <limits>
+#include <vector>
+
+namespace llvm {
+namespace exegesis {
+
+class InstructionBenchmarkClustering {
+public:
+ enum ModeE { Dbscan, Naive };
+
+ // Clusters `Points` using DBSCAN with the given parameters. See the cc file
+ // for more explanations on the algorithm.
+ static Expected<InstructionBenchmarkClustering>
+ create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
+ size_t DbscanMinPts, double AnalysisClusteringEpsilon,
+ const MCSubtargetInfo *SubtargetInfo = nullptr,
+ const MCInstrInfo *InstrInfo = nullptr);
+
+ class ClusterId {
+ public:
+ static ClusterId noise() { return ClusterId(kNoise); }
+ static ClusterId error() { return ClusterId(kError); }
+ static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
+ return ClusterId(Id, IsUnstable);
+ }
+ static ClusterId makeValidUnstable(size_t Id) {
+ return makeValid(Id, /*IsUnstable=*/true);
+ }
+
+ ClusterId() : Id_(kUndef), IsUnstable_(false) {}
+
+ // Compare id's, ignoring the 'unstability' bit.
+ bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
+ bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
+
+ bool isValid() const { return Id_ <= kMaxValid; }
+ bool isUnstable() const { return IsUnstable_; }
+ bool isNoise() const { return Id_ == kNoise; }
+ bool isError() const { return Id_ == kError; }
+ bool isUndef() const { return Id_ == kUndef; }
+
+ // Precondition: isValid().
+ size_t getId() const {
+ assert(isValid());
+ return Id_;
+ }
+
+ private:
+ ClusterId(size_t Id, bool IsUnstable = false)
+ : Id_(Id), IsUnstable_(IsUnstable) {}
+
+ static constexpr const size_t kMaxValid =
+ (std::numeric_limits<size_t>::max() >> 1) - 4;
+ static constexpr const size_t kNoise = kMaxValid + 1;
+ static constexpr const size_t kError = kMaxValid + 2;
+ static constexpr const size_t kUndef = kMaxValid + 3;
+
+ size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
+ size_t IsUnstable_ : 1;
+ };
+ static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
+
+ struct Cluster {
+ Cluster() = delete;
+ explicit Cluster(const ClusterId &Id) : Id(Id) {}
+
+ const ClusterId Id;
+ // Indices of benchmarks within the cluster.
+ std::vector<int> PointIndices;
+ };
+
+ ClusterId getClusterIdForPoint(size_t P) const {
+ return ClusterIdForPoint_[P];
+ }
+
+ const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
+
+ const Cluster &getCluster(ClusterId Id) const {
+ assert(!Id.isUndef() && "unlabeled cluster");
+ if (Id.isNoise()) {
+ return NoiseCluster_;
+ }
+ if (Id.isError()) {
+ return ErrorCluster_;
+ }
+ return Clusters_[Id.getId()];
+ }
+
+ const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
+
+ // Returns true if the given point is within a distance Epsilon of each other.
+ bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
+ const std::vector<BenchmarkMeasure> &Q,
+ const double EpsilonSquared_) const {
+ double DistanceSquared = 0.0;
+ for (size_t I = 0, E = P.size(); I < E; ++I) {
+ const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
+ DistanceSquared += Diff * Diff;
+ }
+ return DistanceSquared <= EpsilonSquared_;
+ }
+
+private:
+ InstructionBenchmarkClustering(
+ const std::vector<InstructionBenchmark> &Points,
+ double AnalysisClusteringEpsilonSquared);
+
+ Error validateAndSetup();
+
+ void clusterizeDbScan(size_t MinPts);
+ void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
+ const MCInstrInfo &InstrInfo);
+
+ // Stabilization is only needed if dbscan was used to clusterize.
+ void stabilize(unsigned NumOpcodes);
+
+ void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
+
+ bool areAllNeighbours(ArrayRef<size_t> Pts) const;
+
+ const std::vector<InstructionBenchmark> &Points_;
+ const double AnalysisClusteringEpsilonSquared_;
+
+ int NumDimensions_ = 0;
+ // ClusterForPoint_[P] is the cluster id for Points[P].
+ std::vector<ClusterId> ClusterIdForPoint_;
+ std::vector<Cluster> Clusters_;
+ Cluster NoiseCluster_;
+ Cluster ErrorCluster_;
+};
+
+class SchedClassClusterCentroid {
+public:
+ const std::vector<PerInstructionStats> &getStats() const {
+ return Representative;
+ }
+
+ std::vector<BenchmarkMeasure> getAsPoint() const;
+
+ void addPoint(ArrayRef<BenchmarkMeasure> Point);
+
+ bool validate(InstructionBenchmark::ModeE Mode) const;
+
+private:
+ // Measurement stats for the points in the SchedClassCluster.
+ std::vector<PerInstructionStats> Representative;
+};
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H