diff options
author | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
commit | 6ffe9e53658409f212834330e13564e4952558f6 (patch) | |
tree | 85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h | |
parent | 726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff) | |
download | ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz |
YQ Connector: support managed ClickHouse
Со стороны dqrun можно обратиться к инстансу коннектора, который работает на streaming стенде, и извлечь данные из облачного CH.
Diffstat (limited to 'contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h')
-rw-r--r-- | contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h new file mode 100644 index 0000000000..a4da3af774 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h @@ -0,0 +1,171 @@ +//===-- Clustering.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Utilities to compute benchmark result clusters. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H +#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H + +#include "BenchmarkResult.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Error.h" +#include <limits> +#include <vector> + +namespace llvm { +namespace exegesis { + +class InstructionBenchmarkClustering { +public: + enum ModeE { Dbscan, Naive }; + + // Clusters `Points` using DBSCAN with the given parameters. See the cc file + // for more explanations on the algorithm. + static Expected<InstructionBenchmarkClustering> + create(const std::vector<InstructionBenchmark> &Points, ModeE Mode, + size_t DbscanMinPts, double AnalysisClusteringEpsilon, + const MCSubtargetInfo *SubtargetInfo = nullptr, + const MCInstrInfo *InstrInfo = nullptr); + + class ClusterId { + public: + static ClusterId noise() { return ClusterId(kNoise); } + static ClusterId error() { return ClusterId(kError); } + static ClusterId makeValid(size_t Id, bool IsUnstable = false) { + return ClusterId(Id, IsUnstable); + } + static ClusterId makeValidUnstable(size_t Id) { + return makeValid(Id, /*IsUnstable=*/true); + } + + ClusterId() : Id_(kUndef), IsUnstable_(false) {} + + // Compare id's, ignoring the 'unstability' bit. + bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } + bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } + + bool isValid() const { return Id_ <= kMaxValid; } + bool isUnstable() const { return IsUnstable_; } + bool isNoise() const { return Id_ == kNoise; } + bool isError() const { return Id_ == kError; } + bool isUndef() const { return Id_ == kUndef; } + + // Precondition: isValid(). + size_t getId() const { + assert(isValid()); + return Id_; + } + + private: + ClusterId(size_t Id, bool IsUnstable = false) + : Id_(Id), IsUnstable_(IsUnstable) {} + + static constexpr const size_t kMaxValid = + (std::numeric_limits<size_t>::max() >> 1) - 4; + static constexpr const size_t kNoise = kMaxValid + 1; + static constexpr const size_t kError = kMaxValid + 2; + static constexpr const size_t kUndef = kMaxValid + 3; + + size_t Id_ : (std::numeric_limits<size_t>::digits - 1); + size_t IsUnstable_ : 1; + }; + static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field."); + + struct Cluster { + Cluster() = delete; + explicit Cluster(const ClusterId &Id) : Id(Id) {} + + const ClusterId Id; + // Indices of benchmarks within the cluster. + std::vector<int> PointIndices; + }; + + ClusterId getClusterIdForPoint(size_t P) const { + return ClusterIdForPoint_[P]; + } + + const std::vector<InstructionBenchmark> &getPoints() const { return Points_; } + + const Cluster &getCluster(ClusterId Id) const { + assert(!Id.isUndef() && "unlabeled cluster"); + if (Id.isNoise()) { + return NoiseCluster_; + } + if (Id.isError()) { + return ErrorCluster_; + } + return Clusters_[Id.getId()]; + } + + const std::vector<Cluster> &getValidClusters() const { return Clusters_; } + + // Returns true if the given point is within a distance Epsilon of each other. + bool isNeighbour(const std::vector<BenchmarkMeasure> &P, + const std::vector<BenchmarkMeasure> &Q, + const double EpsilonSquared_) const { + double DistanceSquared = 0.0; + for (size_t I = 0, E = P.size(); I < E; ++I) { + const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; + DistanceSquared += Diff * Diff; + } + return DistanceSquared <= EpsilonSquared_; + } + +private: + InstructionBenchmarkClustering( + const std::vector<InstructionBenchmark> &Points, + double AnalysisClusteringEpsilonSquared); + + Error validateAndSetup(); + + void clusterizeDbScan(size_t MinPts); + void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo, + const MCInstrInfo &InstrInfo); + + // Stabilization is only needed if dbscan was used to clusterize. + void stabilize(unsigned NumOpcodes); + + void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const; + + bool areAllNeighbours(ArrayRef<size_t> Pts) const; + + const std::vector<InstructionBenchmark> &Points_; + const double AnalysisClusteringEpsilonSquared_; + + int NumDimensions_ = 0; + // ClusterForPoint_[P] is the cluster id for Points[P]. + std::vector<ClusterId> ClusterIdForPoint_; + std::vector<Cluster> Clusters_; + Cluster NoiseCluster_; + Cluster ErrorCluster_; +}; + +class SchedClassClusterCentroid { +public: + const std::vector<PerInstructionStats> &getStats() const { + return Representative; + } + + std::vector<BenchmarkMeasure> getAsPoint() const; + + void addPoint(ArrayRef<BenchmarkMeasure> Point); + + bool validate(InstructionBenchmark::ModeE Mode) const; + +private: + // Measurement stats for the points in the SchedClassCluster. + std::vector<PerInstructionStats> Representative; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |