diff options
author | pavook <pavook@yandex-team.com> | 2024-09-16 15:07:43 +0300 |
---|---|---|
committer | pavook <pavook@yandex-team.com> | 2024-09-16 15:20:44 +0300 |
commit | 62bca5194267cb7a3ebad59cf75224fcb4d82188 (patch) | |
tree | 2bd95f80d93d6a5ac243de5ac2bf02beb3179c59 | |
parent | 7c696077507829e429a5c82f061c0664d183bc3d (diff) | |
download | ydb-62bca5194267cb7a3ebad59cf75224fcb4d82188.tar.gz |
YT-22118: Refactor statistic paths to use TStatisticPath
Current statistic path comparison doesn't allow for a correct check of the prefix invariant (see YT-22118).
This PR refactors all usages of statistics to use the safe TStatisticPath class, which checks for all sorts of oddities and allows for easy and safe path concatenation.
commit_hash:69085bdbb06e0e172ae86155ede22b41853284e2
-rw-r--r-- | yt/yt/core/misc/statistic_path.cpp | 14 | ||||
-rw-r--r-- | yt/yt/core/misc/statistic_path.h | 10 | ||||
-rw-r--r-- | yt/yt/core/misc/statistics-inl.h | 108 | ||||
-rw-r--r-- | yt/yt/core/misc/statistics.cpp | 91 | ||||
-rw-r--r-- | yt/yt/core/misc/statistics.h | 38 | ||||
-rw-r--r-- | yt/yt/core/misc/unittests/statistic_path_ut.cpp | 66 | ||||
-rw-r--r-- | yt/yt/core/misc/unittests/statistics_ut.cpp | 114 | ||||
-rw-r--r-- | yt/yt/core/ytree/serialize.cpp | 9 |
8 files changed, 269 insertions, 181 deletions
diff --git a/yt/yt/core/misc/statistic_path.cpp b/yt/yt/core/misc/statistic_path.cpp index 19f478d9f9..e59dddcf05 100644 --- a/yt/yt/core/misc/statistic_path.cpp +++ b/yt/yt/core/misc/statistic_path.cpp @@ -98,6 +98,13 @@ TErrorOr<TStatisticPath> ParseStatisticPath(const TStatisticPathType& path) return TStatisticPath(path); } +TErrorOr<TStatisticPath> SlashedStatisticPath(const TStatisticPathType& path) +{ + TString copy; + std::replace_copy(path.begin(), path.end(), std::back_inserter(copy), TChar('/'), Delimiter); + return ParseStatisticPath(copy); +} + //////////////////////////////////////////////////////////////////////////////// const TStatisticPathType& TStatisticPath::Path() const noexcept @@ -209,14 +216,15 @@ std::strong_ordering operator<=>(const TStatisticPath& lhs, const TStatisticPath //////////////////////////////////////////////////////////////////////////////// -namespace NStatisticPathLiterals { - TStatisticPathLiteral operator""_L(const char* str, size_t len) { return TStatisticPathLiteral(TStatisticPathType(str, len)); } -} // namespace NStatisticPathLiterals +TStatisticPath operator""_SP(const char* str, size_t len) +{ + return SlashedStatisticPath(TStatisticPathType(str, len)).ValueOrThrow(); +} //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/core/misc/statistic_path.h b/yt/yt/core/misc/statistic_path.h index 8908e13089..4e5d3833f8 100644 --- a/yt/yt/core/misc/statistic_path.h +++ b/yt/yt/core/misc/statistic_path.h @@ -65,6 +65,9 @@ TError CheckStatisticPath(const TStatisticPathType& path); //! and returns a `TStatisticPath` built from it if it is. Returns an error with explanation otherwise. TErrorOr<TStatisticPath> ParseStatisticPath(const TStatisticPathType& path); +//! Replaces all `/` in `path` with `Delimiter` and calls ParseStatisticPath. +TErrorOr<TStatisticPath> SlashedStatisticPath(const TStatisticPathType& path); + // TODO(pavook) constexpr when constexpr std::string arrives. class TStatisticPath { @@ -221,11 +224,12 @@ private: //////////////////////////////////////////////////////////////////////////////// -namespace NStatisticPathLiterals { - +//! Statistic path literal. [[nodiscard]] TStatisticPathLiteral operator""_L(const char* str, size_t len); -} // namespace NStatisticPathLiterals +//! Literal suffix that returns a slashed statistic path from a given string, +//! i.e. replaces all `/` with Delimiters. +[[nodiscard]] TStatisticPath operator""_SP(const char* str, size_t len); //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/core/misc/statistics-inl.h b/yt/yt/core/misc/statistics-inl.h index f8f3a1af90..d90add25e8 100644 --- a/yt/yt/core/misc/statistics-inl.h +++ b/yt/yt/core/misc/statistics-inl.h @@ -4,22 +4,26 @@ #include "statistics.h" #endif +#include "statistic_path.h" + #include <yt/yt/core/ypath/tokenizer.h> #include <yt/yt/core/ytree/fluent.h> +#include <library/cpp/iterator/enumerate.h> + namespace NYT { //////////////////////////////////////////////////////////////////////////////// template <class T> -void TStatistics::AddSample(const NYPath::TYPath& path, const T& sample) +void TStatistics::AddSample(const NStatisticPath::TStatisticPath& path, const T& sample) { AddSample(path, NYTree::ConvertToNode(sample)); } template <class T> -void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const T& sample) +void TStatistics::ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, const T& sample) { ReplacePathWithSample(path, NYTree::ConvertToNode(sample)); } @@ -36,20 +40,22 @@ void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const T& sam template <typename TSummaryMap> std::pair<EStatisticPathConflictType, typename TSummaryMap::iterator> IsCompatibleStatistic( TSummaryMap& existingStatistics, - const NYPath::TYPath& path) + const NStatisticPath::TStatisticPath& path) { auto it = existingStatistics.lower_bound(path); if (it != existingStatistics.end()) { if (it->first == path) { return {EStatisticPathConflictType::Exists, it}; } - if (NYPath::HasPrefix(it->first, path)) { + // TODO(pavook) std::ranges::starts_with(it->first, path) when C++23 arrives. + if (it->first.StartsWith(path)) { return {EStatisticPathConflictType::IsPrefix, it}; } } if (it != existingStatistics.begin()) { auto prev = std::prev(it); - if (NYPath::HasPrefix(path, prev->first)) { + // TODO(pavook) std::ranges::starts_with(path, prev->first) when C++23 arrives. + if (path.StartsWith(prev->first)) { return {EStatisticPathConflictType::HasPrefix, prev}; } } @@ -62,7 +68,7 @@ std::pair<EStatisticPathConflictType, typename TSummaryMap::iterator> IsCompatib template <typename TSummaryMap, typename... Ts> std::pair<typename TSummaryMap::iterator, bool> CheckedEmplaceStatistic( TSummaryMap& existingStatistics, - const NYPath::TYPath& path, + const NStatisticPath::TStatisticPath& path, Ts&&... args) { auto [conflictType, hintIt] = IsCompatibleStatistic(existingStatistics, path); @@ -103,7 +109,7 @@ void TTaggedStatistics<TTags>::AppendStatistics(const TStatistics& statistics, T } template <class TTags> -void TTaggedStatistics<TTags>::AppendTaggedSummary(const NYPath::TYPath& path, const TTaggedStatistics<TTags>::TTaggedSummaries& taggedSummaries) +void TTaggedStatistics<TTags>::AppendTaggedSummary(const NStatisticPath::TStatisticPath& path, const TTaggedStatistics<TTags>::TTaggedSummaries& taggedSummaries) { auto [taggedSummariesIt, emplaceHappened] = CheckedEmplaceStatistic(Data_, path, taggedSummaries); if (emplaceHappened) { @@ -121,7 +127,7 @@ void TTaggedStatistics<TTags>::AppendTaggedSummary(const NYPath::TYPath& path, c } template <class TTags> -const THashMap<TTags, TSummary>* TTaggedStatistics<TTags>::FindTaggedSummaries(const NYPath::TYPath& path) const +const THashMap<TTags, TSummary>* TTaggedStatistics<TTags>::FindTaggedSummaries(const NStatisticPath::TStatisticPath& path) const { auto it = Data_.find(path); if (it != Data_.end()) { @@ -149,7 +155,7 @@ void TTaggedStatistics<TTags>::Persist(const TStreamPersistenceContext& context) template <class TTags> void Serialize(const TTaggedStatistics<TTags>& statistics, NYson::IYsonConsumer* consumer) { - SerializeYsonPathsMap<THashMap<TTags, TSummary>>( + SerializeStatisticPathsMap<THashMap<TTags, TSummary>>( statistics.GetData(), consumer, [] (const THashMap<TTags, TSummary>& summaries, NYson::IYsonConsumer* consumer) { @@ -166,86 +172,50 @@ void Serialize(const TTaggedStatistics<TTags>& statistics, NYson::IYsonConsumer* //////////////////////////////////////////////////////////////////////////////// -Y_FORCE_INLINE int SkipEqualTokens(NYPath::TTokenizer& first, NYPath::TTokenizer& second) -{ - int commonDepth = 0; - while (true) { - first.Advance(); - second.Advance(); - // Note that both tokenizers can't reach EndOfStream token, because it would mean that - // currentKey is prefix of prefixKey or vice versa that is prohibited in TStatistics. - first.Expect(NYPath::ETokenType::Slash); - second.Expect(NYPath::ETokenType::Slash); - - first.Advance(); - second.Advance(); - first.Expect(NYPath::ETokenType::Literal); - second.Expect(NYPath::ETokenType::Literal); - if (first.GetLiteralValue() == second.GetLiteralValue()) { - ++commonDepth; - } else { - break; - } - } - - return commonDepth; -} - template <class TMapValue> -void SerializeYsonPathsMap( - const std::map<NYTree::TYPath, TMapValue>& map, +void SerializeStatisticPathsMap( + const std::map<NStatisticPath::TStatisticPath, TMapValue>& map, NYson::IYsonConsumer* consumer, const std::function<void(const TMapValue&, NYson::IYsonConsumer*)>& valueSerializer) { using NYT::Serialize; + // Global map. consumer->OnBeginMap(); // Depth of the previous key defined as a number of nested maps before the summary itself. - int previousDepth = 0; - NYPath::TYPath previousPath; + size_t previousDepth = 0; + NStatisticPath::TStatisticPath previousPath; for (const auto& [currentPath, value] : map) { - NYPath::TTokenizer previousTokenizer(previousPath); - NYPath::TTokenizer currentTokenizer(currentPath); + auto enumeratedCurrent = Enumerate(currentPath); + auto enumeratedPrevious = Enumerate(previousPath); - // The depth of the common part of two keys, needed to determine the number of maps to close. - int commonDepth = 0; + auto [currentIt, previousIt] = std::ranges::mismatch(enumeratedCurrent, enumeratedPrevious); - if (previousPath) { - // First we find the position in which current key is different from the - // previous one in order to close necessary number of maps. - commonDepth = SkipEqualTokens(currentTokenizer, previousTokenizer); + // No statistic path should be a path-prefix of another statistic path. + YT_VERIFY(currentIt != enumeratedCurrent.end()); - // Close all redundant maps. - while (previousDepth > commonDepth) { - consumer->OnEndMap(); - --previousDepth; - } - } else { - currentTokenizer.Advance(); - currentTokenizer.Expect(NYPath::ETokenType::Slash); - currentTokenizer.Advance(); - currentTokenizer.Expect(NYPath::ETokenType::Literal); + // The depth of the common part of two keys, needed to determine the number of maps to close. + size_t commonDepth = std::get<0>(*currentIt); + + // Close the maps that need to be closed. + while (previousDepth > commonDepth) { + consumer->OnEndMap(); + --previousDepth; } - int currentDepth = commonDepth; // Open all newly appeared maps. - while (true) { - consumer->OnKeyedItem(currentTokenizer.GetLiteralValue()); - currentTokenizer.Advance(); - if (currentTokenizer.GetType() == NYPath::ETokenType::Slash) { + size_t currentDepth = commonDepth; + auto beginIt = currentIt; + auto endIt = enumeratedCurrent.end(); + for (; currentIt != endIt; ++currentIt) { + if (currentIt != beginIt) { consumer->OnBeginMap(); ++currentDepth; - currentTokenizer.Advance(); - currentTokenizer.Expect(NYPath::ETokenType::Literal); - } else if (currentTokenizer.GetType() == NYPath::ETokenType::EndOfStream) { - break; - } else { - THROW_ERROR_EXCEPTION("Wrong token type in statistics path") - << TErrorAttribute("token_type", currentTokenizer.GetType()) - << TErrorAttribute("statistics_path", currentPath); } + consumer->OnKeyedItem(std::get<1>(*currentIt)); } + // Serialize value. valueSerializer(value, consumer); diff --git a/yt/yt/core/misc/statistics.cpp b/yt/yt/core/misc/statistics.cpp index 476433b57b..0181964243 100644 --- a/yt/yt/core/misc/statistics.cpp +++ b/yt/yt/core/misc/statistics.cpp @@ -1,5 +1,7 @@ #include "statistics.h" +#include "statistic_path.h" + #include <yt/yt/core/ypath/token.h> #include <yt/yt/core/ypath/tokenizer.h> @@ -11,7 +13,7 @@ namespace NYT { using namespace NYTree; using namespace NYson; -using namespace NYPath; +using namespace NStatisticPath; //////////////////////////////////////////////////////////////////////////////// @@ -91,32 +93,32 @@ bool TSummary::operator ==(const TSummary& other) const //////////////////////////////////////////////////////////////////////////////// -TSummary& TStatistics::GetSummary(const NYPath::TYPath& path) +TSummary& TStatistics::GetSummary(const TStatisticPath& path) { auto [it, _] = CheckedEmplaceStatistic(Data_, path, TSummary()); return it->second; } -void TStatistics::AddSample(const NYPath::TYPath& path, i64 sample) +void TStatistics::AddSample(const TStatisticPath& path, i64 sample) { GetSummary(path).AddSample(sample); } -void TStatistics::AddSample(const NYPath::TYPath& path, const INodePtr& sample) +void TStatistics::AddSample(const TStatisticPath& path, const INodePtr& sample) { ProcessNodeWithCallback(path, sample, [this] (const auto&... args) { AddSample(args...); }); } -void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const i64 sample) +void TStatistics::ReplacePathWithSample(const TStatisticPath& path, const i64 sample) { auto& summary = GetSummary(path); summary.Reset(); summary.AddSample(sample); } -void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const INodePtr& sample) +void TStatistics::ReplacePathWithSample(const TStatisticPath& path, const INodePtr& sample) { ProcessNodeWithCallback(path, sample, [this] (const auto&... args) { ReplacePathWithSample(args...); @@ -124,7 +126,7 @@ void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const INodeP } template <class TCallback> -void TStatistics::ProcessNodeWithCallback(const NYPath::TYPath& path, const NYTree::INodePtr& sample, TCallback callback) +void TStatistics::ProcessNodeWithCallback(const TStatisticPath& path, const NYTree::INodePtr& sample, TCallback callback) { switch (sample->GetType()) { case ENodeType::Int64: @@ -137,11 +139,7 @@ void TStatistics::ProcessNodeWithCallback(const NYPath::TYPath& path, const NYTr case ENodeType::Map: for (const auto& [key, child] : sample->AsMap()->GetChildren()) { - if (key.empty()) { - THROW_ERROR_EXCEPTION("Statistic cannot have an empty name") - << TErrorAttribute("path_prefix", path); - } - callback(path + "/" + ToYPathLiteral(key), child); + callback(path / TStatisticPathLiteral(TString(key)), child); } break; @@ -166,20 +164,20 @@ void TStatistics::MergeWithOverride(const TStatistics& statistics) Data_.insert(otherData.begin(), otherData.end()); } -TStatistics::TSummaryRange TStatistics::GetRangeByPrefix(const TString& prefix) const +// TODO(pavook) quite funky implementation details. Should we move this into statistic_path.h? +TStatistics::TSummaryRange TStatistics::GetRangeByPrefix(const TStatisticPath& prefix) const { - auto begin = Data().lower_bound(prefix + '/'); + // lower_bound is equivalent to upper_bound in this case, but upper_bound is semantically better. + auto begin = Data().upper_bound(prefix); // This will effectively return an iterator to the first path not starting with "`prefix`/". - auto end = Data().lower_bound(prefix + ('/' + 1)); + auto end = Data().lower_bound(ParseStatisticPath(prefix.Path() + TString(TChar(Delimiter + 1))).ValueOrThrow()); return TSummaryRange(begin, end); } -void TStatistics::RemoveRangeByPrefix(const TString& prefixPath) +void TStatistics::RemoveRangeByPrefix(const TStatisticPath& prefixPath) { - auto begin = Data().lower_bound(prefixPath + '/'); - // This will effectively return an iterator to the first path not starting with "`prefix`/". - auto end = Data().lower_bound(prefixPath + ('/' + 1)); - Data_.erase(begin, end); + auto range = GetRangeByPrefix(prefixPath); + Data_.erase(range.begin(), range.end()); } void TStatistics::Persist(const TStreamPersistenceContext& context) @@ -200,7 +198,7 @@ void Serialize(const TStatistics& statistics, IYsonConsumer* consumer) consumer->OnEndAttributes(); } - SerializeYsonPathsMap<TSummary>( + SerializeStatisticPathsMap<TSummary>( statistics.Data(), consumer, [] (const TSummary& summary, IYsonConsumer* consumer) { @@ -214,30 +212,32 @@ i64 GetSum(const TSummary& summary) return summary.GetSum(); } -i64 GetNumericValue(const TStatistics& statistics, const TString& path) +i64 GetNumericValue(const TStatistics& statistics, const TStatisticPath& path) { auto value = FindNumericValue(statistics, path); if (!value) { - THROW_ERROR_EXCEPTION("Statistics %v is not present", - path); + THROW_ERROR_EXCEPTION("Statistics is not present") + << TErrorAttribute("requested_path", path); } else { return *value; } } -std::optional<i64> FindNumericValue(const TStatistics& statistics, const TString& path) +std::optional<i64> FindNumericValue(const TStatistics& statistics, const TStatisticPath& path) { auto summary = FindSummary(statistics, path); return summary ? std::make_optional(summary->GetSum()) : std::nullopt; } -std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString& path) +std::optional<TSummary> FindSummary(const TStatistics& statistics, const TStatisticPath& path) { const auto& data = statistics.Data(); auto iterator = data.lower_bound(path); - if (iterator != data.end() && iterator->first != path && HasPrefix(iterator->first, path)) { - THROW_ERROR_EXCEPTION("Invalid statistics type: cannot get summary of %v since it is a map", - path); + if (iterator != data.end() && iterator->first != path && + iterator->first.StartsWith(path)) + { + THROW_ERROR_EXCEPTION("Invalid statistics type: cannot get summary since it is a map") << + TErrorAttribute("requested_path", path); } else if (iterator == data.end() || iterator->first != path) { return std::nullopt; } else { @@ -245,9 +245,9 @@ std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString } } - //////////////////////////////////////////////////////////////////////////////// +// TODO(pavook) anonymous namespace? class TStatisticsBuildingConsumer : public TYsonConsumerBase , public IBuildingYsonConsumer<TStatistics> @@ -328,15 +328,17 @@ public: // If we are here, we are either: // * at the root (then do nothing) // * at some directory (then the last key was the directory name) - if (!LastKey_.empty()) { - DirectoryNameLengths_.push_back(LastKey_.size()); - CurrentPath_.append('/'); - CurrentPath_.append(LastKey_); - LastKey_.clear(); + if (!FirstMapOpen_) { + FirstMapOpen_ = true; } else { - if (!CurrentPath_.empty()) { - THROW_ERROR_EXCEPTION("Empty keys are not allowed for statistics"); + // TODO(pavook) is it okay to throw here? + auto literal = TStatisticPathLiteral(LastKey_); + if (CurrentPath_.Empty()) { + CurrentPath_ = std::move(literal); + } else { + CurrentPath_.Append(literal); } + LastKey_.clear(); } } @@ -347,7 +349,7 @@ public: THROW_ERROR_EXCEPTION("Attributes other than \"timestamp\" are not allowed"); } } else { - LastKey_ = ToYPathLiteral(key); + LastKey_ = key; } } @@ -368,16 +370,17 @@ public: AtSummaryMap_ = false; } - if (!CurrentPath_.empty()) { + if (!CurrentPath_.Empty()) { // We need to go to the parent. - CurrentPath_.resize(CurrentPath_.size() - DirectoryNameLengths_.back() - 1); - DirectoryNameLengths_.pop_back(); + CurrentPath_.PopBack(); + } else { + FirstMapOpen_ = false; } } void OnBeginAttributes() override { - if (!CurrentPath_.empty()) { + if (!CurrentPath_.Empty()) { THROW_ERROR_EXCEPTION("Attributes are not allowed for statistics"); } AtAttributes_ = true; @@ -396,8 +399,7 @@ public: private: TStatistics Statistics_; - TString CurrentPath_; - std::vector<int> DirectoryNameLengths_; + TStatisticPath CurrentPath_; TSummary CurrentSummary_; i64 FilledSummaryFields_ = 0; @@ -405,6 +407,7 @@ private: TString LastKey_; + bool FirstMapOpen_ = false; bool AtSummaryMap_ = false; bool AtAttributes_ = false; }; diff --git a/yt/yt/core/misc/statistics.h b/yt/yt/core/misc/statistics.h index 53b1a04afc..606992cfc4 100644 --- a/yt/yt/core/misc/statistics.h +++ b/yt/yt/core/misc/statistics.h @@ -52,25 +52,25 @@ void Serialize(const TSummary& summary, NYson::IYsonConsumer* consumer); class TStatistics { public: - using TSummaryMap = std::map<NYPath::TYPath, TSummary>; + using TSummaryMap = std::map<NStatisticPath::TStatisticPath, TSummary>; using TSummaryRange = TIteratorRange<TSummaryMap::const_iterator>; DEFINE_BYREF_RO_PROPERTY(TSummaryMap, Data); DEFINE_BYVAL_RW_PROPERTY(std::optional<TInstant>, Timestamp); public: - void AddSample(const NYPath::TYPath& path, i64 sample); + void AddSample(const NStatisticPath::TStatisticPath& path, i64 sample); - void AddSample(const NYPath::TYPath& path, const NYTree::INodePtr& sample); + void AddSample(const NStatisticPath::TStatisticPath& path, const NYTree::INodePtr& sample); template <class T> - void AddSample(const NYPath::TYPath& path, const T& sample); + void AddSample(const NStatisticPath::TStatisticPath& path, const T& sample); - void ReplacePathWithSample(const NYPath::TYPath& path, i64 sample); + void ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, i64 sample); - void ReplacePathWithSample(const NYPath::TYPath& path, const NYTree::INodePtr& sample); + void ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, const NYTree::INodePtr& sample); template <class T> - void ReplacePathWithSample(const NYPath::TYPath& path, const T& sample); + void ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, const T& sample); //! Merge statistics by merging summaries for each common statistics path. void Merge(const TStatistics& statistics); @@ -82,27 +82,27 @@ public: * Pre-requisites: `prefixPath` must not have terminating slash. * Examples: /a/b is a prefix path for /a/b/hij but not for /a/bcd/efg nor /a/b itself. */ - TSummaryRange GetRangeByPrefix(const TString& prefixPath) const; + TSummaryRange GetRangeByPrefix(const NStatisticPath::TStatisticPath& prefixPath) const; //! Remove all the elements starting from prefixPath. //! The requirements for prefixPath are the same as in GetRangeByPrefix. - void RemoveRangeByPrefix(const TString& prefixPath); + void RemoveRangeByPrefix(const NStatisticPath::TStatisticPath& prefixPath); void Persist(const TStreamPersistenceContext& context); private: template <class TCallback> - void ProcessNodeWithCallback(const NYPath::TYPath& path, const NYTree::INodePtr& sample, TCallback callback); + void ProcessNodeWithCallback(const NStatisticPath::TStatisticPath& path, const NYTree::INodePtr& sample, TCallback callback); - TSummary& GetSummary(const NYPath::TYPath& path); + TSummary& GetSummary(const NStatisticPath::TStatisticPath& path); friend class TStatisticsBuildingConsumer; }; -i64 GetNumericValue(const TStatistics& statistics, const TString& path); +i64 GetNumericValue(const TStatistics& statistics, const NStatisticPath::TStatisticPath& path); -std::optional<i64> FindNumericValue(const TStatistics& statistics, const TString& path); -std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString& path); +std::optional<i64> FindNumericValue(const TStatistics& statistics, const NStatisticPath::TStatisticPath& path); +std::optional<TSummary> FindSummary(const TStatistics& statistics, const NStatisticPath::TStatisticPath& path); //////////////////////////////////////////////////////////////////////////////// @@ -142,12 +142,12 @@ class TTaggedStatistics { public: using TTaggedSummaries = THashMap<TTags, TSummary>; - using TSummaryMap = std::map<NYPath::TYPath, TTaggedSummaries>; + using TSummaryMap = std::map<NStatisticPath::TStatisticPath, TTaggedSummaries>; void AppendStatistics(const TStatistics& statistics, TTags tags); - void AppendTaggedSummary(const NYPath::TYPath& path, const TTaggedSummaries& taggedSummaries); + void AppendTaggedSummary(const NStatisticPath::TStatisticPath& path, const TTaggedSummaries& taggedSummaries); - const TTaggedSummaries* FindTaggedSummaries(const NYPath::TYPath& path) const; + const TTaggedSummaries* FindTaggedSummaries(const NStatisticPath::TStatisticPath& path) const; const TSummaryMap& GetData() const; void Persist(const TStreamPersistenceContext& context); @@ -164,8 +164,8 @@ void Serialize(const TTaggedStatistics<TTags>& statistics, NYson::IYsonConsumer* //////////////////////////////////////////////////////////////////////////////// template <class TValue> -void SerializeYsonPathsMap( - const std::map<NYTree::TYPath, TValue>& map, +void SerializeStatisticPathsMap( + const std::map<NStatisticPath::TStatisticPath, TValue>& map, NYson::IYsonConsumer* consumer, const std::function<void(const TValue&, NYson::IYsonConsumer*)>& valueSerializer); diff --git a/yt/yt/core/misc/unittests/statistic_path_ut.cpp b/yt/yt/core/misc/unittests/statistic_path_ut.cpp index e92d73c452..ad6ec5720c 100644 --- a/yt/yt/core/misc/unittests/statistic_path_ut.cpp +++ b/yt/yt/core/misc/unittests/statistic_path_ut.cpp @@ -1,13 +1,17 @@ #include <yt/yt/core/test_framework/framework.h> -#include <yt/yt/core/misc/error.h> #include <yt/yt/core/misc/statistic_path.h> +#include <yt/yt/core/misc/error.h> + +#include <yt/yt/core/ytree/convert.h> + namespace NYT { namespace { using namespace NStatisticPath; -using namespace NStatisticPathLiterals; +using namespace NYson; +using namespace NYTree; //////////////////////////////////////////////////////////////////////////////// @@ -23,6 +27,8 @@ TEST(TStatisticPathTest, Literal) EXPECT_THROW(TStatisticPathLiteral("\0"), TErrorException); EXPECT_THROW(TStatisticPathLiteral(TString(Delimiter)), TErrorException); EXPECT_THROW(TStatisticPathLiteral(""), TErrorException); + + EXPECT_EQ("A"_L / "BB"_L / "CCC"_L, "/A/BB/CCC"_SP); } //////////////////////////////////////////////////////////////////////////////// @@ -212,5 +218,61 @@ TEST(TStatisticPathTest, Swap) //////////////////////////////////////////////////////////////////////////////// +TEST(TStatisticPath, Serialize) +{ + TStatisticPath path = "A"_L / "B/C"_L; + EXPECT_EQ( + ConvertTo<TString>(ConvertToYsonString(path)), + path.Path()); +} + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TStatisticPath, Deserialize) +{ + TStatisticPath simplePath = "/A/B/C"_SP; + + TYsonString pathStr = ConvertToYsonString(simplePath.Path()); + EXPECT_EQ(ConvertTo<TStatisticPath>(pathStr), simplePath); + + TYsonString slashedPathStr = ConvertToYsonString("/A/B/C"); + EXPECT_EQ(ConvertTo<TStatisticPath>(slashedPathStr), simplePath); + + TYsonString invalidPathStr = ConvertToYsonString("abc"); + EXPECT_THROW_WITH_SUBSTRING( + ConvertTo<TStatisticPath>(invalidPathStr), + "must start with a delimiter"); + + TYsonString invalidSlashedPathStr = ConvertToYsonString("/abc/"); + EXPECT_THROW_WITH_SUBSTRING( + ConvertTo<TStatisticPath>(invalidSlashedPathStr), + "must not end with a delimiter"); +} + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TStatisticPath, SaveLoad) +{ + TStringStream stream; + TStreamSaveContext saveContext(&stream); + TStreamLoadContext loadContext(&stream); + + TStatisticPath simplePath1 = "A"_L / "B/C"_L / "X\02YZ"_L; + TStatisticPath simplePath2 = "A"_L / "BB"_L; + Save(saveContext, simplePath1); + Save(saveContext, simplePath2); + saveContext.Finish(); + + TStatisticPath loadedPath1; + TStatisticPath loadedPath2; + Load(loadContext, loadedPath1); + Load(loadContext, loadedPath2); + + EXPECT_EQ(loadedPath1, simplePath1); + EXPECT_EQ(loadedPath2, simplePath2); +} + +//////////////////////////////////////////////////////////////////////////////// + } // namespace } // namespace NYT diff --git a/yt/yt/core/misc/unittests/statistics_ut.cpp b/yt/yt/core/misc/unittests/statistics_ut.cpp index cab0babb45..12955b2017 100644 --- a/yt/yt/core/misc/unittests/statistics_ut.cpp +++ b/yt/yt/core/misc/unittests/statistics_ut.cpp @@ -12,6 +12,7 @@ namespace NYT { using namespace NYTree; using namespace NYson; +using namespace NStatisticPath; template <> struct TYsonFormatTraits<TSummary> @@ -60,7 +61,7 @@ TEST(TStatistics, Summary) //////////////////////////////////////////////////////////////////////////////// -TStatistics CreateStatistics(std::initializer_list<std::pair<NYPath::TYPath, i64>> data) +TStatistics CreateStatistics(std::initializer_list<std::pair<TStatisticPath, i64>> data) { TStatistics result; for (const auto& item : data) { @@ -75,38 +76,40 @@ TEST(TStatistics, AddSample) TStatistics statistics; statistics.AddSample( - "/key/subkey", + "key"_L / "subkey"_L, origin); - EXPECT_EQ(5, GetNumericValue(statistics, "/key/subkey/x")); - EXPECT_EQ(7, GetNumericValue(statistics, "/key/subkey/y")); + EXPECT_EQ(5, GetNumericValue(statistics, "key"_L / "subkey"_L / "x"_L)); + EXPECT_EQ(7, GetNumericValue(statistics, "key"_L / "subkey"_L / "y"_L)); - statistics.AddSample("/key/sub", 42); - EXPECT_EQ(42, GetNumericValue(statistics, "/key/sub")); + statistics.AddSample("key"_L / "sub"_L, 42); + EXPECT_EQ(42, GetNumericValue(statistics, "key"_L / "sub"_L)); // Cannot add sample to the map node. - EXPECT_THROW(statistics.AddSample("/key/subkey", 24), std::exception); + EXPECT_THROW(statistics.AddSample("key"_L / "subkey"_L, 24), std::exception); statistics.Merge(CreateStatistics({ - {"/key/subkey/x", 5}, - {"/key/subkey/z", 9}})); + {"key"_L / "subkey"_L / "x"_L, 5}, + {"key"_L / "subkey"_L / "z"_L, 9}})); - EXPECT_EQ(10, GetNumericValue(statistics, "/key/subkey/x")); - EXPECT_EQ(7, GetNumericValue(statistics, "/key/subkey/y")); - EXPECT_EQ(9, GetNumericValue(statistics, "/key/subkey/z")); + EXPECT_EQ(10, GetNumericValue(statistics, "key"_L / "subkey"_L / "x"_L)); + EXPECT_EQ(7, GetNumericValue(statistics, "key"_L / "subkey"_L / "y"_L)); + EXPECT_EQ(9, GetNumericValue(statistics, "key"_L / "subkey"_L / "z"_L)); EXPECT_THROW( - statistics.Merge(CreateStatistics({{"/key", 5}})), + statistics.Merge(CreateStatistics({{"key"_L, 5}})), std::exception); - statistics.AddSample("/key/subkey/x", 10); - EXPECT_EQ(20, GetNumericValue(statistics, "/key/subkey/x")); + statistics.AddSample("key"_L / "subkey"_L / "x"_L, 10); + EXPECT_EQ(20, GetNumericValue(statistics, "key"_L / "subkey"_L / "x"_L)); auto ysonStatistics = ConvertToYsonString(statistics); auto deserializedStatistics = ConvertTo<TStatistics>(ysonStatistics); - EXPECT_EQ(20, GetNumericValue(deserializedStatistics, "/key/subkey/x")); - EXPECT_EQ(42, GetNumericValue(deserializedStatistics, "/key/sub")); + EXPECT_EQ(statistics.Data(), deserializedStatistics.Data()); + + EXPECT_EQ(20, GetNumericValue(deserializedStatistics, "key"_L / "subkey"_L / "x"_L)); + EXPECT_EQ(42, GetNumericValue(deserializedStatistics, "key"_L / "sub"_L)); } //////////////////////////////////////////////////////////////////////////////// @@ -116,7 +119,7 @@ class TStatisticsUpdater public: void AddSample(const INodePtr& node) { - Statistics_.AddSample("/custom", node); + Statistics_.AddSample("custom"_L, node); } const TStatistics& GetStatistics() @@ -151,9 +154,9 @@ TEST(TStatistics, Consumer) .EndMap(); const auto& statistics = statisticsUpdater.GetStatistics(); - EXPECT_EQ(4, GetNumericValue(statistics, "/custom/k1")); - EXPECT_EQ(-7, GetNumericValue(statistics, "/custom/k2")); - EXPECT_EQ(42, GetNumericValue(statistics, "/custom/key/subkey")); + EXPECT_EQ(4, GetNumericValue(statistics, "custom"_L / "k1"_L)); + EXPECT_EQ(-7, GetNumericValue(statistics, "custom"_L / "k2"_L)); + EXPECT_EQ(42, GetNumericValue(statistics, "custom"_L / "key"_L / "subkey"_L)); } //////////////////////////////////////////////////////////////////////////////// @@ -181,10 +184,10 @@ TEST(TStatistics, BuildingConsumer) auto statistics = ConvertTo<TStatistics>(statisticsYson); auto data = statistics.Data(); - std::map<TString, TSummary> expectedData { - { "/abc/def", TSummary(42, 3, 5, 21, 10) }, - { "/abc/degh", TSummary(27, 1, 27, 27, 27) }, - { "/xyz", TSummary(50, 5, 8, 12, std::nullopt) }, + std::map<TStatisticPath, TSummary> expectedData { + { "abc"_L / "def"_L, TSummary(42, 3, 5, 21, 10) }, + { "abc"_L / "degh"_L, TSummary(27, 1, 27, 27, 27) }, + { "xyz"_L, TSummary(50, 5, 8, 12, std::nullopt) }, }; EXPECT_EQ(expectedData, data); @@ -192,41 +195,72 @@ TEST(TStatistics, BuildingConsumer) //////////////////////////////////////////////////////////////////////////////// +TEST(TStatistics, GetRangeByPrefix) { + TStatistics statistics = CreateStatistics({ + {"a"_L, 1}, + {"b"_L / "a"_L, 1}, + {"b"_L / "aa"_L / "a"_L, 1}, + {"b"_L / TStatisticPathLiteral(TString(std::numeric_limits<char>::max())), 1}, + {"c"_L, 1}, + }); + + { + // We should not return the node that exactly matches the prefix. + EXPECT_TRUE(statistics.GetRangeByPrefix("a"_L).empty()); + } + + { + auto actualRange = statistics.GetRangeByPrefix("b"_L); + TStatistics expectedRange = CreateStatistics({ + {"b"_L / "a"_L, 1}, + {"b"_L / "aa"_L / "a"_L, 1}, + {"b"_L / TStatisticPathLiteral(TString(std::numeric_limits<char>::max())), 1}, + }); + + EXPECT_EQ(std::map(actualRange.begin(), actualRange.end()), expectedRange.Data()); + } +} + +//////////////////////////////////////////////////////////////////////////////// + TEST(TTaggedStatistics, AppendStatistics) { TTaggedStatistics<int> taggedStatistics; { TStatistics statistics; - statistics.AddSample("/abc/def", 1); - statistics.AddSample("/abc/defg", 2); - statistics.AddSample("/xyz", 3); + statistics.AddSample("abc"_L / "def"_L, 1); + statistics.AddSample("abc"_L / "defg"_L, 2); + statistics.AddSample("xyz"_L, 3); taggedStatistics.AppendStatistics(statistics, 1); } { TStatistics statistics; - statistics.AddSample("/abc/def", 1); - statistics.AddSample("/ijk", 2); + statistics.AddSample("abc"_L / "def"_L, 1); + statistics.AddSample("ijk"_L, 2); taggedStatistics.AppendStatistics(statistics, 2); } { TStatistics statistics; - statistics.AddSample("/abc/def", 2); + statistics.AddSample("abc"_L / "def"_L, 2); taggedStatistics.AppendStatistics(statistics, 1); } { auto actualData = taggedStatistics.GetData(); - std::map<TString, THashMap<int, TSummary>> expectedData { + std::map<TStatisticPath, THashMap<int, TSummary>> expectedData { // std::nullopt because Last is always dropped during merge, see TSummary::Merge. - {"/abc/def", { - {1, TSummary(3, 2, 1, 2, std::nullopt)}, - {2, TSummary(1, 1, 1, 1, 1)}}}, - {"/abc/defg", {{1, TSummary(2, 1, 2, 2, 2)}}}, - {"/xyz", {{1, TSummary(3, 1, 3, 3, 3)}}}, - {"/ijk", {{2, TSummary(2, 1, 2, 2, 2)}}} + {"abc"_L / "def"_L, { + {1, TSummary(3, 2, 1, 2, std::nullopt)}, + {2, TSummary(1, 1, 1, 1, 1)}}}, + {"abc"_L / "defg"_L, { + {1, TSummary(2, 1, 2, 2, 2)}}}, + {"xyz"_L, { + {1, TSummary(3, 1, 3, 3, 3)}}}, + {"ijk"_L, { + {2, TSummary(2, 1, 2, 2, 2)}}} }; EXPECT_EQ(expectedData, actualData); @@ -234,13 +268,13 @@ TEST(TTaggedStatistics, AppendStatistics) { TStatistics statistics; - statistics.AddSample("/xyz/suffix", 1); + statistics.AddSample("xyz"_L / "suffix"_L, 1); EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception); } { TStatistics statistics; - statistics.AddSample("/abc", 1); // prefix + statistics.AddSample("abc"_L, 1); // prefix EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception); } } diff --git a/yt/yt/core/ytree/serialize.cpp b/yt/yt/core/ytree/serialize.cpp index 13a118efd8..b0ebda3871 100644 --- a/yt/yt/core/ytree/serialize.cpp +++ b/yt/yt/core/ytree/serialize.cpp @@ -363,7 +363,14 @@ void Deserialize(TGuid& value, INodePtr node) // TStatisticPath. void Deserialize(NStatisticPath::TStatisticPath& value, INodePtr node) { - value = NStatisticPath::ParseStatisticPath(node->AsString()->GetValue()).ValueOrThrow(); + const TString& path = node->AsString()->GetValue(); + + // Try to parse slashed paths. + if (!path.empty() && path.StartsWith('/')) { + value = NStatisticPath::SlashedStatisticPath(path).ValueOrThrow(); + } else { + value = NStatisticPath::ParseStatisticPath(path).ValueOrThrow(); + } } // Subtypes of google::protobuf::Message |