aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpavook <pavook@yandex-team.com>2024-09-16 15:07:43 +0300
committerpavook <pavook@yandex-team.com>2024-09-16 15:20:44 +0300
commit62bca5194267cb7a3ebad59cf75224fcb4d82188 (patch)
tree2bd95f80d93d6a5ac243de5ac2bf02beb3179c59
parent7c696077507829e429a5c82f061c0664d183bc3d (diff)
downloadydb-62bca5194267cb7a3ebad59cf75224fcb4d82188.tar.gz
YT-22118: Refactor statistic paths to use TStatisticPath
Current statistic path comparison doesn't allow for a correct check of the prefix invariant (see YT-22118). This PR refactors all usages of statistics to use the safe TStatisticPath class, which checks for all sorts of oddities and allows for easy and safe path concatenation. commit_hash:69085bdbb06e0e172ae86155ede22b41853284e2
-rw-r--r--yt/yt/core/misc/statistic_path.cpp14
-rw-r--r--yt/yt/core/misc/statistic_path.h10
-rw-r--r--yt/yt/core/misc/statistics-inl.h108
-rw-r--r--yt/yt/core/misc/statistics.cpp91
-rw-r--r--yt/yt/core/misc/statistics.h38
-rw-r--r--yt/yt/core/misc/unittests/statistic_path_ut.cpp66
-rw-r--r--yt/yt/core/misc/unittests/statistics_ut.cpp114
-rw-r--r--yt/yt/core/ytree/serialize.cpp9
8 files changed, 269 insertions, 181 deletions
diff --git a/yt/yt/core/misc/statistic_path.cpp b/yt/yt/core/misc/statistic_path.cpp
index 19f478d9f9..e59dddcf05 100644
--- a/yt/yt/core/misc/statistic_path.cpp
+++ b/yt/yt/core/misc/statistic_path.cpp
@@ -98,6 +98,13 @@ TErrorOr<TStatisticPath> ParseStatisticPath(const TStatisticPathType& path)
return TStatisticPath(path);
}
+TErrorOr<TStatisticPath> SlashedStatisticPath(const TStatisticPathType& path)
+{
+ TString copy;
+ std::replace_copy(path.begin(), path.end(), std::back_inserter(copy), TChar('/'), Delimiter);
+ return ParseStatisticPath(copy);
+}
+
////////////////////////////////////////////////////////////////////////////////
const TStatisticPathType& TStatisticPath::Path() const noexcept
@@ -209,14 +216,15 @@ std::strong_ordering operator<=>(const TStatisticPath& lhs, const TStatisticPath
////////////////////////////////////////////////////////////////////////////////
-namespace NStatisticPathLiterals {
-
TStatisticPathLiteral operator""_L(const char* str, size_t len)
{
return TStatisticPathLiteral(TStatisticPathType(str, len));
}
-} // namespace NStatisticPathLiterals
+TStatisticPath operator""_SP(const char* str, size_t len)
+{
+ return SlashedStatisticPath(TStatisticPathType(str, len)).ValueOrThrow();
+}
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/core/misc/statistic_path.h b/yt/yt/core/misc/statistic_path.h
index 8908e13089..4e5d3833f8 100644
--- a/yt/yt/core/misc/statistic_path.h
+++ b/yt/yt/core/misc/statistic_path.h
@@ -65,6 +65,9 @@ TError CheckStatisticPath(const TStatisticPathType& path);
//! and returns a `TStatisticPath` built from it if it is. Returns an error with explanation otherwise.
TErrorOr<TStatisticPath> ParseStatisticPath(const TStatisticPathType& path);
+//! Replaces all `/` in `path` with `Delimiter` and calls ParseStatisticPath.
+TErrorOr<TStatisticPath> SlashedStatisticPath(const TStatisticPathType& path);
+
// TODO(pavook) constexpr when constexpr std::string arrives.
class TStatisticPath
{
@@ -221,11 +224,12 @@ private:
////////////////////////////////////////////////////////////////////////////////
-namespace NStatisticPathLiterals {
-
+//! Statistic path literal.
[[nodiscard]] TStatisticPathLiteral operator""_L(const char* str, size_t len);
-} // namespace NStatisticPathLiterals
+//! Literal suffix that returns a slashed statistic path from a given string,
+//! i.e. replaces all `/` with Delimiters.
+[[nodiscard]] TStatisticPath operator""_SP(const char* str, size_t len);
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/core/misc/statistics-inl.h b/yt/yt/core/misc/statistics-inl.h
index f8f3a1af90..d90add25e8 100644
--- a/yt/yt/core/misc/statistics-inl.h
+++ b/yt/yt/core/misc/statistics-inl.h
@@ -4,22 +4,26 @@
#include "statistics.h"
#endif
+#include "statistic_path.h"
+
#include <yt/yt/core/ypath/tokenizer.h>
#include <yt/yt/core/ytree/fluent.h>
+#include <library/cpp/iterator/enumerate.h>
+
namespace NYT {
////////////////////////////////////////////////////////////////////////////////
template <class T>
-void TStatistics::AddSample(const NYPath::TYPath& path, const T& sample)
+void TStatistics::AddSample(const NStatisticPath::TStatisticPath& path, const T& sample)
{
AddSample(path, NYTree::ConvertToNode(sample));
}
template <class T>
-void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const T& sample)
+void TStatistics::ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, const T& sample)
{
ReplacePathWithSample(path, NYTree::ConvertToNode(sample));
}
@@ -36,20 +40,22 @@ void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const T& sam
template <typename TSummaryMap>
std::pair<EStatisticPathConflictType, typename TSummaryMap::iterator> IsCompatibleStatistic(
TSummaryMap& existingStatistics,
- const NYPath::TYPath& path)
+ const NStatisticPath::TStatisticPath& path)
{
auto it = existingStatistics.lower_bound(path);
if (it != existingStatistics.end()) {
if (it->first == path) {
return {EStatisticPathConflictType::Exists, it};
}
- if (NYPath::HasPrefix(it->first, path)) {
+ // TODO(pavook) std::ranges::starts_with(it->first, path) when C++23 arrives.
+ if (it->first.StartsWith(path)) {
return {EStatisticPathConflictType::IsPrefix, it};
}
}
if (it != existingStatistics.begin()) {
auto prev = std::prev(it);
- if (NYPath::HasPrefix(path, prev->first)) {
+ // TODO(pavook) std::ranges::starts_with(path, prev->first) when C++23 arrives.
+ if (path.StartsWith(prev->first)) {
return {EStatisticPathConflictType::HasPrefix, prev};
}
}
@@ -62,7 +68,7 @@ std::pair<EStatisticPathConflictType, typename TSummaryMap::iterator> IsCompatib
template <typename TSummaryMap, typename... Ts>
std::pair<typename TSummaryMap::iterator, bool> CheckedEmplaceStatistic(
TSummaryMap& existingStatistics,
- const NYPath::TYPath& path,
+ const NStatisticPath::TStatisticPath& path,
Ts&&... args)
{
auto [conflictType, hintIt] = IsCompatibleStatistic(existingStatistics, path);
@@ -103,7 +109,7 @@ void TTaggedStatistics<TTags>::AppendStatistics(const TStatistics& statistics, T
}
template <class TTags>
-void TTaggedStatistics<TTags>::AppendTaggedSummary(const NYPath::TYPath& path, const TTaggedStatistics<TTags>::TTaggedSummaries& taggedSummaries)
+void TTaggedStatistics<TTags>::AppendTaggedSummary(const NStatisticPath::TStatisticPath& path, const TTaggedStatistics<TTags>::TTaggedSummaries& taggedSummaries)
{
auto [taggedSummariesIt, emplaceHappened] = CheckedEmplaceStatistic(Data_, path, taggedSummaries);
if (emplaceHappened) {
@@ -121,7 +127,7 @@ void TTaggedStatistics<TTags>::AppendTaggedSummary(const NYPath::TYPath& path, c
}
template <class TTags>
-const THashMap<TTags, TSummary>* TTaggedStatistics<TTags>::FindTaggedSummaries(const NYPath::TYPath& path) const
+const THashMap<TTags, TSummary>* TTaggedStatistics<TTags>::FindTaggedSummaries(const NStatisticPath::TStatisticPath& path) const
{
auto it = Data_.find(path);
if (it != Data_.end()) {
@@ -149,7 +155,7 @@ void TTaggedStatistics<TTags>::Persist(const TStreamPersistenceContext& context)
template <class TTags>
void Serialize(const TTaggedStatistics<TTags>& statistics, NYson::IYsonConsumer* consumer)
{
- SerializeYsonPathsMap<THashMap<TTags, TSummary>>(
+ SerializeStatisticPathsMap<THashMap<TTags, TSummary>>(
statistics.GetData(),
consumer,
[] (const THashMap<TTags, TSummary>& summaries, NYson::IYsonConsumer* consumer) {
@@ -166,86 +172,50 @@ void Serialize(const TTaggedStatistics<TTags>& statistics, NYson::IYsonConsumer*
////////////////////////////////////////////////////////////////////////////////
-Y_FORCE_INLINE int SkipEqualTokens(NYPath::TTokenizer& first, NYPath::TTokenizer& second)
-{
- int commonDepth = 0;
- while (true) {
- first.Advance();
- second.Advance();
- // Note that both tokenizers can't reach EndOfStream token, because it would mean that
- // currentKey is prefix of prefixKey or vice versa that is prohibited in TStatistics.
- first.Expect(NYPath::ETokenType::Slash);
- second.Expect(NYPath::ETokenType::Slash);
-
- first.Advance();
- second.Advance();
- first.Expect(NYPath::ETokenType::Literal);
- second.Expect(NYPath::ETokenType::Literal);
- if (first.GetLiteralValue() == second.GetLiteralValue()) {
- ++commonDepth;
- } else {
- break;
- }
- }
-
- return commonDepth;
-}
-
template <class TMapValue>
-void SerializeYsonPathsMap(
- const std::map<NYTree::TYPath, TMapValue>& map,
+void SerializeStatisticPathsMap(
+ const std::map<NStatisticPath::TStatisticPath, TMapValue>& map,
NYson::IYsonConsumer* consumer,
const std::function<void(const TMapValue&, NYson::IYsonConsumer*)>& valueSerializer)
{
using NYT::Serialize;
+ // Global map.
consumer->OnBeginMap();
// Depth of the previous key defined as a number of nested maps before the summary itself.
- int previousDepth = 0;
- NYPath::TYPath previousPath;
+ size_t previousDepth = 0;
+ NStatisticPath::TStatisticPath previousPath;
for (const auto& [currentPath, value] : map) {
- NYPath::TTokenizer previousTokenizer(previousPath);
- NYPath::TTokenizer currentTokenizer(currentPath);
+ auto enumeratedCurrent = Enumerate(currentPath);
+ auto enumeratedPrevious = Enumerate(previousPath);
- // The depth of the common part of two keys, needed to determine the number of maps to close.
- int commonDepth = 0;
+ auto [currentIt, previousIt] = std::ranges::mismatch(enumeratedCurrent, enumeratedPrevious);
- if (previousPath) {
- // First we find the position in which current key is different from the
- // previous one in order to close necessary number of maps.
- commonDepth = SkipEqualTokens(currentTokenizer, previousTokenizer);
+ // No statistic path should be a path-prefix of another statistic path.
+ YT_VERIFY(currentIt != enumeratedCurrent.end());
- // Close all redundant maps.
- while (previousDepth > commonDepth) {
- consumer->OnEndMap();
- --previousDepth;
- }
- } else {
- currentTokenizer.Advance();
- currentTokenizer.Expect(NYPath::ETokenType::Slash);
- currentTokenizer.Advance();
- currentTokenizer.Expect(NYPath::ETokenType::Literal);
+ // The depth of the common part of two keys, needed to determine the number of maps to close.
+ size_t commonDepth = std::get<0>(*currentIt);
+
+ // Close the maps that need to be closed.
+ while (previousDepth > commonDepth) {
+ consumer->OnEndMap();
+ --previousDepth;
}
- int currentDepth = commonDepth;
// Open all newly appeared maps.
- while (true) {
- consumer->OnKeyedItem(currentTokenizer.GetLiteralValue());
- currentTokenizer.Advance();
- if (currentTokenizer.GetType() == NYPath::ETokenType::Slash) {
+ size_t currentDepth = commonDepth;
+ auto beginIt = currentIt;
+ auto endIt = enumeratedCurrent.end();
+ for (; currentIt != endIt; ++currentIt) {
+ if (currentIt != beginIt) {
consumer->OnBeginMap();
++currentDepth;
- currentTokenizer.Advance();
- currentTokenizer.Expect(NYPath::ETokenType::Literal);
- } else if (currentTokenizer.GetType() == NYPath::ETokenType::EndOfStream) {
- break;
- } else {
- THROW_ERROR_EXCEPTION("Wrong token type in statistics path")
- << TErrorAttribute("token_type", currentTokenizer.GetType())
- << TErrorAttribute("statistics_path", currentPath);
}
+ consumer->OnKeyedItem(std::get<1>(*currentIt));
}
+
// Serialize value.
valueSerializer(value, consumer);
diff --git a/yt/yt/core/misc/statistics.cpp b/yt/yt/core/misc/statistics.cpp
index 476433b57b..0181964243 100644
--- a/yt/yt/core/misc/statistics.cpp
+++ b/yt/yt/core/misc/statistics.cpp
@@ -1,5 +1,7 @@
#include "statistics.h"
+#include "statistic_path.h"
+
#include <yt/yt/core/ypath/token.h>
#include <yt/yt/core/ypath/tokenizer.h>
@@ -11,7 +13,7 @@ namespace NYT {
using namespace NYTree;
using namespace NYson;
-using namespace NYPath;
+using namespace NStatisticPath;
////////////////////////////////////////////////////////////////////////////////
@@ -91,32 +93,32 @@ bool TSummary::operator ==(const TSummary& other) const
////////////////////////////////////////////////////////////////////////////////
-TSummary& TStatistics::GetSummary(const NYPath::TYPath& path)
+TSummary& TStatistics::GetSummary(const TStatisticPath& path)
{
auto [it, _] = CheckedEmplaceStatistic(Data_, path, TSummary());
return it->second;
}
-void TStatistics::AddSample(const NYPath::TYPath& path, i64 sample)
+void TStatistics::AddSample(const TStatisticPath& path, i64 sample)
{
GetSummary(path).AddSample(sample);
}
-void TStatistics::AddSample(const NYPath::TYPath& path, const INodePtr& sample)
+void TStatistics::AddSample(const TStatisticPath& path, const INodePtr& sample)
{
ProcessNodeWithCallback(path, sample, [this] (const auto&... args) {
AddSample(args...);
});
}
-void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const i64 sample)
+void TStatistics::ReplacePathWithSample(const TStatisticPath& path, const i64 sample)
{
auto& summary = GetSummary(path);
summary.Reset();
summary.AddSample(sample);
}
-void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const INodePtr& sample)
+void TStatistics::ReplacePathWithSample(const TStatisticPath& path, const INodePtr& sample)
{
ProcessNodeWithCallback(path, sample, [this] (const auto&... args) {
ReplacePathWithSample(args...);
@@ -124,7 +126,7 @@ void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const INodeP
}
template <class TCallback>
-void TStatistics::ProcessNodeWithCallback(const NYPath::TYPath& path, const NYTree::INodePtr& sample, TCallback callback)
+void TStatistics::ProcessNodeWithCallback(const TStatisticPath& path, const NYTree::INodePtr& sample, TCallback callback)
{
switch (sample->GetType()) {
case ENodeType::Int64:
@@ -137,11 +139,7 @@ void TStatistics::ProcessNodeWithCallback(const NYPath::TYPath& path, const NYTr
case ENodeType::Map:
for (const auto& [key, child] : sample->AsMap()->GetChildren()) {
- if (key.empty()) {
- THROW_ERROR_EXCEPTION("Statistic cannot have an empty name")
- << TErrorAttribute("path_prefix", path);
- }
- callback(path + "/" + ToYPathLiteral(key), child);
+ callback(path / TStatisticPathLiteral(TString(key)), child);
}
break;
@@ -166,20 +164,20 @@ void TStatistics::MergeWithOverride(const TStatistics& statistics)
Data_.insert(otherData.begin(), otherData.end());
}
-TStatistics::TSummaryRange TStatistics::GetRangeByPrefix(const TString& prefix) const
+// TODO(pavook) quite funky implementation details. Should we move this into statistic_path.h?
+TStatistics::TSummaryRange TStatistics::GetRangeByPrefix(const TStatisticPath& prefix) const
{
- auto begin = Data().lower_bound(prefix + '/');
+ // lower_bound is equivalent to upper_bound in this case, but upper_bound is semantically better.
+ auto begin = Data().upper_bound(prefix);
// This will effectively return an iterator to the first path not starting with "`prefix`/".
- auto end = Data().lower_bound(prefix + ('/' + 1));
+ auto end = Data().lower_bound(ParseStatisticPath(prefix.Path() + TString(TChar(Delimiter + 1))).ValueOrThrow());
return TSummaryRange(begin, end);
}
-void TStatistics::RemoveRangeByPrefix(const TString& prefixPath)
+void TStatistics::RemoveRangeByPrefix(const TStatisticPath& prefixPath)
{
- auto begin = Data().lower_bound(prefixPath + '/');
- // This will effectively return an iterator to the first path not starting with "`prefix`/".
- auto end = Data().lower_bound(prefixPath + ('/' + 1));
- Data_.erase(begin, end);
+ auto range = GetRangeByPrefix(prefixPath);
+ Data_.erase(range.begin(), range.end());
}
void TStatistics::Persist(const TStreamPersistenceContext& context)
@@ -200,7 +198,7 @@ void Serialize(const TStatistics& statistics, IYsonConsumer* consumer)
consumer->OnEndAttributes();
}
- SerializeYsonPathsMap<TSummary>(
+ SerializeStatisticPathsMap<TSummary>(
statistics.Data(),
consumer,
[] (const TSummary& summary, IYsonConsumer* consumer) {
@@ -214,30 +212,32 @@ i64 GetSum(const TSummary& summary)
return summary.GetSum();
}
-i64 GetNumericValue(const TStatistics& statistics, const TString& path)
+i64 GetNumericValue(const TStatistics& statistics, const TStatisticPath& path)
{
auto value = FindNumericValue(statistics, path);
if (!value) {
- THROW_ERROR_EXCEPTION("Statistics %v is not present",
- path);
+ THROW_ERROR_EXCEPTION("Statistics is not present")
+ << TErrorAttribute("requested_path", path);
} else {
return *value;
}
}
-std::optional<i64> FindNumericValue(const TStatistics& statistics, const TString& path)
+std::optional<i64> FindNumericValue(const TStatistics& statistics, const TStatisticPath& path)
{
auto summary = FindSummary(statistics, path);
return summary ? std::make_optional(summary->GetSum()) : std::nullopt;
}
-std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString& path)
+std::optional<TSummary> FindSummary(const TStatistics& statistics, const TStatisticPath& path)
{
const auto& data = statistics.Data();
auto iterator = data.lower_bound(path);
- if (iterator != data.end() && iterator->first != path && HasPrefix(iterator->first, path)) {
- THROW_ERROR_EXCEPTION("Invalid statistics type: cannot get summary of %v since it is a map",
- path);
+ if (iterator != data.end() && iterator->first != path &&
+ iterator->first.StartsWith(path))
+ {
+ THROW_ERROR_EXCEPTION("Invalid statistics type: cannot get summary since it is a map") <<
+ TErrorAttribute("requested_path", path);
} else if (iterator == data.end() || iterator->first != path) {
return std::nullopt;
} else {
@@ -245,9 +245,9 @@ std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString
}
}
-
////////////////////////////////////////////////////////////////////////////////
+// TODO(pavook) anonymous namespace?
class TStatisticsBuildingConsumer
: public TYsonConsumerBase
, public IBuildingYsonConsumer<TStatistics>
@@ -328,15 +328,17 @@ public:
// If we are here, we are either:
// * at the root (then do nothing)
// * at some directory (then the last key was the directory name)
- if (!LastKey_.empty()) {
- DirectoryNameLengths_.push_back(LastKey_.size());
- CurrentPath_.append('/');
- CurrentPath_.append(LastKey_);
- LastKey_.clear();
+ if (!FirstMapOpen_) {
+ FirstMapOpen_ = true;
} else {
- if (!CurrentPath_.empty()) {
- THROW_ERROR_EXCEPTION("Empty keys are not allowed for statistics");
+ // TODO(pavook) is it okay to throw here?
+ auto literal = TStatisticPathLiteral(LastKey_);
+ if (CurrentPath_.Empty()) {
+ CurrentPath_ = std::move(literal);
+ } else {
+ CurrentPath_.Append(literal);
}
+ LastKey_.clear();
}
}
@@ -347,7 +349,7 @@ public:
THROW_ERROR_EXCEPTION("Attributes other than \"timestamp\" are not allowed");
}
} else {
- LastKey_ = ToYPathLiteral(key);
+ LastKey_ = key;
}
}
@@ -368,16 +370,17 @@ public:
AtSummaryMap_ = false;
}
- if (!CurrentPath_.empty()) {
+ if (!CurrentPath_.Empty()) {
// We need to go to the parent.
- CurrentPath_.resize(CurrentPath_.size() - DirectoryNameLengths_.back() - 1);
- DirectoryNameLengths_.pop_back();
+ CurrentPath_.PopBack();
+ } else {
+ FirstMapOpen_ = false;
}
}
void OnBeginAttributes() override
{
- if (!CurrentPath_.empty()) {
+ if (!CurrentPath_.Empty()) {
THROW_ERROR_EXCEPTION("Attributes are not allowed for statistics");
}
AtAttributes_ = true;
@@ -396,8 +399,7 @@ public:
private:
TStatistics Statistics_;
- TString CurrentPath_;
- std::vector<int> DirectoryNameLengths_;
+ TStatisticPath CurrentPath_;
TSummary CurrentSummary_;
i64 FilledSummaryFields_ = 0;
@@ -405,6 +407,7 @@ private:
TString LastKey_;
+ bool FirstMapOpen_ = false;
bool AtSummaryMap_ = false;
bool AtAttributes_ = false;
};
diff --git a/yt/yt/core/misc/statistics.h b/yt/yt/core/misc/statistics.h
index 53b1a04afc..606992cfc4 100644
--- a/yt/yt/core/misc/statistics.h
+++ b/yt/yt/core/misc/statistics.h
@@ -52,25 +52,25 @@ void Serialize(const TSummary& summary, NYson::IYsonConsumer* consumer);
class TStatistics
{
public:
- using TSummaryMap = std::map<NYPath::TYPath, TSummary>;
+ using TSummaryMap = std::map<NStatisticPath::TStatisticPath, TSummary>;
using TSummaryRange = TIteratorRange<TSummaryMap::const_iterator>;
DEFINE_BYREF_RO_PROPERTY(TSummaryMap, Data);
DEFINE_BYVAL_RW_PROPERTY(std::optional<TInstant>, Timestamp);
public:
- void AddSample(const NYPath::TYPath& path, i64 sample);
+ void AddSample(const NStatisticPath::TStatisticPath& path, i64 sample);
- void AddSample(const NYPath::TYPath& path, const NYTree::INodePtr& sample);
+ void AddSample(const NStatisticPath::TStatisticPath& path, const NYTree::INodePtr& sample);
template <class T>
- void AddSample(const NYPath::TYPath& path, const T& sample);
+ void AddSample(const NStatisticPath::TStatisticPath& path, const T& sample);
- void ReplacePathWithSample(const NYPath::TYPath& path, i64 sample);
+ void ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, i64 sample);
- void ReplacePathWithSample(const NYPath::TYPath& path, const NYTree::INodePtr& sample);
+ void ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, const NYTree::INodePtr& sample);
template <class T>
- void ReplacePathWithSample(const NYPath::TYPath& path, const T& sample);
+ void ReplacePathWithSample(const NStatisticPath::TStatisticPath& path, const T& sample);
//! Merge statistics by merging summaries for each common statistics path.
void Merge(const TStatistics& statistics);
@@ -82,27 +82,27 @@ public:
* Pre-requisites: `prefixPath` must not have terminating slash.
* Examples: /a/b is a prefix path for /a/b/hij but not for /a/bcd/efg nor /a/b itself.
*/
- TSummaryRange GetRangeByPrefix(const TString& prefixPath) const;
+ TSummaryRange GetRangeByPrefix(const NStatisticPath::TStatisticPath& prefixPath) const;
//! Remove all the elements starting from prefixPath.
//! The requirements for prefixPath are the same as in GetRangeByPrefix.
- void RemoveRangeByPrefix(const TString& prefixPath);
+ void RemoveRangeByPrefix(const NStatisticPath::TStatisticPath& prefixPath);
void Persist(const TStreamPersistenceContext& context);
private:
template <class TCallback>
- void ProcessNodeWithCallback(const NYPath::TYPath& path, const NYTree::INodePtr& sample, TCallback callback);
+ void ProcessNodeWithCallback(const NStatisticPath::TStatisticPath& path, const NYTree::INodePtr& sample, TCallback callback);
- TSummary& GetSummary(const NYPath::TYPath& path);
+ TSummary& GetSummary(const NStatisticPath::TStatisticPath& path);
friend class TStatisticsBuildingConsumer;
};
-i64 GetNumericValue(const TStatistics& statistics, const TString& path);
+i64 GetNumericValue(const TStatistics& statistics, const NStatisticPath::TStatisticPath& path);
-std::optional<i64> FindNumericValue(const TStatistics& statistics, const TString& path);
-std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString& path);
+std::optional<i64> FindNumericValue(const TStatistics& statistics, const NStatisticPath::TStatisticPath& path);
+std::optional<TSummary> FindSummary(const TStatistics& statistics, const NStatisticPath::TStatisticPath& path);
////////////////////////////////////////////////////////////////////////////////
@@ -142,12 +142,12 @@ class TTaggedStatistics
{
public:
using TTaggedSummaries = THashMap<TTags, TSummary>;
- using TSummaryMap = std::map<NYPath::TYPath, TTaggedSummaries>;
+ using TSummaryMap = std::map<NStatisticPath::TStatisticPath, TTaggedSummaries>;
void AppendStatistics(const TStatistics& statistics, TTags tags);
- void AppendTaggedSummary(const NYPath::TYPath& path, const TTaggedSummaries& taggedSummaries);
+ void AppendTaggedSummary(const NStatisticPath::TStatisticPath& path, const TTaggedSummaries& taggedSummaries);
- const TTaggedSummaries* FindTaggedSummaries(const NYPath::TYPath& path) const;
+ const TTaggedSummaries* FindTaggedSummaries(const NStatisticPath::TStatisticPath& path) const;
const TSummaryMap& GetData() const;
void Persist(const TStreamPersistenceContext& context);
@@ -164,8 +164,8 @@ void Serialize(const TTaggedStatistics<TTags>& statistics, NYson::IYsonConsumer*
////////////////////////////////////////////////////////////////////////////////
template <class TValue>
-void SerializeYsonPathsMap(
- const std::map<NYTree::TYPath, TValue>& map,
+void SerializeStatisticPathsMap(
+ const std::map<NStatisticPath::TStatisticPath, TValue>& map,
NYson::IYsonConsumer* consumer,
const std::function<void(const TValue&, NYson::IYsonConsumer*)>& valueSerializer);
diff --git a/yt/yt/core/misc/unittests/statistic_path_ut.cpp b/yt/yt/core/misc/unittests/statistic_path_ut.cpp
index e92d73c452..ad6ec5720c 100644
--- a/yt/yt/core/misc/unittests/statistic_path_ut.cpp
+++ b/yt/yt/core/misc/unittests/statistic_path_ut.cpp
@@ -1,13 +1,17 @@
#include <yt/yt/core/test_framework/framework.h>
-#include <yt/yt/core/misc/error.h>
#include <yt/yt/core/misc/statistic_path.h>
+#include <yt/yt/core/misc/error.h>
+
+#include <yt/yt/core/ytree/convert.h>
+
namespace NYT {
namespace {
using namespace NStatisticPath;
-using namespace NStatisticPathLiterals;
+using namespace NYson;
+using namespace NYTree;
////////////////////////////////////////////////////////////////////////////////
@@ -23,6 +27,8 @@ TEST(TStatisticPathTest, Literal)
EXPECT_THROW(TStatisticPathLiteral("\0"), TErrorException);
EXPECT_THROW(TStatisticPathLiteral(TString(Delimiter)), TErrorException);
EXPECT_THROW(TStatisticPathLiteral(""), TErrorException);
+
+ EXPECT_EQ("A"_L / "BB"_L / "CCC"_L, "/A/BB/CCC"_SP);
}
////////////////////////////////////////////////////////////////////////////////
@@ -212,5 +218,61 @@ TEST(TStatisticPathTest, Swap)
////////////////////////////////////////////////////////////////////////////////
+TEST(TStatisticPath, Serialize)
+{
+ TStatisticPath path = "A"_L / "B/C"_L;
+ EXPECT_EQ(
+ ConvertTo<TString>(ConvertToYsonString(path)),
+ path.Path());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TStatisticPath, Deserialize)
+{
+ TStatisticPath simplePath = "/A/B/C"_SP;
+
+ TYsonString pathStr = ConvertToYsonString(simplePath.Path());
+ EXPECT_EQ(ConvertTo<TStatisticPath>(pathStr), simplePath);
+
+ TYsonString slashedPathStr = ConvertToYsonString("/A/B/C");
+ EXPECT_EQ(ConvertTo<TStatisticPath>(slashedPathStr), simplePath);
+
+ TYsonString invalidPathStr = ConvertToYsonString("abc");
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertTo<TStatisticPath>(invalidPathStr),
+ "must start with a delimiter");
+
+ TYsonString invalidSlashedPathStr = ConvertToYsonString("/abc/");
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertTo<TStatisticPath>(invalidSlashedPathStr),
+ "must not end with a delimiter");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TStatisticPath, SaveLoad)
+{
+ TStringStream stream;
+ TStreamSaveContext saveContext(&stream);
+ TStreamLoadContext loadContext(&stream);
+
+ TStatisticPath simplePath1 = "A"_L / "B/C"_L / "X\02YZ"_L;
+ TStatisticPath simplePath2 = "A"_L / "BB"_L;
+ Save(saveContext, simplePath1);
+ Save(saveContext, simplePath2);
+ saveContext.Finish();
+
+ TStatisticPath loadedPath1;
+ TStatisticPath loadedPath2;
+ Load(loadContext, loadedPath1);
+ Load(loadContext, loadedPath2);
+
+ EXPECT_EQ(loadedPath1, simplePath1);
+ EXPECT_EQ(loadedPath2, simplePath2);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
} // namespace
} // namespace NYT
diff --git a/yt/yt/core/misc/unittests/statistics_ut.cpp b/yt/yt/core/misc/unittests/statistics_ut.cpp
index cab0babb45..12955b2017 100644
--- a/yt/yt/core/misc/unittests/statistics_ut.cpp
+++ b/yt/yt/core/misc/unittests/statistics_ut.cpp
@@ -12,6 +12,7 @@ namespace NYT {
using namespace NYTree;
using namespace NYson;
+using namespace NStatisticPath;
template <>
struct TYsonFormatTraits<TSummary>
@@ -60,7 +61,7 @@ TEST(TStatistics, Summary)
////////////////////////////////////////////////////////////////////////////////
-TStatistics CreateStatistics(std::initializer_list<std::pair<NYPath::TYPath, i64>> data)
+TStatistics CreateStatistics(std::initializer_list<std::pair<TStatisticPath, i64>> data)
{
TStatistics result;
for (const auto& item : data) {
@@ -75,38 +76,40 @@ TEST(TStatistics, AddSample)
TStatistics statistics;
statistics.AddSample(
- "/key/subkey",
+ "key"_L / "subkey"_L,
origin);
- EXPECT_EQ(5, GetNumericValue(statistics, "/key/subkey/x"));
- EXPECT_EQ(7, GetNumericValue(statistics, "/key/subkey/y"));
+ EXPECT_EQ(5, GetNumericValue(statistics, "key"_L / "subkey"_L / "x"_L));
+ EXPECT_EQ(7, GetNumericValue(statistics, "key"_L / "subkey"_L / "y"_L));
- statistics.AddSample("/key/sub", 42);
- EXPECT_EQ(42, GetNumericValue(statistics, "/key/sub"));
+ statistics.AddSample("key"_L / "sub"_L, 42);
+ EXPECT_EQ(42, GetNumericValue(statistics, "key"_L / "sub"_L));
// Cannot add sample to the map node.
- EXPECT_THROW(statistics.AddSample("/key/subkey", 24), std::exception);
+ EXPECT_THROW(statistics.AddSample("key"_L / "subkey"_L, 24), std::exception);
statistics.Merge(CreateStatistics({
- {"/key/subkey/x", 5},
- {"/key/subkey/z", 9}}));
+ {"key"_L / "subkey"_L / "x"_L, 5},
+ {"key"_L / "subkey"_L / "z"_L, 9}}));
- EXPECT_EQ(10, GetNumericValue(statistics, "/key/subkey/x"));
- EXPECT_EQ(7, GetNumericValue(statistics, "/key/subkey/y"));
- EXPECT_EQ(9, GetNumericValue(statistics, "/key/subkey/z"));
+ EXPECT_EQ(10, GetNumericValue(statistics, "key"_L / "subkey"_L / "x"_L));
+ EXPECT_EQ(7, GetNumericValue(statistics, "key"_L / "subkey"_L / "y"_L));
+ EXPECT_EQ(9, GetNumericValue(statistics, "key"_L / "subkey"_L / "z"_L));
EXPECT_THROW(
- statistics.Merge(CreateStatistics({{"/key", 5}})),
+ statistics.Merge(CreateStatistics({{"key"_L, 5}})),
std::exception);
- statistics.AddSample("/key/subkey/x", 10);
- EXPECT_EQ(20, GetNumericValue(statistics, "/key/subkey/x"));
+ statistics.AddSample("key"_L / "subkey"_L / "x"_L, 10);
+ EXPECT_EQ(20, GetNumericValue(statistics, "key"_L / "subkey"_L / "x"_L));
auto ysonStatistics = ConvertToYsonString(statistics);
auto deserializedStatistics = ConvertTo<TStatistics>(ysonStatistics);
- EXPECT_EQ(20, GetNumericValue(deserializedStatistics, "/key/subkey/x"));
- EXPECT_EQ(42, GetNumericValue(deserializedStatistics, "/key/sub"));
+ EXPECT_EQ(statistics.Data(), deserializedStatistics.Data());
+
+ EXPECT_EQ(20, GetNumericValue(deserializedStatistics, "key"_L / "subkey"_L / "x"_L));
+ EXPECT_EQ(42, GetNumericValue(deserializedStatistics, "key"_L / "sub"_L));
}
////////////////////////////////////////////////////////////////////////////////
@@ -116,7 +119,7 @@ class TStatisticsUpdater
public:
void AddSample(const INodePtr& node)
{
- Statistics_.AddSample("/custom", node);
+ Statistics_.AddSample("custom"_L, node);
}
const TStatistics& GetStatistics()
@@ -151,9 +154,9 @@ TEST(TStatistics, Consumer)
.EndMap();
const auto& statistics = statisticsUpdater.GetStatistics();
- EXPECT_EQ(4, GetNumericValue(statistics, "/custom/k1"));
- EXPECT_EQ(-7, GetNumericValue(statistics, "/custom/k2"));
- EXPECT_EQ(42, GetNumericValue(statistics, "/custom/key/subkey"));
+ EXPECT_EQ(4, GetNumericValue(statistics, "custom"_L / "k1"_L));
+ EXPECT_EQ(-7, GetNumericValue(statistics, "custom"_L / "k2"_L));
+ EXPECT_EQ(42, GetNumericValue(statistics, "custom"_L / "key"_L / "subkey"_L));
}
////////////////////////////////////////////////////////////////////////////////
@@ -181,10 +184,10 @@ TEST(TStatistics, BuildingConsumer)
auto statistics = ConvertTo<TStatistics>(statisticsYson);
auto data = statistics.Data();
- std::map<TString, TSummary> expectedData {
- { "/abc/def", TSummary(42, 3, 5, 21, 10) },
- { "/abc/degh", TSummary(27, 1, 27, 27, 27) },
- { "/xyz", TSummary(50, 5, 8, 12, std::nullopt) },
+ std::map<TStatisticPath, TSummary> expectedData {
+ { "abc"_L / "def"_L, TSummary(42, 3, 5, 21, 10) },
+ { "abc"_L / "degh"_L, TSummary(27, 1, 27, 27, 27) },
+ { "xyz"_L, TSummary(50, 5, 8, 12, std::nullopt) },
};
EXPECT_EQ(expectedData, data);
@@ -192,41 +195,72 @@ TEST(TStatistics, BuildingConsumer)
////////////////////////////////////////////////////////////////////////////////
+TEST(TStatistics, GetRangeByPrefix) {
+ TStatistics statistics = CreateStatistics({
+ {"a"_L, 1},
+ {"b"_L / "a"_L, 1},
+ {"b"_L / "aa"_L / "a"_L, 1},
+ {"b"_L / TStatisticPathLiteral(TString(std::numeric_limits<char>::max())), 1},
+ {"c"_L, 1},
+ });
+
+ {
+ // We should not return the node that exactly matches the prefix.
+ EXPECT_TRUE(statistics.GetRangeByPrefix("a"_L).empty());
+ }
+
+ {
+ auto actualRange = statistics.GetRangeByPrefix("b"_L);
+ TStatistics expectedRange = CreateStatistics({
+ {"b"_L / "a"_L, 1},
+ {"b"_L / "aa"_L / "a"_L, 1},
+ {"b"_L / TStatisticPathLiteral(TString(std::numeric_limits<char>::max())), 1},
+ });
+
+ EXPECT_EQ(std::map(actualRange.begin(), actualRange.end()), expectedRange.Data());
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
TEST(TTaggedStatistics, AppendStatistics)
{
TTaggedStatistics<int> taggedStatistics;
{
TStatistics statistics;
- statistics.AddSample("/abc/def", 1);
- statistics.AddSample("/abc/defg", 2);
- statistics.AddSample("/xyz", 3);
+ statistics.AddSample("abc"_L / "def"_L, 1);
+ statistics.AddSample("abc"_L / "defg"_L, 2);
+ statistics.AddSample("xyz"_L, 3);
taggedStatistics.AppendStatistics(statistics, 1);
}
{
TStatistics statistics;
- statistics.AddSample("/abc/def", 1);
- statistics.AddSample("/ijk", 2);
+ statistics.AddSample("abc"_L / "def"_L, 1);
+ statistics.AddSample("ijk"_L, 2);
taggedStatistics.AppendStatistics(statistics, 2);
}
{
TStatistics statistics;
- statistics.AddSample("/abc/def", 2);
+ statistics.AddSample("abc"_L / "def"_L, 2);
taggedStatistics.AppendStatistics(statistics, 1);
}
{
auto actualData = taggedStatistics.GetData();
- std::map<TString, THashMap<int, TSummary>> expectedData {
+ std::map<TStatisticPath, THashMap<int, TSummary>> expectedData {
// std::nullopt because Last is always dropped during merge, see TSummary::Merge.
- {"/abc/def", {
- {1, TSummary(3, 2, 1, 2, std::nullopt)},
- {2, TSummary(1, 1, 1, 1, 1)}}},
- {"/abc/defg", {{1, TSummary(2, 1, 2, 2, 2)}}},
- {"/xyz", {{1, TSummary(3, 1, 3, 3, 3)}}},
- {"/ijk", {{2, TSummary(2, 1, 2, 2, 2)}}}
+ {"abc"_L / "def"_L, {
+ {1, TSummary(3, 2, 1, 2, std::nullopt)},
+ {2, TSummary(1, 1, 1, 1, 1)}}},
+ {"abc"_L / "defg"_L, {
+ {1, TSummary(2, 1, 2, 2, 2)}}},
+ {"xyz"_L, {
+ {1, TSummary(3, 1, 3, 3, 3)}}},
+ {"ijk"_L, {
+ {2, TSummary(2, 1, 2, 2, 2)}}}
};
EXPECT_EQ(expectedData, actualData);
@@ -234,13 +268,13 @@ TEST(TTaggedStatistics, AppendStatistics)
{
TStatistics statistics;
- statistics.AddSample("/xyz/suffix", 1);
+ statistics.AddSample("xyz"_L / "suffix"_L, 1);
EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception);
}
{
TStatistics statistics;
- statistics.AddSample("/abc", 1); // prefix
+ statistics.AddSample("abc"_L, 1); // prefix
EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception);
}
}
diff --git a/yt/yt/core/ytree/serialize.cpp b/yt/yt/core/ytree/serialize.cpp
index 13a118efd8..b0ebda3871 100644
--- a/yt/yt/core/ytree/serialize.cpp
+++ b/yt/yt/core/ytree/serialize.cpp
@@ -363,7 +363,14 @@ void Deserialize(TGuid& value, INodePtr node)
// TStatisticPath.
void Deserialize(NStatisticPath::TStatisticPath& value, INodePtr node)
{
- value = NStatisticPath::ParseStatisticPath(node->AsString()->GetValue()).ValueOrThrow();
+ const TString& path = node->AsString()->GetValue();
+
+ // Try to parse slashed paths.
+ if (!path.empty() && path.StartsWith('/')) {
+ value = NStatisticPath::SlashedStatisticPath(path).ValueOrThrow();
+ } else {
+ value = NStatisticPath::ParseStatisticPath(path).ValueOrThrow();
+ }
}
// Subtypes of google::protobuf::Message