diff options
author | pavook <pavook@yandex-team.com> | 2024-07-09 21:51:58 +0300 |
---|---|---|
committer | pavook <pavook@yandex-team.com> | 2024-07-09 22:04:51 +0300 |
commit | 61749bcd2a4418b6332c7a4b4568a1ea7c68bf66 (patch) | |
tree | 6ca5945e20af302e76657dc5f8ea2e073ff3836f | |
parent | 4e76d760c9740b2785c03ee4717e1c48e61ae049 (diff) | |
download | ydb-61749bcd2a4418b6332c7a4b4568a1ea7c68bf66.tar.gz |
YT-22068: Always check prefix invariant on adding new statistics
Prefix invariant: "no statistic path is a (path) prefix of another statistic path".
- Check that the statistic is compatible **before** inserting it.
- Set an operation alert if an incompatible statistic appears at some point.
- Make sure that incompatible statistics do not crash anyone.
- Add a test checking that all characters with code points less than `/` are banned in statistic paths to ensure correctness of the check.
- Add a test checking AppendStatistic behavior.
fc5782fd97cdd191a94f1efa2d9c3ef11ac114f5
-rw-r--r-- | yt/yt/core/misc/statistics-inl.h | 74 | ||||
-rw-r--r-- | yt/yt/core/misc/statistics.cpp | 34 | ||||
-rw-r--r-- | yt/yt/core/misc/statistics.h | 9 | ||||
-rw-r--r-- | yt/yt/core/misc/unittests/statistics_ut.cpp | 66 |
4 files changed, 147 insertions, 36 deletions
diff --git a/yt/yt/core/misc/statistics-inl.h b/yt/yt/core/misc/statistics-inl.h index 588d0a05c1..6a2e5ec511 100644 --- a/yt/yt/core/misc/statistics-inl.h +++ b/yt/yt/core/misc/statistics-inl.h @@ -26,11 +26,78 @@ void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const T& sam //////////////////////////////////////////////////////////////////////////////// +/*! Checks if the existing statistics in TSummaryMap are compatible with a statistic + * at |path|. Returns a pair of the conflict type (has a prefix in existing + * statistics, is a prefix of an existing statistic, or no conflict at all) and + * an iterator. If there is a conflict, iterator points to the conflicting statistic, + * otherwise it is a hint. Assumes that the |existingStatistics| + * are compatible. + */ +template <typename TSummaryMap> +std::pair<EStatisticPathConflictType, typename TSummaryMap::iterator> IsCompatibleStatistic( + TSummaryMap& existingStatistics, + const NYPath::TYPath& path) +{ + auto hint = existingStatistics.lower_bound(path); + if (hint != existingStatistics.end()) { + if (hint->first == path) { + return {EStatisticPathConflictType::Exists, hint}; + } + if (NYPath::HasPrefix(hint->first, path)) { + return {EStatisticPathConflictType::IsPrefix, hint}; + } + } + if (hint != existingStatistics.begin()) { + auto prev = std::prev(hint); + if (NYPath::HasPrefix(path, prev->first)) { + return {EStatisticPathConflictType::HasPrefix, prev}; + } + } + return {EStatisticPathConflictType::None, hint}; +} + +//////////////////////////////////////////////////////////////////////////////// + +//! Tries to emplace statistic into TSummaryMap, and checks if it is valid and compatible. +template <typename TSummaryMap, typename... Ts> +std::pair<typename TSummaryMap::iterator, bool> CheckedEmplaceStatistic( + TSummaryMap& existingStatistics, + const NYPath::TYPath& path, + Ts&&... args) +{ + if (auto c = CheckStatisticPath(path)) { + THROW_ERROR_EXCEPTION("Invalid character in a statistic path") + << TErrorAttribute("path", path) + << TErrorAttribute("invalid_character", *c); + } + auto [conflictType, hint] = IsCompatibleStatistic(existingStatistics, path); + if (conflictType == EStatisticPathConflictType::Exists) { + return {hint, false}; + } + if (conflictType != EStatisticPathConflictType::None) { + auto prefixPath = hint->first; + auto conflictPath = path; + + if (conflictType == EStatisticPathConflictType::IsPrefix) { + std::swap(prefixPath, conflictPath); + } + + THROW_ERROR_EXCEPTION("Statistic path cannot be a prefix of another statistic path") + << TErrorAttribute("prefix_path", prefixPath) + << TErrorAttribute("contained_in_path", conflictPath); + } + auto emplacedIterator = existingStatistics.emplace_hint(hint, path, std::forward<Ts>(args)...); + return {emplacedIterator, true}; +} + +//////////////////////////////////////////////////////////////////////////////// + template <class TTags> void TTaggedStatistics<TTags>::AppendStatistics(const TStatistics& statistics, TTags tags) { for (const auto& [path, summary] : statistics.Data()) { - auto& pathSummaries = Data_[path]; + auto [emplacedIterator, _] = CheckedEmplaceStatistic(Data_, path, TTaggedSummaries{}); + auto& pathSummaries = emplacedIterator->second; auto it = pathSummaries.find(tags); if (it == pathSummaries.end()) { pathSummaries.emplace(tags, summary); @@ -43,9 +110,8 @@ void TTaggedStatistics<TTags>::AppendStatistics(const TStatistics& statistics, T template <class TTags> void TTaggedStatistics<TTags>::AppendTaggedSummary(const NYPath::TYPath& path, const TTaggedStatistics<TTags>::TTaggedSummaries& taggedSummaries) { - auto taggedSummariesIt = Data_.find(path); - if (taggedSummariesIt == Data_.end()) { - Data_[path] = taggedSummaries; + auto [taggedSummariesIt, emplaceHappened] = CheckedEmplaceStatistic(Data_, path, taggedSummaries); + if (emplaceHappened) { return; } diff --git a/yt/yt/core/misc/statistics.cpp b/yt/yt/core/misc/statistics.cpp index 2116d01013..eabb4eda66 100644 --- a/yt/yt/core/misc/statistics.cpp +++ b/yt/yt/core/misc/statistics.cpp @@ -114,38 +114,8 @@ std::optional<char> CheckStatisticPath(const NYPath::TYPath& path) TSummary& TStatistics::GetSummary(const NYPath::TYPath& path) { - if (auto c = CheckStatisticPath(path)) { - THROW_ERROR_EXCEPTION("Invalid character %c in statistic path", *c) - << TErrorAttribute("path", path) - << TErrorAttribute("invalid_character", *c); - } - auto result = Data_.emplace(path, TSummary()); - auto it = result.first; - if (result.second) { - // This is a new statistic, check validity. - if (it != Data_.begin()) { - auto prev = std::prev(it); - if (HasPrefix(it->first, prev->first)) { - Data_.erase(it); - THROW_ERROR_EXCEPTION( - "Incompatible statistic paths: old %v, new %v", - prev->first, - path); - } - } - auto next = std::next(it); - if (next != Data_.end()) { - if (HasPrefix(next->first, it->first)) { - Data_.erase(it); - THROW_ERROR_EXCEPTION( - "Incompatible statistic paths: old %v, new %v", - next->first, - path); - } - } - } - - return it->second; + auto [iterator, _] = CheckedEmplaceStatistic(Data_, path, TSummary()); + return iterator->second; } void TStatistics::AddSample(const NYPath::TYPath& path, i64 sample) diff --git a/yt/yt/core/misc/statistics.h b/yt/yt/core/misc/statistics.h index 7e5a81bdf4..191cff0509 100644 --- a/yt/yt/core/misc/statistics.h +++ b/yt/yt/core/misc/statistics.h @@ -111,6 +111,15 @@ std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString //////////////////////////////////////////////////////////////////////////////// +DEFINE_ENUM(EStatisticPathConflictType, + (HasPrefix) + (IsPrefix) + (Exists) + (None) +); + +//////////////////////////////////////////////////////////////////////////////// + void Serialize(const TStatistics& statistics, NYson::IYsonConsumer* consumer); void CreateBuildingYsonConsumer(std::unique_ptr<NYson::IBuildingYsonConsumer<TStatistics>>* buildingConsumer, NYson::EYsonType ysonType); diff --git a/yt/yt/core/misc/unittests/statistics_ut.cpp b/yt/yt/core/misc/unittests/statistics_ut.cpp index 737773e9ca..83345d8b12 100644 --- a/yt/yt/core/misc/unittests/statistics_ut.cpp +++ b/yt/yt/core/misc/unittests/statistics_ut.cpp @@ -117,6 +117,17 @@ TEST(TStatistics, AddSample) EXPECT_EQ(42, GetNumericValue(deserializedStatistics, "/key/sub")); } +TEST(TStatistics, InvalidNames) { + // For the statistic merge to work correctly, symbols with code points + // less than '/' must be forbidden. See YT-22118. + TStatistics statistics; + for (char c = std::numeric_limits<char>::min(); c < '/'; ++c) { + EXPECT_THROW( + statistics.AddSample("/key/abc" + TString{c} + "def", 5), + std::exception); + } +} + //////////////////////////////////////////////////////////////////////////////// class TStatisticsUpdater @@ -200,5 +211,60 @@ TEST(TStatistics, BuildingConsumer) //////////////////////////////////////////////////////////////////////////////// +TEST(TTaggedStatistics, AppendStatistics) +{ + TTaggedStatistics<int> taggedStatistics; + { + TStatistics statistics; + statistics.AddSample("/abc/def", 1); + statistics.AddSample("/abc/defg", 2); + statistics.AddSample("/xyz", 3); + taggedStatistics.AppendStatistics(statistics, 1); + } + + { + TStatistics statistics; + statistics.AddSample("/abc/def", 1); + statistics.AddSample("/ijk", 2); + taggedStatistics.AppendStatistics(statistics, 2); + } + + { + TStatistics statistics; + statistics.AddSample("/abc/def", 2); + taggedStatistics.AppendStatistics(statistics, 1); + } + + { + auto actualData = taggedStatistics.GetData(); + + std::map<TString, THashMap<int, TSummary>> expectedData { + // std::nullopt because Last is always dropped during merge, see TSummary::Merge. + {"/abc/def", { + {1, TSummary(3, 2, 1, 2, std::nullopt)}, + {2, TSummary(1, 1, 1, 1, 1)}}}, + {"/abc/defg", {{1, TSummary(2, 1, 2, 2, 2)}}}, + {"/xyz", {{1, TSummary(3, 1, 3, 3, 3)}}}, + {"/ijk", {{2, TSummary(2, 1, 2, 2, 2)}}} + }; + + EXPECT_EQ(expectedData, actualData); + } + + { + TStatistics statistics; + statistics.AddSample("/xyz/suffix", 1); + EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception); + } + + { + TStatistics statistics; + statistics.AddSample("/abc", 1); // prefix + EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception); + } +} + +//////////////////////////////////////////////////////////////////////////////// + } // namespace NYT |