aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpavook <pavook@yandex-team.com>2024-07-09 21:51:58 +0300
committerpavook <pavook@yandex-team.com>2024-07-09 22:04:51 +0300
commit61749bcd2a4418b6332c7a4b4568a1ea7c68bf66 (patch)
tree6ca5945e20af302e76657dc5f8ea2e073ff3836f
parent4e76d760c9740b2785c03ee4717e1c48e61ae049 (diff)
downloadydb-61749bcd2a4418b6332c7a4b4568a1ea7c68bf66.tar.gz
YT-22068: Always check prefix invariant on adding new statistics
Prefix invariant: "no statistic path is a (path) prefix of another statistic path". - Check that the statistic is compatible **before** inserting it. - Set an operation alert if an incompatible statistic appears at some point. - Make sure that incompatible statistics do not crash anyone. - Add a test checking that all characters with code points less than `/` are banned in statistic paths to ensure correctness of the check. - Add a test checking AppendStatistic behavior. fc5782fd97cdd191a94f1efa2d9c3ef11ac114f5
-rw-r--r--yt/yt/core/misc/statistics-inl.h74
-rw-r--r--yt/yt/core/misc/statistics.cpp34
-rw-r--r--yt/yt/core/misc/statistics.h9
-rw-r--r--yt/yt/core/misc/unittests/statistics_ut.cpp66
4 files changed, 147 insertions, 36 deletions
diff --git a/yt/yt/core/misc/statistics-inl.h b/yt/yt/core/misc/statistics-inl.h
index 588d0a05c1..6a2e5ec511 100644
--- a/yt/yt/core/misc/statistics-inl.h
+++ b/yt/yt/core/misc/statistics-inl.h
@@ -26,11 +26,78 @@ void TStatistics::ReplacePathWithSample(const NYPath::TYPath& path, const T& sam
////////////////////////////////////////////////////////////////////////////////
+/*! Checks if the existing statistics in TSummaryMap are compatible with a statistic
+ * at |path|. Returns a pair of the conflict type (has a prefix in existing
+ * statistics, is a prefix of an existing statistic, or no conflict at all) and
+ * an iterator. If there is a conflict, iterator points to the conflicting statistic,
+ * otherwise it is a hint. Assumes that the |existingStatistics|
+ * are compatible.
+ */
+template <typename TSummaryMap>
+std::pair<EStatisticPathConflictType, typename TSummaryMap::iterator> IsCompatibleStatistic(
+ TSummaryMap& existingStatistics,
+ const NYPath::TYPath& path)
+{
+ auto hint = existingStatistics.lower_bound(path);
+ if (hint != existingStatistics.end()) {
+ if (hint->first == path) {
+ return {EStatisticPathConflictType::Exists, hint};
+ }
+ if (NYPath::HasPrefix(hint->first, path)) {
+ return {EStatisticPathConflictType::IsPrefix, hint};
+ }
+ }
+ if (hint != existingStatistics.begin()) {
+ auto prev = std::prev(hint);
+ if (NYPath::HasPrefix(path, prev->first)) {
+ return {EStatisticPathConflictType::HasPrefix, prev};
+ }
+ }
+ return {EStatisticPathConflictType::None, hint};
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Tries to emplace statistic into TSummaryMap, and checks if it is valid and compatible.
+template <typename TSummaryMap, typename... Ts>
+std::pair<typename TSummaryMap::iterator, bool> CheckedEmplaceStatistic(
+ TSummaryMap& existingStatistics,
+ const NYPath::TYPath& path,
+ Ts&&... args)
+{
+ if (auto c = CheckStatisticPath(path)) {
+ THROW_ERROR_EXCEPTION("Invalid character in a statistic path")
+ << TErrorAttribute("path", path)
+ << TErrorAttribute("invalid_character", *c);
+ }
+ auto [conflictType, hint] = IsCompatibleStatistic(existingStatistics, path);
+ if (conflictType == EStatisticPathConflictType::Exists) {
+ return {hint, false};
+ }
+ if (conflictType != EStatisticPathConflictType::None) {
+ auto prefixPath = hint->first;
+ auto conflictPath = path;
+
+ if (conflictType == EStatisticPathConflictType::IsPrefix) {
+ std::swap(prefixPath, conflictPath);
+ }
+
+ THROW_ERROR_EXCEPTION("Statistic path cannot be a prefix of another statistic path")
+ << TErrorAttribute("prefix_path", prefixPath)
+ << TErrorAttribute("contained_in_path", conflictPath);
+ }
+ auto emplacedIterator = existingStatistics.emplace_hint(hint, path, std::forward<Ts>(args)...);
+ return {emplacedIterator, true};
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
template <class TTags>
void TTaggedStatistics<TTags>::AppendStatistics(const TStatistics& statistics, TTags tags)
{
for (const auto& [path, summary] : statistics.Data()) {
- auto& pathSummaries = Data_[path];
+ auto [emplacedIterator, _] = CheckedEmplaceStatistic(Data_, path, TTaggedSummaries{});
+ auto& pathSummaries = emplacedIterator->second;
auto it = pathSummaries.find(tags);
if (it == pathSummaries.end()) {
pathSummaries.emplace(tags, summary);
@@ -43,9 +110,8 @@ void TTaggedStatistics<TTags>::AppendStatistics(const TStatistics& statistics, T
template <class TTags>
void TTaggedStatistics<TTags>::AppendTaggedSummary(const NYPath::TYPath& path, const TTaggedStatistics<TTags>::TTaggedSummaries& taggedSummaries)
{
- auto taggedSummariesIt = Data_.find(path);
- if (taggedSummariesIt == Data_.end()) {
- Data_[path] = taggedSummaries;
+ auto [taggedSummariesIt, emplaceHappened] = CheckedEmplaceStatistic(Data_, path, taggedSummaries);
+ if (emplaceHappened) {
return;
}
diff --git a/yt/yt/core/misc/statistics.cpp b/yt/yt/core/misc/statistics.cpp
index 2116d01013..eabb4eda66 100644
--- a/yt/yt/core/misc/statistics.cpp
+++ b/yt/yt/core/misc/statistics.cpp
@@ -114,38 +114,8 @@ std::optional<char> CheckStatisticPath(const NYPath::TYPath& path)
TSummary& TStatistics::GetSummary(const NYPath::TYPath& path)
{
- if (auto c = CheckStatisticPath(path)) {
- THROW_ERROR_EXCEPTION("Invalid character %c in statistic path", *c)
- << TErrorAttribute("path", path)
- << TErrorAttribute("invalid_character", *c);
- }
- auto result = Data_.emplace(path, TSummary());
- auto it = result.first;
- if (result.second) {
- // This is a new statistic, check validity.
- if (it != Data_.begin()) {
- auto prev = std::prev(it);
- if (HasPrefix(it->first, prev->first)) {
- Data_.erase(it);
- THROW_ERROR_EXCEPTION(
- "Incompatible statistic paths: old %v, new %v",
- prev->first,
- path);
- }
- }
- auto next = std::next(it);
- if (next != Data_.end()) {
- if (HasPrefix(next->first, it->first)) {
- Data_.erase(it);
- THROW_ERROR_EXCEPTION(
- "Incompatible statistic paths: old %v, new %v",
- next->first,
- path);
- }
- }
- }
-
- return it->second;
+ auto [iterator, _] = CheckedEmplaceStatistic(Data_, path, TSummary());
+ return iterator->second;
}
void TStatistics::AddSample(const NYPath::TYPath& path, i64 sample)
diff --git a/yt/yt/core/misc/statistics.h b/yt/yt/core/misc/statistics.h
index 7e5a81bdf4..191cff0509 100644
--- a/yt/yt/core/misc/statistics.h
+++ b/yt/yt/core/misc/statistics.h
@@ -111,6 +111,15 @@ std::optional<TSummary> FindSummary(const TStatistics& statistics, const TString
////////////////////////////////////////////////////////////////////////////////
+DEFINE_ENUM(EStatisticPathConflictType,
+ (HasPrefix)
+ (IsPrefix)
+ (Exists)
+ (None)
+);
+
+////////////////////////////////////////////////////////////////////////////////
+
void Serialize(const TStatistics& statistics, NYson::IYsonConsumer* consumer);
void CreateBuildingYsonConsumer(std::unique_ptr<NYson::IBuildingYsonConsumer<TStatistics>>* buildingConsumer, NYson::EYsonType ysonType);
diff --git a/yt/yt/core/misc/unittests/statistics_ut.cpp b/yt/yt/core/misc/unittests/statistics_ut.cpp
index 737773e9ca..83345d8b12 100644
--- a/yt/yt/core/misc/unittests/statistics_ut.cpp
+++ b/yt/yt/core/misc/unittests/statistics_ut.cpp
@@ -117,6 +117,17 @@ TEST(TStatistics, AddSample)
EXPECT_EQ(42, GetNumericValue(deserializedStatistics, "/key/sub"));
}
+TEST(TStatistics, InvalidNames) {
+ // For the statistic merge to work correctly, symbols with code points
+ // less than '/' must be forbidden. See YT-22118.
+ TStatistics statistics;
+ for (char c = std::numeric_limits<char>::min(); c < '/'; ++c) {
+ EXPECT_THROW(
+ statistics.AddSample("/key/abc" + TString{c} + "def", 5),
+ std::exception);
+ }
+}
+
////////////////////////////////////////////////////////////////////////////////
class TStatisticsUpdater
@@ -200,5 +211,60 @@ TEST(TStatistics, BuildingConsumer)
////////////////////////////////////////////////////////////////////////////////
+TEST(TTaggedStatistics, AppendStatistics)
+{
+ TTaggedStatistics<int> taggedStatistics;
+ {
+ TStatistics statistics;
+ statistics.AddSample("/abc/def", 1);
+ statistics.AddSample("/abc/defg", 2);
+ statistics.AddSample("/xyz", 3);
+ taggedStatistics.AppendStatistics(statistics, 1);
+ }
+
+ {
+ TStatistics statistics;
+ statistics.AddSample("/abc/def", 1);
+ statistics.AddSample("/ijk", 2);
+ taggedStatistics.AppendStatistics(statistics, 2);
+ }
+
+ {
+ TStatistics statistics;
+ statistics.AddSample("/abc/def", 2);
+ taggedStatistics.AppendStatistics(statistics, 1);
+ }
+
+ {
+ auto actualData = taggedStatistics.GetData();
+
+ std::map<TString, THashMap<int, TSummary>> expectedData {
+ // std::nullopt because Last is always dropped during merge, see TSummary::Merge.
+ {"/abc/def", {
+ {1, TSummary(3, 2, 1, 2, std::nullopt)},
+ {2, TSummary(1, 1, 1, 1, 1)}}},
+ {"/abc/defg", {{1, TSummary(2, 1, 2, 2, 2)}}},
+ {"/xyz", {{1, TSummary(3, 1, 3, 3, 3)}}},
+ {"/ijk", {{2, TSummary(2, 1, 2, 2, 2)}}}
+ };
+
+ EXPECT_EQ(expectedData, actualData);
+ }
+
+ {
+ TStatistics statistics;
+ statistics.AddSample("/xyz/suffix", 1);
+ EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception);
+ }
+
+ {
+ TStatistics statistics;
+ statistics.AddSample("/abc", 1); // prefix
+ EXPECT_THROW(taggedStatistics.AppendStatistics(statistics, 3), std::exception);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
} // namespace NYT