diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-12-05 15:03:50 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-12-05 16:02:11 +0300 |
commit | 8ac1844d7108f0245a09c557c17ebf8de6fe3fe1 (patch) | |
tree | ff661687625885f98e9aa47d0cf6c7b174a98819 /yt | |
parent | e96f434399710cd67c5e25d1888930caedc1b655 (diff) | |
download | ydb-8ac1844d7108f0245a09c557c17ebf8de6fe3fe1.tar.gz |
Intermediate changes
commit_hash:a7241fa39ef025c93cfb2c8995772ba44b42c197
Diffstat (limited to 'yt')
-rw-r--r-- | yt/yt/library/profiling/solomon/tag_registry-inl.h | 31 | ||||
-rw-r--r-- | yt/yt/library/profiling/solomon/tag_registry.cpp | 139 | ||||
-rw-r--r-- | yt/yt/library/profiling/solomon/tag_registry.h | 8 | ||||
-rw-r--r-- | yt/yt/library/profiling/unittests/solomon_ut.cpp | 58 |
4 files changed, 216 insertions, 20 deletions
diff --git a/yt/yt/library/profiling/solomon/tag_registry-inl.h b/yt/yt/library/profiling/solomon/tag_registry-inl.h new file mode 100644 index 0000000000..226aa5a1c1 --- /dev/null +++ b/yt/yt/library/profiling/solomon/tag_registry-inl.h @@ -0,0 +1,31 @@ +#ifndef TAG_REGISTRY_INL_H +#error "Direct inclusion of this file is not allowed, include tag_registry.h" +// For the sake of sane code completion. +#include "tag_registry.h" +#endif + +namespace NYT::NProfiling { + +//////////////////////////////////////////////////////////////////////////////// + +template <class TTagPerfect> +TTagId TTagRegistry::EncodeSanitized(TTagPerfect&& tag) +{ + static_assert(std::is_same_v<std::remove_cvref_t<TTagPerfect>, TTag>); + + THashMap<TTag, TTagId>::insert_ctx insertCtx; + if (auto it = TagByName_.find(tag, insertCtx); it != TagByName_.end()) { + return it->second; + } else { + TTagId tagId = TagById_.size() + 1; + + TagByName_.emplace_direct(insertCtx, tag, tagId); + TagById_.push_back(std::forward<TTagPerfect>(tag)); + + return tagId; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NProfiling diff --git a/yt/yt/library/profiling/solomon/tag_registry.cpp b/yt/yt/library/profiling/solomon/tag_registry.cpp index 30d7f0d654..0bace2e5f1 100644 --- a/yt/yt/library/profiling/solomon/tag_registry.cpp +++ b/yt/yt/library/profiling/solomon/tag_registry.cpp @@ -10,48 +10,138 @@ namespace NYT::NProfiling { //////////////////////////////////////////////////////////////////////////////// -TTagIdList TTagRegistry::Encode(const TTagList& tags) +namespace { + +static constexpr int MaxLabelSize = 200; +static constexpr int HalfMaxLabelSize = MaxLabelSize / 2; + +struct TSanitizeParameters { - TTagIdList ids; + int ForbiddenCharCount; + int ResultingLength; - for (const auto& tag : tags) { - if (auto it = TagByName_.find(tag); it != TagByName_.end()) { - ids.push_back(it->second); + bool IsSanitizationRequired() const + { + return ForbiddenCharCount > 0 || ResultingLength > MaxLabelSize; + } +}; + +bool IsAllowedMonitoringTagValueChar(unsigned char c) +{ + return 31 < c && + c < 127 && + c != '|' && + c != '*' && + c != '?' && + c != '"' && + c != '\'' && + c != '\\' && + c != '`'; +} + +TSanitizeParameters ScanForSanitize(const std::string& value) +{ + int forbiddenCharCount = 0; + for (unsigned char c : value) { + forbiddenCharCount += static_cast<int>(!IsAllowedMonitoringTagValueChar(c)); + } + + return { + .ForbiddenCharCount = forbiddenCharCount, + .ResultingLength = static_cast<int>(value.size() + forbiddenCharCount * 2), + }; +} + +std::string SanitizeMonitoringTagValue(const std::string& value, int resultingLength) +{ + bool needTrim = resultingLength > MaxLabelSize; + + std::string result; + result.resize(std::min(resultingLength, MaxLabelSize)); + + int resultIndex = 0; + for (int index = 0; resultIndex < (needTrim ? HalfMaxLabelSize : resultingLength); ++index) { + unsigned char c = value[index]; + + if (IsAllowedMonitoringTagValueChar(value[index])) { + result[resultIndex++] = c; } else { - TagById_.push_back(tag); - TagByName_[tag] = TagById_.size(); - ids.push_back(TagById_.size()); + result[resultIndex++] = '%'; + result[resultIndex++] = IntToHexLowercase[c >> 4]; + result[resultIndex++] = IntToHexLowercase[c & 0x0f]; } } - return ids; + if (!needTrim) { + return result; + } + + resultIndex = MaxLabelSize - 1; + for (int index = ssize(value) - 1; resultIndex > HalfMaxLabelSize + 2; --index) { + unsigned char c = value[index]; + + if (IsAllowedMonitoringTagValueChar(value[index])) { + result[resultIndex--] = c; + } else { + result[resultIndex--] = IntToHexLowercase[c & 0x0f]; + result[resultIndex--] = IntToHexLowercase[c >> 4]; + result[resultIndex--] = '%'; + } + } + + result[HalfMaxLabelSize] = '.'; + result[HalfMaxLabelSize + 1] = '.'; + result[HalfMaxLabelSize + 2] = '.'; + + return result; } -TTagId TTagRegistry::Encode(const TTag& tag) +TTag SanitizeMonitoringTag(const TTag& tag, int resultingLength) { - if (auto it = TagByName_.find(tag); it != TagByName_.end()) { - return it->second; - } else { - TagById_.push_back(tag); - TagByName_[tag] = TagById_.size(); - return TagById_.size(); - } + return {tag.first, SanitizeMonitoringTagValue(tag.second, resultingLength)}; } +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + TTagIdList TTagRegistry::Encode(const TTagSet& tags) { return Encode(tags.Tags()); } +TTagIdList TTagRegistry::Encode(const TTagList& tags) +{ + TTagIdList ids; + for (const auto& tag : tags) { + ids.push_back(Encode(tag)); + } + + return ids; +} + +TTagId TTagRegistry::Encode(const TTag& tag) +{ + if (auto sanitizeParameters = ScanForSanitize(tag.second); + sanitizeParameters.IsSanitizationRequired()) + { + return EncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength)); + } else { + return EncodeSanitized(tag); + } +} + TCompactVector<std::optional<TTagId>, TypicalTagCount> TTagRegistry::TryEncode(const TTagList& tags) const { TCompactVector<std::optional<TTagId>, TypicalTagCount> ids; for (const auto& tag : tags) { - if (auto it = TagByName_.find(tag); it != TagByName_.end()) { - ids.push_back(it->second); + if (auto sanitizeParameters = ScanForSanitize(tag.second); + sanitizeParameters.IsSanitizationRequired()) + { + ids.push_back(TryEncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength))); } else { - ids.push_back({}); + ids.push_back(TryEncodeSanitized(tag)); } } @@ -93,6 +183,15 @@ void TTagRegistry::DumpTags(NProto::TSensorDump* dump) } } +TTagId TTagRegistry::TryEncodeSanitized(const TTag& tag) const +{ + if (auto it = TagByName_.find(tag); it != TagByName_.end()) { + return it->second; + } else { + return {}; + } +} + //////////////////////////////////////////////////////////////////////////////// void TTagWriter::WriteLabel(TTagId tag) diff --git a/yt/yt/library/profiling/solomon/tag_registry.h b/yt/yt/library/profiling/solomon/tag_registry.h index 8e76ae1175..d12459602c 100644 --- a/yt/yt/library/profiling/solomon/tag_registry.h +++ b/yt/yt/library/profiling/solomon/tag_registry.h @@ -30,6 +30,10 @@ public: void DumpTags(NProto::TSensorDump* dump); private: + template <class TTagPerfect> + TTagId EncodeSanitized(TTagPerfect&& tag); + TTagId TryEncodeSanitized(const TTag& tag) const; + // TODO(prime@): maybe do something about the fact that tags are never freed. THashMap<TTag, TTagId> TagByName_; std::deque<TTag> TagById_; @@ -60,3 +64,7 @@ private: //////////////////////////////////////////////////////////////////////////////// } // namespace NYT::NProfiling + +#define TAG_REGISTRY_INL_H +#include "tag_registry-inl.h" +#undef TAG_REGISTRY_INL_H diff --git a/yt/yt/library/profiling/unittests/solomon_ut.cpp b/yt/yt/library/profiling/unittests/solomon_ut.cpp index c26d478234..302e0fc40f 100644 --- a/yt/yt/library/profiling/unittests/solomon_ut.cpp +++ b/yt/yt/library/profiling/unittests/solomon_ut.cpp @@ -962,6 +962,64 @@ TEST_P(TOmitNameLabelSuffixTest, GaugeSummary) ASSERT_NEAR(gauges[Format("yt.davg%v{}", omitNameLabelSuffix ? "" : ".avg")], 40 + 1 / 3.0, 1e-6); } +TEST(TSolomonRegistry, IncorrectSolomonLabels) +{ + auto impl = New<TSolomonRegistry>(); + impl->SetWindowSize(12); + + TString longTag; + longTag.reserve(210); + for (int index = 0; index < 210; ++index) { + longTag.append('a' + index % 26); + } + TString longTagEncoded; + longTagEncoded.reserve(200); + longTagEncoded.append(longTag.begin(), 100).append("..."); + for (int index = 103; index < 200; ++index) { + longTagEncoded.append('a' + (index - 103 + 9) % 26); + } + + TString incorrectSymbolsTag = "aaa|*?\"'\\`bbb"; + incorrectSymbolsTag.back() = 0xff; + TString incorrectSymbolsTagEncoded = "aaa%7c%2a%3f%22%27%5c%60bb%ff"; + + TString longWithIncorrectSymbolsTag(200, 'a'); + longWithIncorrectSymbolsTag[98] = 0xff; + longWithIncorrectSymbolsTag[199] = 0x00; + TString longWithIncorrectSymbolsTagEncoded; + longWithIncorrectSymbolsTagEncoded.append(TString(98, 'a')) + .append("%f...") + .append(TString(94, 'a')) + .append("%00"); + + auto profiler = TProfiler(impl, "/debug") + .WithTag("tag0", longTag) + .WithTag("tag1", incorrectSymbolsTag) + .WithTag("tag2", longWithIncorrectSymbolsTag); + auto c0 = profiler.Counter("/c"); + c0.Increment(1); + + auto result = CollectSensors(impl); + + for (const auto& label : result.Labels) { + auto equal = [&label] (TStringBuf tag) { + return std::equal(label.begin(), label.begin() + 4, tag.begin()); + }; + + auto labelValue = label.substr(5, label.size() - 5); + + if (equal("tag0")) { + ASSERT_EQ(labelValue, longTagEncoded); + } else if (equal("tag1")) { + ASSERT_EQ(labelValue, incorrectSymbolsTagEncoded); + } else if (equal("tag2")) { + ASSERT_EQ(labelValue, longWithIncorrectSymbolsTagEncoded); + } else { + ASSERT_TRUE(false); + } + } +} + //////////////////////////////////////////////////////////////////////////////// } // namespace |