diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-12-10 13:34:14 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-12-10 14:10:45 +0300 |
commit | c255a91e6f0feaf9dc9a6a9b52d67ab18c43dae5 (patch) | |
tree | 4d940875884d3b3fee90bef38b898866ccc8aadd | |
parent | 1fd7d34d96a347197477d4eb34cbb2823c16066c (diff) | |
download | ydb-c255a91e6f0feaf9dc9a6a9b52d67ab18c43dae5.tar.gz |
Intermediate changes
commit_hash:30e75a336c73b67430370de2655cb84a61d5cf4e
-rw-r--r-- | yt/yt/library/profiling/solomon/tag_registry-inl.h | 31 | ||||
-rw-r--r-- | yt/yt/library/profiling/solomon/tag_registry.cpp | 139 | ||||
-rw-r--r-- | yt/yt/library/profiling/solomon/tag_registry.h | 8 | ||||
-rw-r--r-- | yt/yt/library/profiling/unittests/solomon_ut.cpp | 58 |
4 files changed, 20 insertions, 216 deletions
diff --git a/yt/yt/library/profiling/solomon/tag_registry-inl.h b/yt/yt/library/profiling/solomon/tag_registry-inl.h deleted file mode 100644 index 226aa5a1c1..0000000000 --- a/yt/yt/library/profiling/solomon/tag_registry-inl.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef TAG_REGISTRY_INL_H -#error "Direct inclusion of this file is not allowed, include tag_registry.h" -// For the sake of sane code completion. -#include "tag_registry.h" -#endif - -namespace NYT::NProfiling { - -//////////////////////////////////////////////////////////////////////////////// - -template <class TTagPerfect> -TTagId TTagRegistry::EncodeSanitized(TTagPerfect&& tag) -{ - static_assert(std::is_same_v<std::remove_cvref_t<TTagPerfect>, TTag>); - - THashMap<TTag, TTagId>::insert_ctx insertCtx; - if (auto it = TagByName_.find(tag, insertCtx); it != TagByName_.end()) { - return it->second; - } else { - TTagId tagId = TagById_.size() + 1; - - TagByName_.emplace_direct(insertCtx, tag, tagId); - TagById_.push_back(std::forward<TTagPerfect>(tag)); - - return tagId; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NProfiling diff --git a/yt/yt/library/profiling/solomon/tag_registry.cpp b/yt/yt/library/profiling/solomon/tag_registry.cpp index a2a1326d10..30d7f0d654 100644 --- a/yt/yt/library/profiling/solomon/tag_registry.cpp +++ b/yt/yt/library/profiling/solomon/tag_registry.cpp @@ -10,138 +10,48 @@ namespace NYT::NProfiling { //////////////////////////////////////////////////////////////////////////////// -namespace { - -static constexpr int MaxLabelSize = 200; -static constexpr int HalfMaxLabelSize = MaxLabelSize / 2; - -struct TSanitizeParameters -{ - int ForbiddenCharCount; - int ResultingLength; - - bool IsSanitizationRequired() const - { - return ForbiddenCharCount > 0 || ResultingLength > MaxLabelSize; - } -}; - -bool IsAllowedMonitoringTagValueChar(unsigned char c) -{ - return 31 < c && - c < 127 && - c != '|' && - c != '*' && - c != '?' && - c != '"' && - c != '\'' && - c != '\\' && - c != '`'; -} - -TSanitizeParameters ScanForSanitize(const std::string& value) -{ - int forbiddenCharCount = 0; - for (unsigned char c : value) { - forbiddenCharCount += static_cast<int>(!IsAllowedMonitoringTagValueChar(c)); - } - - return { - .ForbiddenCharCount = forbiddenCharCount, - .ResultingLength = static_cast<int>(value.size() + forbiddenCharCount * 2), - }; -} - -std::string SanitizeMonitoringTagValue(const std::string& value, int resultingLength) +TTagIdList TTagRegistry::Encode(const TTagList& tags) { - bool needTrim = resultingLength > MaxLabelSize; - - std::string result; - result.resize(std::min(resultingLength, MaxLabelSize)); - - int resultIndex = 0; - for (int index = 0; resultIndex < (needTrim ? HalfMaxLabelSize : resultingLength); ++index) { - unsigned char c = value[index]; - - if (IsAllowedMonitoringTagValueChar(value[index])) { - result[resultIndex++] = c; - } else { - result[resultIndex++] = '%'; - result[resultIndex++] = IntToHexLowercase[c >> 4]; - result[resultIndex++] = IntToHexLowercase[c & 0x0f]; - } - } - - if (!needTrim) { - return result; - } - - resultIndex = MaxLabelSize - 1; - for (int index = ssize(value) - 1; resultIndex > HalfMaxLabelSize + 2; --index) { - unsigned char c = value[index]; + TTagIdList ids; - if (IsAllowedMonitoringTagValueChar(value[index])) { - result[resultIndex--] = c; + for (const auto& tag : tags) { + if (auto it = TagByName_.find(tag); it != TagByName_.end()) { + ids.push_back(it->second); } else { - result[resultIndex--] = IntToHexLowercase[c & 0x0f]; - result[resultIndex--] = IntToHexLowercase[c >> 4]; - result[resultIndex--] = '%'; + TagById_.push_back(tag); + TagByName_[tag] = TagById_.size(); + ids.push_back(TagById_.size()); } } - result[HalfMaxLabelSize] = '.'; - result[HalfMaxLabelSize + 1] = '.'; - result[HalfMaxLabelSize + 2] = '.'; - - return result; + return ids; } -TTag SanitizeMonitoringTag(const TTag& tag, int resultingLength) +TTagId TTagRegistry::Encode(const TTag& tag) { - return {tag.first, SanitizeMonitoringTagValue(tag.second, resultingLength)}; + if (auto it = TagByName_.find(tag); it != TagByName_.end()) { + return it->second; + } else { + TagById_.push_back(tag); + TagByName_[tag] = TagById_.size(); + return TagById_.size(); + } } -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - TTagIdList TTagRegistry::Encode(const TTagSet& tags) { return Encode(tags.Tags()); } -TTagIdList TTagRegistry::Encode(const TTagList& tags) -{ - TTagIdList ids; - for (const auto& tag : tags) { - ids.push_back(Encode(tag)); - } - - return ids; -} - -TTagId TTagRegistry::Encode(const TTag& tag) -{ - if (auto sanitizeParameters = ScanForSanitize(tag.second); - sanitizeParameters.IsSanitizationRequired()) - { - return EncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength)); - } else { - return EncodeSanitized(tag); - } -} - TCompactVector<std::optional<TTagId>, TypicalTagCount> TTagRegistry::TryEncode(const TTagList& tags) const { TCompactVector<std::optional<TTagId>, TypicalTagCount> ids; for (const auto& tag : tags) { - if (auto sanitizeParameters = ScanForSanitize(tag.second); - sanitizeParameters.IsSanitizationRequired()) - { - ids.push_back(TryEncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength))); + if (auto it = TagByName_.find(tag); it != TagByName_.end()) { + ids.push_back(it->second); } else { - ids.push_back(TryEncodeSanitized(tag)); + ids.push_back({}); } } @@ -183,15 +93,6 @@ void TTagRegistry::DumpTags(NProto::TSensorDump* dump) } } -std::optional<TTagId> TTagRegistry::TryEncodeSanitized(const TTag& tag) const -{ - if (auto it = TagByName_.find(tag); it != TagByName_.end()) { - return it->second; - } else { - return std::nullopt; - } -} - //////////////////////////////////////////////////////////////////////////////// void TTagWriter::WriteLabel(TTagId tag) diff --git a/yt/yt/library/profiling/solomon/tag_registry.h b/yt/yt/library/profiling/solomon/tag_registry.h index 884e7aea02..8e76ae1175 100644 --- a/yt/yt/library/profiling/solomon/tag_registry.h +++ b/yt/yt/library/profiling/solomon/tag_registry.h @@ -30,10 +30,6 @@ public: void DumpTags(NProto::TSensorDump* dump); private: - template <class TTagPerfect> - TTagId EncodeSanitized(TTagPerfect&& tag); - std::optional<TTagId> TryEncodeSanitized(const TTag& tag) const; - // TODO(prime@): maybe do something about the fact that tags are never freed. THashMap<TTag, TTagId> TagByName_; std::deque<TTag> TagById_; @@ -64,7 +60,3 @@ private: //////////////////////////////////////////////////////////////////////////////// } // namespace NYT::NProfiling - -#define TAG_REGISTRY_INL_H -#include "tag_registry-inl.h" -#undef TAG_REGISTRY_INL_H diff --git a/yt/yt/library/profiling/unittests/solomon_ut.cpp b/yt/yt/library/profiling/unittests/solomon_ut.cpp index 302e0fc40f..c26d478234 100644 --- a/yt/yt/library/profiling/unittests/solomon_ut.cpp +++ b/yt/yt/library/profiling/unittests/solomon_ut.cpp @@ -962,64 +962,6 @@ TEST_P(TOmitNameLabelSuffixTest, GaugeSummary) ASSERT_NEAR(gauges[Format("yt.davg%v{}", omitNameLabelSuffix ? "" : ".avg")], 40 + 1 / 3.0, 1e-6); } -TEST(TSolomonRegistry, IncorrectSolomonLabels) -{ - auto impl = New<TSolomonRegistry>(); - impl->SetWindowSize(12); - - TString longTag; - longTag.reserve(210); - for (int index = 0; index < 210; ++index) { - longTag.append('a' + index % 26); - } - TString longTagEncoded; - longTagEncoded.reserve(200); - longTagEncoded.append(longTag.begin(), 100).append("..."); - for (int index = 103; index < 200; ++index) { - longTagEncoded.append('a' + (index - 103 + 9) % 26); - } - - TString incorrectSymbolsTag = "aaa|*?\"'\\`bbb"; - incorrectSymbolsTag.back() = 0xff; - TString incorrectSymbolsTagEncoded = "aaa%7c%2a%3f%22%27%5c%60bb%ff"; - - TString longWithIncorrectSymbolsTag(200, 'a'); - longWithIncorrectSymbolsTag[98] = 0xff; - longWithIncorrectSymbolsTag[199] = 0x00; - TString longWithIncorrectSymbolsTagEncoded; - longWithIncorrectSymbolsTagEncoded.append(TString(98, 'a')) - .append("%f...") - .append(TString(94, 'a')) - .append("%00"); - - auto profiler = TProfiler(impl, "/debug") - .WithTag("tag0", longTag) - .WithTag("tag1", incorrectSymbolsTag) - .WithTag("tag2", longWithIncorrectSymbolsTag); - auto c0 = profiler.Counter("/c"); - c0.Increment(1); - - auto result = CollectSensors(impl); - - for (const auto& label : result.Labels) { - auto equal = [&label] (TStringBuf tag) { - return std::equal(label.begin(), label.begin() + 4, tag.begin()); - }; - - auto labelValue = label.substr(5, label.size() - 5); - - if (equal("tag0")) { - ASSERT_EQ(labelValue, longTagEncoded); - } else if (equal("tag1")) { - ASSERT_EQ(labelValue, incorrectSymbolsTagEncoded); - } else if (equal("tag2")) { - ASSERT_EQ(labelValue, longWithIncorrectSymbolsTagEncoded); - } else { - ASSERT_TRUE(false); - } - } -} - //////////////////////////////////////////////////////////////////////////////// } // namespace |