aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-12-10 13:34:14 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-12-10 14:10:45 +0300
commitc255a91e6f0feaf9dc9a6a9b52d67ab18c43dae5 (patch)
tree4d940875884d3b3fee90bef38b898866ccc8aadd
parent1fd7d34d96a347197477d4eb34cbb2823c16066c (diff)
downloadydb-c255a91e6f0feaf9dc9a6a9b52d67ab18c43dae5.tar.gz
Intermediate changes
commit_hash:30e75a336c73b67430370de2655cb84a61d5cf4e
-rw-r--r--yt/yt/library/profiling/solomon/tag_registry-inl.h31
-rw-r--r--yt/yt/library/profiling/solomon/tag_registry.cpp139
-rw-r--r--yt/yt/library/profiling/solomon/tag_registry.h8
-rw-r--r--yt/yt/library/profiling/unittests/solomon_ut.cpp58
4 files changed, 20 insertions, 216 deletions
diff --git a/yt/yt/library/profiling/solomon/tag_registry-inl.h b/yt/yt/library/profiling/solomon/tag_registry-inl.h
deleted file mode 100644
index 226aa5a1c1..0000000000
--- a/yt/yt/library/profiling/solomon/tag_registry-inl.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef TAG_REGISTRY_INL_H
-#error "Direct inclusion of this file is not allowed, include tag_registry.h"
-// For the sake of sane code completion.
-#include "tag_registry.h"
-#endif
-
-namespace NYT::NProfiling {
-
-////////////////////////////////////////////////////////////////////////////////
-
-template <class TTagPerfect>
-TTagId TTagRegistry::EncodeSanitized(TTagPerfect&& tag)
-{
- static_assert(std::is_same_v<std::remove_cvref_t<TTagPerfect>, TTag>);
-
- THashMap<TTag, TTagId>::insert_ctx insertCtx;
- if (auto it = TagByName_.find(tag, insertCtx); it != TagByName_.end()) {
- return it->second;
- } else {
- TTagId tagId = TagById_.size() + 1;
-
- TagByName_.emplace_direct(insertCtx, tag, tagId);
- TagById_.push_back(std::forward<TTagPerfect>(tag));
-
- return tagId;
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NProfiling
diff --git a/yt/yt/library/profiling/solomon/tag_registry.cpp b/yt/yt/library/profiling/solomon/tag_registry.cpp
index a2a1326d10..30d7f0d654 100644
--- a/yt/yt/library/profiling/solomon/tag_registry.cpp
+++ b/yt/yt/library/profiling/solomon/tag_registry.cpp
@@ -10,138 +10,48 @@ namespace NYT::NProfiling {
////////////////////////////////////////////////////////////////////////////////
-namespace {
-
-static constexpr int MaxLabelSize = 200;
-static constexpr int HalfMaxLabelSize = MaxLabelSize / 2;
-
-struct TSanitizeParameters
-{
- int ForbiddenCharCount;
- int ResultingLength;
-
- bool IsSanitizationRequired() const
- {
- return ForbiddenCharCount > 0 || ResultingLength > MaxLabelSize;
- }
-};
-
-bool IsAllowedMonitoringTagValueChar(unsigned char c)
-{
- return 31 < c &&
- c < 127 &&
- c != '|' &&
- c != '*' &&
- c != '?' &&
- c != '"' &&
- c != '\'' &&
- c != '\\' &&
- c != '`';
-}
-
-TSanitizeParameters ScanForSanitize(const std::string& value)
-{
- int forbiddenCharCount = 0;
- for (unsigned char c : value) {
- forbiddenCharCount += static_cast<int>(!IsAllowedMonitoringTagValueChar(c));
- }
-
- return {
- .ForbiddenCharCount = forbiddenCharCount,
- .ResultingLength = static_cast<int>(value.size() + forbiddenCharCount * 2),
- };
-}
-
-std::string SanitizeMonitoringTagValue(const std::string& value, int resultingLength)
+TTagIdList TTagRegistry::Encode(const TTagList& tags)
{
- bool needTrim = resultingLength > MaxLabelSize;
-
- std::string result;
- result.resize(std::min(resultingLength, MaxLabelSize));
-
- int resultIndex = 0;
- for (int index = 0; resultIndex < (needTrim ? HalfMaxLabelSize : resultingLength); ++index) {
- unsigned char c = value[index];
-
- if (IsAllowedMonitoringTagValueChar(value[index])) {
- result[resultIndex++] = c;
- } else {
- result[resultIndex++] = '%';
- result[resultIndex++] = IntToHexLowercase[c >> 4];
- result[resultIndex++] = IntToHexLowercase[c & 0x0f];
- }
- }
-
- if (!needTrim) {
- return result;
- }
-
- resultIndex = MaxLabelSize - 1;
- for (int index = ssize(value) - 1; resultIndex > HalfMaxLabelSize + 2; --index) {
- unsigned char c = value[index];
+ TTagIdList ids;
- if (IsAllowedMonitoringTagValueChar(value[index])) {
- result[resultIndex--] = c;
+ for (const auto& tag : tags) {
+ if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
+ ids.push_back(it->second);
} else {
- result[resultIndex--] = IntToHexLowercase[c & 0x0f];
- result[resultIndex--] = IntToHexLowercase[c >> 4];
- result[resultIndex--] = '%';
+ TagById_.push_back(tag);
+ TagByName_[tag] = TagById_.size();
+ ids.push_back(TagById_.size());
}
}
- result[HalfMaxLabelSize] = '.';
- result[HalfMaxLabelSize + 1] = '.';
- result[HalfMaxLabelSize + 2] = '.';
-
- return result;
+ return ids;
}
-TTag SanitizeMonitoringTag(const TTag& tag, int resultingLength)
+TTagId TTagRegistry::Encode(const TTag& tag)
{
- return {tag.first, SanitizeMonitoringTagValue(tag.second, resultingLength)};
+ if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
+ return it->second;
+ } else {
+ TagById_.push_back(tag);
+ TagByName_[tag] = TagById_.size();
+ return TagById_.size();
+ }
}
-} // namespace
-
-////////////////////////////////////////////////////////////////////////////////
-
TTagIdList TTagRegistry::Encode(const TTagSet& tags)
{
return Encode(tags.Tags());
}
-TTagIdList TTagRegistry::Encode(const TTagList& tags)
-{
- TTagIdList ids;
- for (const auto& tag : tags) {
- ids.push_back(Encode(tag));
- }
-
- return ids;
-}
-
-TTagId TTagRegistry::Encode(const TTag& tag)
-{
- if (auto sanitizeParameters = ScanForSanitize(tag.second);
- sanitizeParameters.IsSanitizationRequired())
- {
- return EncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength));
- } else {
- return EncodeSanitized(tag);
- }
-}
-
TCompactVector<std::optional<TTagId>, TypicalTagCount> TTagRegistry::TryEncode(const TTagList& tags) const
{
TCompactVector<std::optional<TTagId>, TypicalTagCount> ids;
for (const auto& tag : tags) {
- if (auto sanitizeParameters = ScanForSanitize(tag.second);
- sanitizeParameters.IsSanitizationRequired())
- {
- ids.push_back(TryEncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength)));
+ if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
+ ids.push_back(it->second);
} else {
- ids.push_back(TryEncodeSanitized(tag));
+ ids.push_back({});
}
}
@@ -183,15 +93,6 @@ void TTagRegistry::DumpTags(NProto::TSensorDump* dump)
}
}
-std::optional<TTagId> TTagRegistry::TryEncodeSanitized(const TTag& tag) const
-{
- if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
- return it->second;
- } else {
- return std::nullopt;
- }
-}
-
////////////////////////////////////////////////////////////////////////////////
void TTagWriter::WriteLabel(TTagId tag)
diff --git a/yt/yt/library/profiling/solomon/tag_registry.h b/yt/yt/library/profiling/solomon/tag_registry.h
index 884e7aea02..8e76ae1175 100644
--- a/yt/yt/library/profiling/solomon/tag_registry.h
+++ b/yt/yt/library/profiling/solomon/tag_registry.h
@@ -30,10 +30,6 @@ public:
void DumpTags(NProto::TSensorDump* dump);
private:
- template <class TTagPerfect>
- TTagId EncodeSanitized(TTagPerfect&& tag);
- std::optional<TTagId> TryEncodeSanitized(const TTag& tag) const;
-
// TODO(prime@): maybe do something about the fact that tags are never freed.
THashMap<TTag, TTagId> TagByName_;
std::deque<TTag> TagById_;
@@ -64,7 +60,3 @@ private:
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NProfiling
-
-#define TAG_REGISTRY_INL_H
-#include "tag_registry-inl.h"
-#undef TAG_REGISTRY_INL_H
diff --git a/yt/yt/library/profiling/unittests/solomon_ut.cpp b/yt/yt/library/profiling/unittests/solomon_ut.cpp
index 302e0fc40f..c26d478234 100644
--- a/yt/yt/library/profiling/unittests/solomon_ut.cpp
+++ b/yt/yt/library/profiling/unittests/solomon_ut.cpp
@@ -962,64 +962,6 @@ TEST_P(TOmitNameLabelSuffixTest, GaugeSummary)
ASSERT_NEAR(gauges[Format("yt.davg%v{}", omitNameLabelSuffix ? "" : ".avg")], 40 + 1 / 3.0, 1e-6);
}
-TEST(TSolomonRegistry, IncorrectSolomonLabels)
-{
- auto impl = New<TSolomonRegistry>();
- impl->SetWindowSize(12);
-
- TString longTag;
- longTag.reserve(210);
- for (int index = 0; index < 210; ++index) {
- longTag.append('a' + index % 26);
- }
- TString longTagEncoded;
- longTagEncoded.reserve(200);
- longTagEncoded.append(longTag.begin(), 100).append("...");
- for (int index = 103; index < 200; ++index) {
- longTagEncoded.append('a' + (index - 103 + 9) % 26);
- }
-
- TString incorrectSymbolsTag = "aaa|*?\"'\\`bbb";
- incorrectSymbolsTag.back() = 0xff;
- TString incorrectSymbolsTagEncoded = "aaa%7c%2a%3f%22%27%5c%60bb%ff";
-
- TString longWithIncorrectSymbolsTag(200, 'a');
- longWithIncorrectSymbolsTag[98] = 0xff;
- longWithIncorrectSymbolsTag[199] = 0x00;
- TString longWithIncorrectSymbolsTagEncoded;
- longWithIncorrectSymbolsTagEncoded.append(TString(98, 'a'))
- .append("%f...")
- .append(TString(94, 'a'))
- .append("%00");
-
- auto profiler = TProfiler(impl, "/debug")
- .WithTag("tag0", longTag)
- .WithTag("tag1", incorrectSymbolsTag)
- .WithTag("tag2", longWithIncorrectSymbolsTag);
- auto c0 = profiler.Counter("/c");
- c0.Increment(1);
-
- auto result = CollectSensors(impl);
-
- for (const auto& label : result.Labels) {
- auto equal = [&label] (TStringBuf tag) {
- return std::equal(label.begin(), label.begin() + 4, tag.begin());
- };
-
- auto labelValue = label.substr(5, label.size() - 5);
-
- if (equal("tag0")) {
- ASSERT_EQ(labelValue, longTagEncoded);
- } else if (equal("tag1")) {
- ASSERT_EQ(labelValue, incorrectSymbolsTagEncoded);
- } else if (equal("tag2")) {
- ASSERT_EQ(labelValue, longWithIncorrectSymbolsTagEncoded);
- } else {
- ASSERT_TRUE(false);
- }
- }
-}
-
////////////////////////////////////////////////////////////////////////////////
} // namespace