aboutsummaryrefslogtreecommitdiffstats
path: root/yt
diff options
context:
space:
mode:
author: robot-piglet <robot-piglet@yandex-team.com> 2024-12-05 15:03:50 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-12-05 16:02:11 +0300
commit: 8ac1844d7108f0245a09c557c17ebf8de6fe3fe1 (patch)
tree: ff661687625885f98e9aa47d0cf6c7b174a98819 /yt
parent: e96f434399710cd67c5e25d1888930caedc1b655 (diff)
download: ydb-8ac1844d7108f0245a09c557c17ebf8de6fe3fe1.tar.gz
Intermediate changes
commit_hash:a7241fa39ef025c93cfb2c8995772ba44b42c197
Diffstat (limited to 'yt')
-rw-r--r-- yt/yt/library/profiling/solomon/tag_registry-inl.h 31
-rw-r--r-- yt/yt/library/profiling/solomon/tag_registry.cpp 139
-rw-r--r-- yt/yt/library/profiling/solomon/tag_registry.h 8
-rw-r--r-- yt/yt/library/profiling/unittests/solomon_ut.cpp 58
4 files changed, 216 insertions, 20 deletions
diff --git a/yt/yt/library/profiling/solomon/tag_registry-inl.h b/yt/yt/library/profiling/solomon/tag_registry-inl.h
new file mode 100644
index 0000000000..226aa5a1c1
--- /dev/null
+++ b/yt/yt/library/profiling/solomon/tag_registry-inl.h
@@ -0,0 +1,31 @@
+#ifndef TAG_REGISTRY_INL_H
+#error "Direct inclusion of this file is not allowed, include tag_registry.h"
+// For the sake of sane code completion.
+#include "tag_registry.h"
+#endif
+
+namespace NYT::NProfiling {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TTagPerfect>
+TTagId TTagRegistry::EncodeSanitized(TTagPerfect&& tag) // Returns the id of tag, registering it under a new id if it is not known yet.
+{
+ static_assert(std::is_same_v<std::remove_cvref_t<TTagPerfect>, TTag>); // Only TTag (with any cv/ref qualification) is accepted.
+
+ THashMap<TTag, TTagId>::insert_ctx insertCtx; // Filled by find() and reused by emplace_direct() below.
+ if (auto it = TagByName_.find(tag, insertCtx); it != TagByName_.end()) {
+ return it->second;
+ } else {
+ TTagId tagId = TagById_.size() + 1; // New id is the current TagById_ size plus one, so an empty registry hands out 1 first.
+
+ TagByName_.emplace_direct(insertCtx, tag, tagId); // tag is passed as an lvalue here, before it is forwarded below.
+ TagById_.push_back(std::forward<TTagPerfect>(tag)); // Forwarded last, after every other use of tag.
+
+ return tagId;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NProfiling
diff --git a/yt/yt/library/profiling/solomon/tag_registry.cpp b/yt/yt/library/profiling/solomon/tag_registry.cpp
index 30d7f0d654..0bace2e5f1 100644
--- a/yt/yt/library/profiling/solomon/tag_registry.cpp
+++ b/yt/yt/library/profiling/solomon/tag_registry.cpp
@@ -10,48 +10,138 @@ namespace NYT::NProfiling {
////////////////////////////////////////////////////////////////////////////////
-TTagIdList TTagRegistry::Encode(const TTagList& tags)
+namespace {
+
+static constexpr int MaxLabelSize = 200; // Upper bound on the length of a sanitized tag value.
+static constexpr int HalfMaxLabelSize = MaxLabelSize / 2; // Offset at which "..." is placed when a value is trimmed.
+
+struct TSanitizeParameters // Outcome of scanning a tag value before sanitization.
{
- TTagIdList ids;
+ int ForbiddenCharCount; // Number of characters rejected by IsAllowedMonitoringTagValueChar.
+ int ResultingLength; // Value length once every forbidden character is expanded to three characters (before trimming).
- for (const auto& tag : tags) {
- if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
- ids.push_back(it->second);
+ bool IsSanitizationRequired() const // True if the value must be escaped or is longer than MaxLabelSize.
+ {
+ return ForbiddenCharCount > 0 || ResultingLength > MaxLabelSize;
+ }
+};
+
+bool IsAllowedMonitoringTagValueChar(unsigned char c) // True for character codes 32..126 other than the seven excluded below.
+{
+ return 31 < c && // Rejects codes 0..31.
+ c < 127 && // Rejects code 127 and all bytes above it.
+ c != '|' &&
+ c != '*' &&
+ c != '?' &&
+ c != '"' &&
+ c != '\'' &&
+ c != '\\' &&
+ c != '`';
+}
+
+TSanitizeParameters ScanForSanitize(const std::string& value) // Counts forbidden characters in value and computes the length of its escaped form.
+{
+ int forbiddenCharCount = 0;
+ for (unsigned char c : value) {
+ forbiddenCharCount += static_cast<int>(!IsAllowedMonitoringTagValueChar(c)); // Adds 1 for each forbidden character, 0 otherwise.
+ }
+
+ return {
+ .ForbiddenCharCount = forbiddenCharCount,
+ .ResultingLength = static_cast<int>(value.size() + forbiddenCharCount * 2), // Each forbidden character becomes three characters, i.e. two extra.
+ };
+}
+
+std::string SanitizeMonitoringTagValue(const std::string& value, int resultingLength) // Escapes forbidden characters as '%' plus two hex digits; results longer than MaxLabelSize become head + "..." + tail.
+{
+ bool needTrim = resultingLength > MaxLabelSize; // resultingLength is the escaped length the caller obtained from ScanForSanitize.
+
+ std::string result;
+ result.resize(std::min(resultingLength, MaxLabelSize)); // Output never exceeds MaxLabelSize characters.
+
+ int resultIndex = 0;
+ for (int index = 0; resultIndex < (needTrim ? HalfMaxLabelSize : resultingLength); ++index) { // Head pass: fills from the front up to HalfMaxLabelSize when trimming, else the whole result.
+ unsigned char c = value[index];
+
+ if (IsAllowedMonitoringTagValueChar(value[index])) {
+ result[resultIndex++] = c;
} else {
- TagById_.push_back(tag);
- TagByName_[tag] = TagById_.size();
- ids.push_back(TagById_.size());
+ result[resultIndex++] = '%'; // When trimming, an escape may run up to two characters past HalfMaxLabelSize; the "..." below overwrites that part.
+ result[resultIndex++] = IntToHexLowercase[c >> 4];
+ result[resultIndex++] = IntToHexLowercase[c & 0x0f];
}
}
- return ids;
+ if (!needTrim) {
+ return result;
+ }
+
+ resultIndex = MaxLabelSize - 1; // Tail pass: fills the result backwards from its last character.
+ for (int index = ssize(value) - 1; resultIndex > HalfMaxLabelSize + 2; --index) { // Stops once the first position after the "..." slots has been written.
+ unsigned char c = value[index];
+
+ if (IsAllowedMonitoringTagValueChar(value[index])) {
+ result[resultIndex--] = c;
+ } else {
+ result[resultIndex--] = IntToHexLowercase[c & 0x0f]; // Written low digit first because the buffer is filled right to left.
+ result[resultIndex--] = IntToHexLowercase[c >> 4];
+ result[resultIndex--] = '%'; // An escape that reaches into the "..." slots is partly overwritten below.
+ }
+ }
+
+ result[HalfMaxLabelSize] = '.'; // Three-dot marker between the head and the tail.
+ result[HalfMaxLabelSize + 1] = '.';
+ result[HalfMaxLabelSize + 2] = '.';
+
+ return result;
}
-TTagId TTagRegistry::Encode(const TTag& tag)
+TTag SanitizeMonitoringTag(const TTag& tag, int resultingLength) // Builds a tag that keeps tag.first and carries the sanitized form of tag.second.
{
- if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
- return it->second;
- } else {
- TagById_.push_back(tag);
- TagByName_[tag] = TagById_.size();
- return TagById_.size();
- }
+ return {tag.first, SanitizeMonitoringTagValue(tag.second, resultingLength)}; // resultingLength is passed through unchanged.
}
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
TTagIdList TTagRegistry::Encode(const TTagSet& tags) // Encodes the tags held by a tag set.
{
return Encode(tags.Tags()); // Delegates to another Encode overload with the result of tags.Tags().
}
+TTagIdList TTagRegistry::Encode(const TTagList& tags) // Encodes every tag of the list and returns the ids in the same order.
+{
+ TTagIdList ids;
+ for (const auto& tag : tags) {
+ ids.push_back(Encode(tag)); // Per-tag overload performs the sanitization check.
+ }
+
+ return ids;
+}
+
+TTagId TTagRegistry::Encode(const TTag& tag) // Returns the id for tag, sanitizing tag.second first when the scan says it is required.
+{
+ if (auto sanitizeParameters = ScanForSanitize(tag.second);
+ sanitizeParameters.IsSanitizationRequired())
+ {
+ return EncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength)); // The sanitized temporary is passed as an rvalue.
+ } else {
+ return EncodeSanitized(tag); // Value needs no changes, so the tag is used as is.
+ }
+}
+
TCompactVector<std::optional<TTagId>, TypicalTagCount> TTagRegistry::TryEncode(const TTagList& tags) const
{
TCompactVector<std::optional<TTagId>, TypicalTagCount> ids;
for (const auto& tag : tags) {
- if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
- ids.push_back(it->second);
+ if (auto sanitizeParameters = ScanForSanitize(tag.second);
+ sanitizeParameters.IsSanitizationRequired())
+ {
+ ids.push_back(TryEncodeSanitized(SanitizeMonitoringTag(tag, sanitizeParameters.ResultingLength)));
} else {
- ids.push_back({});
+ ids.push_back(TryEncodeSanitized(tag));
}
}
@@ -93,6 +183,15 @@ void TTagRegistry::DumpTags(NProto::TSensorDump* dump)
}
}
+TTagId TTagRegistry::TryEncodeSanitized(const TTag& tag) const // Looks up tag in TagByName_ without registering it.
+{
+ if (auto it = TagByName_.find(tag); it != TagByName_.end()) {
+ return it->second;
+ } else {
+ return {}; // NOTE(review): value-initialized TTagId; TryEncode pushes this into std::optional<TTagId>, so a miss is an engaged optional rather than the empty one pushed before this change - confirm intended.
+ }
+}
+
////////////////////////////////////////////////////////////////////////////////
void TTagWriter::WriteLabel(TTagId tag)
diff --git a/yt/yt/library/profiling/solomon/tag_registry.h b/yt/yt/library/profiling/solomon/tag_registry.h
index 8e76ae1175..d12459602c 100644
--- a/yt/yt/library/profiling/solomon/tag_registry.h
+++ b/yt/yt/library/profiling/solomon/tag_registry.h
@@ -30,6 +30,10 @@ public:
void DumpTags(NProto::TSensorDump* dump);
private:
+ template <class TTagPerfect>
+ TTagId EncodeSanitized(TTagPerfect&& tag); // Defined in tag_registry-inl.h; finds or registers a tag.
+ TTagId TryEncodeSanitized(const TTag& tag) const; // Lookup only; const, so it cannot register new tags.
+
// TODO(prime@): maybe do something about the fact that tags are never freed.
THashMap<TTag, TTagId> TagByName_;
std::deque<TTag> TagById_;
@@ -60,3 +64,7 @@ private:
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NProfiling
+
+#define TAG_REGISTRY_INL_H
+#include "tag_registry-inl.h"
+#undef TAG_REGISTRY_INL_H
diff --git a/yt/yt/library/profiling/unittests/solomon_ut.cpp b/yt/yt/library/profiling/unittests/solomon_ut.cpp
index c26d478234..302e0fc40f 100644
--- a/yt/yt/library/profiling/unittests/solomon_ut.cpp
+++ b/yt/yt/library/profiling/unittests/solomon_ut.cpp
@@ -962,6 +962,64 @@ TEST_P(TOmitNameLabelSuffixTest, GaugeSummary)
ASSERT_NEAR(gauges[Format("yt.davg%v{}", omitNameLabelSuffix ? "" : ".avg")], 40 + 1 / 3.0, 1e-6);
}
+TEST(TSolomonRegistry, IncorrectSolomonLabels) // Checks collected label values for an over-long tag, a tag with forbidden characters, and a tag with both.
+{
+ auto impl = New<TSolomonRegistry>();
+ impl->SetWindowSize(12);
+
+ TString longTag;
+ longTag.reserve(210);
+ for (int index = 0; index < 210; ++index) {
+ longTag.append('a' + index % 26); // 210 characters cycling through 'a'..'z'.
+ }
+ TString longTagEncoded;
+ longTagEncoded.reserve(200);
+ longTagEncoded.append(longTag.begin(), 100).append("..."); // Expected head: first 100 characters followed by the marker.
+ for (int index = 103; index < 200; ++index) {
+ longTagEncoded.append('a' + (index - 103 + 9) % 26); // Expected tail: last 97 characters of longTag, starting at position 113 (113 % 26 == 9).
+ }
+
+ TString incorrectSymbolsTag = "aaa|*?\"'\\`bbb";
+ incorrectSymbolsTag.back() = 0xff; // Replaces the last 'b' with byte 0xff.
+ TString incorrectSymbolsTagEncoded = "aaa%7c%2a%3f%22%27%5c%60bb%ff";
+
+ TString longWithIncorrectSymbolsTag(200, 'a');
+ longWithIncorrectSymbolsTag[98] = 0xff; // Its three-character escape straddles the trim point.
+ longWithIncorrectSymbolsTag[199] = 0x00; // Last byte is NUL.
+ TString longWithIncorrectSymbolsTagEncoded;
+ longWithIncorrectSymbolsTagEncoded.append(TString(98, 'a'))
+ .append("%f...") // Only the first two characters of the 0xff escape remain before the marker.
+ .append(TString(94, 'a'))
+ .append("%00");
+
+ auto profiler = TProfiler(impl, "/debug")
+ .WithTag("tag0", longTag)
+ .WithTag("tag1", incorrectSymbolsTag)
+ .WithTag("tag2", longWithIncorrectSymbolsTag);
+ auto c0 = profiler.Counter("/c");
+ c0.Increment(1);
+
+ auto result = CollectSensors(impl);
+
+ for (const auto& label : result.Labels) {
+ auto equal = [&label] (TStringBuf tag) { // Compares the first four characters of the label with tag.
+ return std::equal(label.begin(), label.begin() + 4, tag.begin());
+ };
+
+ auto labelValue = label.substr(5, label.size() - 5); // Skips the 4-character name plus one more character (presumably a separator - confirm label format).
+
+ if (equal("tag0")) {
+ ASSERT_EQ(labelValue, longTagEncoded);
+ } else if (equal("tag1")) {
+ ASSERT_EQ(labelValue, incorrectSymbolsTagEncoded);
+ } else if (equal("tag2")) {
+ ASSERT_EQ(labelValue, longWithIncorrectSymbolsTagEncoded);
+ } else {
+ ASSERT_TRUE(false); // Any label other than tag0..tag2 fails the test.
+ }
+ }
+}
+
////////////////////////////////////////////////////////////////////////////////
} // namespace