diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-03-04 17:59:16 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-03-04 18:11:15 +0300 |
commit | d0dca30a5b0275bcdcc2c26f672558bc5113e479 (patch) | |
tree | 1a0bb48cce2148980ffa292a5c8077fcbd0f5f01 /yt | |
parent | f335173a2a8205efd065307fb6bea616b473f5bd (diff) | |
download | ydb-d0dca30a5b0275bcdcc2c26f672558bc5113e479.tar.gz |
Intermediate changes
Diffstat (limited to 'yt')
-rw-r--r-- | yt/python/yt/logger.py | 67 | ||||
-rw-r--r-- | yt/yt/client/table_client/unittests/unversioned_row_ut.cpp | 32 | ||||
-rw-r--r-- | yt/yt/client/table_client/unittests/ya.make | 1 | ||||
-rw-r--r-- | yt/yt/client/table_client/unversioned_row.h | 11 | ||||
-rw-r--r-- | yt/yt/core/misc/arithmetic_formula.cpp | 23 | ||||
-rw-r--r-- | yt/yt/core/misc/unittests/boolean_formula_ut.cpp | 3 | ||||
-rw-r--r-- | yt/yt/core/rpc/service_detail.cpp | 4 | ||||
-rw-r--r-- | yt/yt/core/ya.make | 1 | ||||
-rw-r--r-- | yt/yt/core/ypath/tokenizer.cpp | 6 | ||||
-rw-r--r-- | yt/yt/core/ypath/tokenizer.h | 9 | ||||
-rw-r--r-- | yt/yt/core/yson/unittests/ya.make | 1 | ||||
-rw-r--r-- | yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp | 499 | ||||
-rw-r--r-- | yt/yt/core/yson/ypath_filtering_consumer.cpp | 446 | ||||
-rw-r--r-- | yt/yt/core/yson/ypath_filtering_consumer.h | 41 | ||||
-rw-r--r-- | yt/yt/library/profiling/sensor.cpp | 4 | ||||
-rw-r--r-- | yt/yt/library/profiling/sensor.h | 4 |
16 files changed, 1116 insertions, 36 deletions
diff --git a/yt/python/yt/logger.py b/yt/python/yt/logger.py index c885fa9f52..7b876312b9 100644 --- a/yt/python/yt/logger.py +++ b/yt/python/yt/logger.py @@ -5,7 +5,10 @@ try: except ImportError: yatest_common = None +import functools import logging +import os +import re def set_log_level_from_config(logger): @@ -20,6 +23,58 @@ def set_log_level_from_config(logger): logger.setLevel(level=logging.__dict__[logger_config.LOG_LEVEL.upper()]) +class SimpleColorizedStreamHandler(logging.StreamHandler): + C_LCYAN = "\033[96m" + C_LBLUE = "\033[94m" + C_LGREEN = "\033[92m" + C_LYELLOW = "\033[93m" + C_LGRAY = "\033[37m" + C_BOLD = "\033[1m" + C_END = "\033[0m" + + KW = C_LBLUE + URL = C_LCYAN + C_BOLD + PARAM = C_LGRAY + YSON_PARAM = C_LYELLOW + + RE_KW = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"(Perform HTTP \S+ request|Response received)"), r"{}\1{}".format(KW, C_END)) + RE_HTTP = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"(https?://\S+)"), r"{}\1{}".format(URL, C_END)) + RE_JSON = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"([\w'-]+): "), r"{}\1{}: ".format(PARAM, C_END)) + RE_YSON = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"\"([^\";]+?)\"="), "\"{}\\1{}\"=".format(YSON_PARAM, C_END)) + + ENABLED = os.environ.get("YT_LOG_LEVEL") == "Debug" + + terminator = '\n' # py2 compat + + def _colorize(self, msg): + msg = self.RE_KW(msg) + msg = self.RE_HTTP(msg) + msg = self.RE_JSON(msg) + msg = self.RE_YSON(msg) + return msg + + def emit(self, record): + try: + msg = self.format(record) + stream = self.stream + if stream.isatty() and record.levelno == logging.DEBUG and self.ENABLED: + msg = self._colorize(msg) + stream.write(msg + self.terminator) + self.flush() + except Exception: + self.handleError(record) + + +formatter = None + + +def set_formatter(new_formatter): + global formatter + formatter = new_formatter + for handler in LOGGER.handlers: + handler.setFormatter(new_formatter) + + logging.getLogger("yt.packages.requests.packages.urllib3").setLevel(logging.WARNING) LOGGER = logging.getLogger("Yt") @@ -29,22 +84,12 @@ LOGGER.propagate = False set_log_level_from_config(LOGGER) if logger_config.LOG_PATH is None: - LOGGER.addHandler(logging.StreamHandler()) + LOGGER.addHandler(SimpleColorizedStreamHandler()) else: LOGGER.addHandler(logging.FileHandler(logger_config.LOG_PATH)) BASIC_FORMATTER = logging.Formatter(logger_config.LOG_PATTERN) -formatter = None - - -def set_formatter(new_formatter): - global formatter - formatter = new_formatter - for handler in LOGGER.handlers: - handler.setFormatter(new_formatter) - - set_formatter(BASIC_FORMATTER) diff --git a/yt/yt/client/table_client/unittests/unversioned_row_ut.cpp b/yt/yt/client/table_client/unittests/unversioned_row_ut.cpp new file mode 100644 index 0000000000..a766846c65 --- /dev/null +++ b/yt/yt/client/table_client/unittests/unversioned_row_ut.cpp @@ -0,0 +1,32 @@ +#include <yt/yt/client/table_client/unversioned_row.h> +#include <yt/yt/client/table_client/row_buffer.h> + +#include <yt/yt/core/test_framework/framework.h> + +namespace NYT::NTableClient { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TUnversionedOwningRowTest, DefaultCtor) +{ + TUnversionedOwningRow owningRow; + ASSERT_EQ(owningRow.GetSpaceUsed(), 0ull); +} + +TEST(TUnversionedOwningRowTest, ConstructFromUnversionedRow) +{ + auto buffer = New<TRowBuffer>(); + TUnversionedRowBuilder rowBuilder; + rowBuilder.AddValue(MakeUnversionedInt64Value(123, 0)); + TUnversionedRow row = rowBuilder.GetRow(); + + TUnversionedOwningRow owningRow(row); + ASSERT_EQ(owningRow.GetCount(), 1); + ASSERT_GT(owningRow.GetSpaceUsed(), 0ull); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT::NTableClient diff --git a/yt/yt/client/table_client/unittests/ya.make b/yt/yt/client/table_client/unittests/ya.make index 911aa88179..f7594f2b9a 100644 --- a/yt/yt/client/table_client/unittests/ya.make +++ b/yt/yt/client/table_client/unittests/ya.make @@ -8,6 +8,7 @@ SRCS( columnar_statistics_ut.cpp columnar_ut.cpp serialization_ut.cpp + unversioned_row_ut.cpp ) INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc) diff --git a/yt/yt/client/table_client/unversioned_row.h b/yt/yt/client/table_client/unversioned_row.h index 6ca7db3dab..e24172331f 100644 --- a/yt/yt/client/table_client/unversioned_row.h +++ b/yt/yt/client/table_client/unversioned_row.h @@ -772,11 +772,16 @@ public: size_t GetSpaceUsed() const { - return StringData_.GetHolder()->GetTotalByteSize().value_or(StringData_.Size()) + - RowData_.GetHolder()->GetTotalByteSize().value_or(RowData_.Size()); + size_t size = 0; + if (StringData_) { + size += StringData_.GetHolder()->GetTotalByteSize().value_or(StringData_.Size()); + } + if (RowData_) { + size += RowData_.GetHolder()->GetTotalByteSize().value_or(RowData_.Size()); + } + return size; } - friend void swap(TUnversionedOwningRow& lhs, TUnversionedOwningRow& rhs) { using std::swap; diff --git a/yt/yt/core/misc/arithmetic_formula.cpp b/yt/yt/core/misc/arithmetic_formula.cpp index b8a4f73392..6aa84df465 100644 --- a/yt/yt/core/misc/arithmetic_formula.cpp +++ b/yt/yt/core/misc/arithmetic_formula.cpp @@ -12,6 +12,8 @@ #include <util/generic/hash.h> +#include <algorithm> + namespace NYT { using namespace NYson; @@ -31,11 +33,8 @@ bool IsSymbolAllowedInName(char c, EEvaluationContext context, bool isFirst) if (std::isalpha(c) || c == '_') { return true; } - if (isFirst) { - return false; - } if (std::isdigit(c)) { - return true; + return !isFirst || context == EEvaluationContext::Boolean; } if (context == EEvaluationContext::Boolean && extraAllowedBooleanVariableTokens.contains(c)) { return true; @@ -49,11 +48,16 @@ void ValidateFormulaVariable(const TString& variable, EEvaluationContext context THROW_ERROR_EXCEPTION("Variable should not be empty"); } for (char c : variable) { - if (!IsSymbolAllowedInName(c, context, false)) { + if (!IsSymbolAllowedInName(c, context, /*isFirst*/ false)) { THROW_ERROR_EXCEPTION("Invalid character %Qv in variable %Qv", c, variable); } } - if (!IsSymbolAllowedInName(variable[0], context, true)) { + if (context == EEvaluationContext::Boolean) { + if (std::all_of(variable.begin(), variable.end(), [] (char c) {return std::isdigit(c);})) { + THROW_ERROR_EXCEPTION("All digits characters are prohibited for boolean variable %Qv", variable); + } + } + if (!IsSymbolAllowedInName(variable[0], context, /*isFirst*/ true)) { THROW_ERROR_EXCEPTION("Invalid first character in variable %Qv", variable); } if (variable == "in") { @@ -416,7 +420,7 @@ std::vector<TFormulaToken> TGenericFormulaImpl::Tokenize(const TString& formula, char second = pos + 1 < formula.size() ? formula[pos + 1] : '\0'; if (first == 'i' && second == 'n') { char third = pos + 2 < formula.size() ? formula[pos + 2] : '\0'; - if (IsSymbolAllowedInName(third, context, false)) { + if (IsSymbolAllowedInName(third, context, /*isFirst*/ false)) { return std::nullopt; } else { pos += 2; @@ -523,9 +527,10 @@ std::vector<TFormulaToken> TGenericFormulaImpl::Tokenize(const TString& formula, auto extractVariable = [&] { TString name; - while (pos < formula.size() && IsSymbolAllowedInName(formula[pos], context, name.empty())) { + while (pos < formula.size() && IsSymbolAllowedInName(formula[pos], context, /*isFirst*/ name.empty())) { name += formula[pos++]; } + ValidateFormulaVariable(name, context); return name; }; @@ -564,7 +569,7 @@ std::vector<TFormulaToken> TGenericFormulaImpl::Tokenize(const TString& formula, TFormulaToken token; token.Position = pos; - if (std::isdigit(c) || (c == '-' && !expectBinaryOperator)) { + if (context == EEvaluationContext::Arithmetic && (std::isdigit(c) || (c == '-' && !expectBinaryOperator))) { token.Type = EFormulaTokenType::Number; token.Number = extractNumber(); expectBinaryOperator = true; diff --git a/yt/yt/core/misc/unittests/boolean_formula_ut.cpp b/yt/yt/core/misc/unittests/boolean_formula_ut.cpp index c4faaecca0..0bae531297 100644 --- a/yt/yt/core/misc/unittests/boolean_formula_ut.cpp +++ b/yt/yt/core/misc/unittests/boolean_formula_ut.cpp @@ -148,8 +148,9 @@ TEST(TBooleanFormulaTest, ValidateVariable) ValidateBooleanFormulaVariable("tablet_common/news-queue"); ValidateBooleanFormulaVariable("VAR123VAR"); ValidateBooleanFormulaVariable("IN"); + ValidateBooleanFormulaVariable("2var"); - EXPECT_THROW(ValidateBooleanFormulaVariable("2var"), TErrorException); + EXPECT_THROW(ValidateBooleanFormulaVariable("2"), TErrorException); EXPECT_THROW(ValidateBooleanFormulaVariable("foo bar"), TErrorException); EXPECT_THROW(ValidateBooleanFormulaVariable(""), TErrorException); EXPECT_THROW(ValidateBooleanFormulaVariable("a+b"), TErrorException); diff --git a/yt/yt/core/rpc/service_detail.cpp b/yt/yt/core/rpc/service_detail.cpp index cce7b8f625..87093d5295 100644 --- a/yt/yt/core/rpc/service_detail.cpp +++ b/yt/yt/core/rpc/service_detail.cpp @@ -1532,10 +1532,10 @@ void TRequestQueue::IncrementQueueSize(const TServiceBase::TServiceContextPtr& c void TRequestQueue::DecrementQueueSize(const TServiceBase::TServiceContextPtr& context) { auto newQueueSize = --QueueSize_; - auto oldQueueBytesSize = QueueByteSize_.fetch_sub(GetTotalRequestSize(context)); + auto oldQueueByteSize = QueueByteSize_.fetch_sub(GetTotalRequestSize(context)); YT_ASSERT(newQueueSize >= 0); - YT_ASSERT(oldQueueBytesSize >= 0); + YT_ASSERT(oldQueueByteSize >= 0); } i64 TRequestQueue::GetTotalRequestSize(const TServiceBase::TServiceContextPtr& context) diff --git a/yt/yt/core/ya.make b/yt/yt/core/ya.make index cac3d60635..8d449fdc30 100644 --- a/yt/yt/core/ya.make +++ b/yt/yt/core/ya.make @@ -251,6 +251,7 @@ SRCS( yson/writer.cpp yson/string_merger.cpp yson/ypath_designated_consumer.cpp + yson/ypath_filtering_consumer.cpp yson/depth_limiting_yson_consumer.cpp yson/list_verb_lazy_yson_consumer.cpp yson/attributes_stripper.cpp diff --git a/yt/yt/core/ypath/tokenizer.cpp b/yt/yt/core/ypath/tokenizer.cpp index 86a2183dc9..45fa0f4020 100644 --- a/yt/yt/core/ypath/tokenizer.cpp +++ b/yt/yt/core/ypath/tokenizer.cpp @@ -148,7 +148,7 @@ int TTokenizer::ParseHexDigit(char ch, TStringBuf context) YT_ABORT(); } -void TTokenizer::Expect(ETokenType expectedType) +void TTokenizer::Expect(ETokenType expectedType) const { if (expectedType != Type_) { if (Type_ == ETokenType::EndOfStream) { @@ -169,7 +169,7 @@ void TTokenizer::Expect(ETokenType expectedType) } } -void TTokenizer::ExpectListIndex() +void TTokenizer::ExpectListIndex() const { Expect(NYPath::ETokenType::Literal); i64 index; @@ -190,7 +190,7 @@ bool TTokenizer::Skip(ETokenType expectedType) return false; } -void TTokenizer::ThrowUnexpected() +void TTokenizer::ThrowUnexpected() const { if (Type_ == ETokenType::EndOfStream) { if (PreviousType_ == ETokenType::Slash) { diff --git a/yt/yt/core/ypath/tokenizer.h b/yt/yt/core/ypath/tokenizer.h index 27725db10a..46cf2a058d 100644 --- a/yt/yt/core/ypath/tokenizer.h +++ b/yt/yt/core/ypath/tokenizer.h @@ -14,6 +14,9 @@ public: TTokenizer(const TTokenizer&) = delete; TTokenizer& operator=(const TTokenizer&) = delete; + TTokenizer(TTokenizer&&) = default; + TTokenizer& operator=(TTokenizer&&) = default; + void Reset(TYPathBuf path); ETokenType Advance(); @@ -27,10 +30,10 @@ public: TStringBuf GetPath() const; const TString& GetLiteralValue() const; - void Expect(ETokenType expectedType); - void ExpectListIndex(); + void Expect(ETokenType expectedType) const; + void ExpectListIndex() const; bool Skip(ETokenType expectedType); - [[noreturn]] void ThrowUnexpected(); + [[noreturn]] void ThrowUnexpected() const; private: TYPathBuf Path_; diff --git a/yt/yt/core/yson/unittests/ya.make b/yt/yt/core/yson/unittests/ya.make index 5bb15cf408..a153d12ce7 100644 --- a/yt/yt/core/yson/unittests/ya.make +++ b/yt/yt/core/yson/unittests/ya.make @@ -17,6 +17,7 @@ SRCS( protobuf_yson_schema_ut.cpp protobuf_yson_ut.cpp ypath_designated_yson_consumer_ut.cpp + ypath_filtering_yson_consumer_ut.cpp yson_parser_ut.cpp yson_pull_parser_ut.cpp yson_token_writer_ut.cpp diff --git a/yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp b/yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp new file mode 100644 index 0000000000..3173d603dc --- /dev/null +++ b/yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp @@ -0,0 +1,499 @@ +#include <yt/yt/core/test_framework/framework.h> + +#include <yt/yt/core/ytree/fluent.h> + +#include <yt/yt/core/yson/ypath_filtering_consumer.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +class TYPathFilteringConsumerTest + : public ::testing::Test +{ +public: + TYPathFilteringConsumerTest() + : Output_(ValueString_) + , Consumer_(CreateYsonWriter(&Output_, EYsonFormat::Text, EYsonType::Node, /*enableRaw*/ false)) + { } + + IYsonConsumer* GetConsumer() + { + return Consumer_.get(); + } + + TString FlushOutput() + { + auto result = std::move(ValueString_); + ValueString_.clear(); + return result; + } + +private: + TString ValueString_; + TStringOutput Output_; + std::unique_ptr<IYsonConsumer> Consumer_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +TEST_F(TYPathFilteringConsumerTest, BlacklistMap1) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistMap2) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forbidden_key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), "{}"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistMap3) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistList) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/1/key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginList() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndList(); + + ASSERT_EQ(FlushOutput(), R"([{"key"="value";};{};{"key"="value";};])"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistListNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/1/key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginList() + .Item() + .BeginList() + .Item().Entity() + .Item().Entity() + .Item().Entity() + .EndList() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndList(); + + ASSERT_EQ(FlushOutput(), R"([[#;#;#;];{};{"key"="value";};])"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistAttributes) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/@"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginAttributes() + .Item("some_attr").Entity() + .EndAttributes() + .Entity(); + + ASSERT_EQ(FlushOutput(), R"(#)"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistAttribute) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/@forbidden_attr"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginAttributes() + .Item("forbidden_attr").Entity() + .Item("allowed_attr").Entity() + .EndAttributes() + .Entity(); + + ASSERT_EQ(FlushOutput(), R"(<"allowed_attr"=#;>#)"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistNestedMap) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key", "/nested_map/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .Item("nested_map") + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";"nested_map"={"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistNestedAsterisk) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/*/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("nested_map1") + .BeginMap() + .Item("key").Value("value") + .Item("forbidden_key").Value("value") + .EndMap() + .Item("nested_map2") + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"nested_map1"={"key"="value";};"nested_map2"={"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, WhitelistMap1) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {""}, + EPathFilteringMode::Whitelist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, WhitelistMap2) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/allowed_key"}, + EPathFilteringMode::Whitelist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("allowed_key").Value("value") + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"allowed_key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, WhitelistNestedMap) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/allowed_key", "/nested/allowed_key"}, + EPathFilteringMode::Whitelist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .Item("allowed_key").Value("value") + .Item("nested") + .BeginMap() + .Item("key").Value("value") + .Item("allowed_key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"allowed_key"="value";"nested"={"allowed_key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedSimple) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedWhitelistMixedNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedMixedNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"key"="value";"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedAttribute) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/@forced_attr"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginAttributes() + .EndAttributes() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"=<"forced_attr"=#;>{"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedAttributes1) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/@"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"=<>{"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedAttributes2) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/@forced_key"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"=<"forced_key"=#;>{"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedMultiplePaths) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key1", "/forced_key2"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key1"=#;"forced_key2"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedMultiplePathsNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key","/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};"forced_key"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringSimple) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringSimplePrefix) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forced_key") + .BeginMap() + .Item("inner").Entity() + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"={"inner"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringMultiplePathsNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key", "/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .Item("some_map") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"=#;"data"={"forced_key"=#;};})"); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/yt/yt/core/yson/ypath_filtering_consumer.cpp b/yt/yt/core/yson/ypath_filtering_consumer.cpp new file mode 100644 index 0000000000..ec1b610623 --- /dev/null +++ b/yt/yt/core/yson/ypath_filtering_consumer.cpp @@ -0,0 +1,446 @@ +#include "ypath_filtering_consumer.h" + +#include "null_consumer.h" + +#include <yt/yt/core/misc/error.h> + +#include <yt/yt/core/ypath/tokenizer.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +struct TAtTokenTag +{ }; + +//////////////////////////////////////////////////////////////////////////////// + +class TYPathFilteringConsumerBase + : public NYson::TYsonConsumerBase +{ +public: + void OnStringScalar(TStringBuf value) override + { + BeforeValue(); + Forward_->OnStringScalar(value); + } + + void OnInt64Scalar(i64 value) override + { + BeforeValue(); + Forward_->OnInt64Scalar(value); + } + + void OnUint64Scalar(ui64 value) override + { + BeforeValue(); + Forward_->OnUint64Scalar(value); + } + + void OnDoubleScalar(double value) override + { + BeforeValue(); + Forward_->OnDoubleScalar(value); + } + + void OnBooleanScalar(bool value) override + { + BeforeValue(); + Forward_->OnBooleanScalar(value); + } + + void OnEntity() override + { + BeforeValue(); + Forward_->OnEntity(); + } + + void OnBeginMap() override + { + BeforeValue(); + AfterCollectionBegin_ = true; + + Forward_->OnBeginMap(); + } + + void OnKeyedItem(TStringBuf key) override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + AppendPath(key); + + Forward_->OnKeyedItem(key); + } + + void OnEndMap() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + BeforeEndDictionary(); + Forward_->OnEndMap(); + } + + void OnBeginList() override + { + BeforeValue(); + AfterCollectionBegin_ = true; + ListIndexes_.push_back(0); + + Forward_->OnBeginList(); + } + + void OnListItem() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + AppendPath(ListIndexes_.back()); + ++ListIndexes_.back(); + + Forward_->OnListItem(); + } + + void OnEndList() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + ListIndexes_.pop_back(); + + Forward_->OnEndList(); + + } + + void OnBeginAttributes() override + { + AppendPath(TAtTokenTag{}); + + AfterCollectionBegin_ = true; + + Forward_->OnBeginAttributes(); + } + + void OnEndAttributes() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + + BeforeEndDictionary(); + Forward_->OnEndAttributes(); + RollbackPath(); + } + +protected: + virtual void AppendPath(TStringBuf literal) = 0; + virtual void AppendPath(int listIndex) = 0; + virtual void AppendPath(TAtTokenTag) = 0; + + virtual void RollbackPath() = 0; + + virtual void BeforeValue() = 0; + virtual void BeforeEndDictionary() = 0; + + void SetForward(NYson::IYsonConsumer* consumer) + { + Forward_ = consumer; + } + + void ResetForward() + { + Forward_ = GetNullYsonConsumer(); + } + + NYson::IYsonConsumer* GetForward() + { + return Forward_; + } + +private: + NYson::IYsonConsumer* Forward_; + bool AfterCollectionBegin_ = false; + std::vector<int> ListIndexes_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TYPathFilteringConsumer + : public TYPathFilteringConsumerBase +{ +public: + TYPathFilteringConsumer( + NYson::IYsonConsumer* underlying, + std::vector<NYPath::TYPath> paths, + EPathFilteringMode mode) + : Underlying_(underlying) + , Paths_(std::move(paths)) + , Mode_(mode) + { + SetForward(Underlying_); + PerPathFilteringStates_.reserve(Paths_.size()); + for (int i = 0; i < std::ssize(Paths_); ++i) { + PerPathFilteringStates_.push_back(TPathFilteringState{ + .Tokenizer = NYPath::TTokenizer(Paths_[i]), + .MaxMatchedDepth = 0, + .Fulfilled = false, + .Index = i + }); + } + + for (auto& state : PerPathFilteringStates_) { + state.Tokenizer.Expect(NYPath::ETokenType::StartOfStream); + if (!ToNextLiteral(state.Tokenizer)) { + state.Fulfilled = true; + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + } + } + } + +private: + struct TPathFilteringState + { + NYPath::TTokenizer Tokenizer; + int MaxMatchedDepth; + bool Fulfilled; + int Index; + }; + + NYson::IYsonConsumer* Underlying_; + const std::vector<NYPath::TYPath> Paths_; + const EPathFilteringMode Mode_; + bool SubtreeFiltering_ = false; + int FilteringDepth_ = -1; + std::vector<TPathFilteringState> PerPathFilteringStates_; + int Depth_ = 0; + + // For asterisk (*) matching support only. + std::vector<TPathFilteringState> SavedFilteringStates_; + + template <typename TTokenType> + void AppendPathImpl(TTokenType token) + { + ++Depth_; + + bool advancedAnyPath = false; + + for (auto& state : PerPathFilteringStates_) { + if (Depth_ != state.MaxMatchedDepth + 1 || state.Fulfilled) { + continue; + } + + if (DoesMatch(state.Tokenizer, token)) { + if (state.Tokenizer.GetType() == NYPath::ETokenType::Asterisk) { + SaveFilteringState(state); + } + + ++state.MaxMatchedDepth; + advancedAnyPath = true; + if (!ToNextLiteral(state.Tokenizer)) { + state.Fulfilled = true; + + if (!SubtreeFiltering_) { + if (IsBlacklistMode()) { + ResetForward(); + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + } else if (IsWhitelistMode()) { + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + } + } + } + } + } + + if (IsWhitelistMode() && !SubtreeFiltering_) { + if (!advancedAnyPath) { + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + ResetForward(); + } + } + } + + void AppendPath(TStringBuf literal) override + { + AppendPathImpl(literal); + } + + void AppendPath(int listIndex) override + { + AppendPathImpl<TStringBuf>(NYPath::ToYPathLiteral(listIndex)); + } + + void AppendPath(TAtTokenTag tag) override + { + AppendPathImpl(tag); + } + + void RollbackPath() override + { + if (SubtreeFiltering_ && Depth_ == FilteringDepth_) { + SubtreeFiltering_ = false; + SetForward(Underlying_); + } + + --Depth_; + + if (!IsForcedEntitiesMode()) { + while (!SavedFilteringStates_.empty() && SavedFilteringStates_.back().MaxMatchedDepth == Depth_) { + PerPathFilteringStates_[SavedFilteringStates_.back().Index] = std::move(SavedFilteringStates_.back()); + SavedFilteringStates_.pop_back(); + } + } + } + + void BeforeEndDictionary() override + { + if (!IsForcedEntitiesMode()) { + return; + } + + for (auto& state : PerPathFilteringStates_) { + if (Depth_ != state.MaxMatchedDepth || state.Fulfilled) { + continue; + } + + auto& tokenizer = state.Tokenizer; + std::vector<bool> isAttributesStack; + tokenizer.Expect(NYPath::ETokenType::Literal); + GetForward()->OnKeyedItem(tokenizer.GetLiteralValue()); + while(ToNextLiteral(tokenizer)) { + if (tokenizer.GetType() == NYPath::ETokenType::At) { + isAttributesStack.push_back(true); + GetForward()->OnBeginAttributes(); + tokenizer.Advance(); + } else { + isAttributesStack.push_back(false); + GetForward()->OnBeginMap(); + } + tokenizer.Expect(NYPath::ETokenType::Literal); + GetForward()->OnKeyedItem(tokenizer.GetLiteralValue()); + } + GetForward()->OnEntity(); + tokenizer.Expect(NYPath::ETokenType::EndOfStream); + while(!isAttributesStack.empty()) { + if (isAttributesStack.back()) { + GetForward()->OnEndAttributes(); + } else { + GetForward()->OnEndMap(); + } + isAttributesStack.pop_back(); + } + + state.Fulfilled = true; + } + } + + void BeforeValue() override + { + for (auto& state : PerPathFilteringStates_) { + if (Depth_ != state.MaxMatchedDepth || state.Fulfilled) { + continue; + } + + if (state.Tokenizer.GetType() == NYPath::ETokenType::At) { + OnBeginAttributes(); + OnEndAttributes(); + return; + } + } + } + + bool ToNextLiteral(NYPath::TTokenizer& tokenizer) + { + if (tokenizer.GetType() == NYPath::ETokenType::At) { + return tokenizer.Advance() != NYPath::ETokenType::EndOfStream; + } + tokenizer.Advance(); + if (tokenizer.GetType() == NYPath::ETokenType::EndOfStream) { + return false; + } else { + tokenizer.Expect(NYPath::ETokenType::Slash); + tokenizer.Advance(); + return true; + } + } + + bool IsBlacklistMode() const + { + return Mode_ == EPathFilteringMode::Blacklist; + } + + bool IsWhitelistMode() const + { + return Mode_ == EPathFilteringMode::Whitelist || Mode_ == EPathFilteringMode::WhitelistWithForcedEntities; + } + + bool IsForcedEntitiesMode() const + { + return Mode_ == EPathFilteringMode::ForcedEntities || Mode_ == EPathFilteringMode::WhitelistWithForcedEntities; + } + + template <typename TTokenType> + bool DoesMatch(const NYPath::TTokenizer& tokenizer, TTokenType) const + { + if (tokenizer.GetType() == NYPath::ETokenType::At) { + return std::is_same_v<TTokenType, TAtTokenTag>; + } + + return false; + } + + template <> + bool DoesMatch<TStringBuf>(const NYPath::TTokenizer& tokenizer, TStringBuf token) const + { + if (tokenizer.GetType() == NYPath::ETokenType::Asterisk) { + THROW_ERROR_EXCEPTION_IF(IsForcedEntitiesMode(), + "YPathFilteringConsumer does not allow asterisk matching in forced entities modes"); + return true; + } + + tokenizer.Expect(NYPath::ETokenType::Literal); + return tokenizer.GetLiteralValue() == token; + } + + void SaveFilteringState(const TPathFilteringState& state) + { + SavedFilteringStates_.push_back(TPathFilteringState{ + .Tokenizer = NYPath::TTokenizer(state.Tokenizer.GetInput()), + .MaxMatchedDepth = state.MaxMatchedDepth, + .Fulfilled = state.Fulfilled, + .Index = state.Index, + }); + + SavedFilteringStates_.back().Tokenizer.Skip(NYPath::ETokenType::StartOfStream); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +std::unique_ptr<NYson::IYsonConsumer> CreateYPathFilteringConsumer( + NYson::IYsonConsumer* underlying, + std::vector<NYPath::TYPath> paths, + EPathFilteringMode mode) +{ + return std::make_unique<TYPathFilteringConsumer>(underlying, std::move(paths), mode); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/yt/yt/core/yson/ypath_filtering_consumer.h b/yt/yt/core/yson/ypath_filtering_consumer.h new file mode 100644 index 0000000000..afbd0c0236 --- /dev/null +++ b/yt/yt/core/yson/ypath_filtering_consumer.h @@ -0,0 +1,41 @@ +#pragma once + +#include <yt/yt/core/ypath/public.h> + +#include <library/cpp/yt/yson/consumer.h> + +namespace NYT::NYson { + +/////////////////////////////////////////////////////////////////////////////// + +DEFINE_ENUM(EPathFilteringMode, + (Blacklist) + (Whitelist) + (WhitelistWithForcedEntities) + (ForcedEntities) +); + +//////////////////////////////////////////////////////////////////////////////// + +// Creates consumer that wraps underlying consumer providing +// filtering and missing value handling support. +// Several filtering modes are supported: +// - In `Blacklist` mode all nodes inside matched paths are omitted. +// - In `Whitelist` mode all nodes outside matched paths are omitted. +// - In `ForcedEntities` mode an entity node is inserted in case +// no value at given path is consumed. +// - The `WhitelistWithForcedEntities` values are consumed only at provided +// paths and entities are inserted in case of missing values. +// Attribute handling is available in all modes. For `ForcedEntities` modes +// `/@` path stands for inserting empty attribute dictionary, `/@ATTRIBUTE_NAME` for +// inserting the certain attribute. +// Asterisk matching is allowed only for `Blacklist` and `Whitelist` modes. +// Only the `EYsonType::Node` consuming is supported. +std::unique_ptr<NYson::IYsonConsumer> CreateYPathFilteringConsumer( + NYson::IYsonConsumer* underlying, + std::vector<NYPath::TYPath> paths, + EPathFilteringMode mode); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/yt/yt/library/profiling/sensor.cpp b/yt/yt/library/profiling/sensor.cpp index 0e37acf218..beb76248cc 100644 --- a/yt/yt/library/profiling/sensor.cpp +++ b/yt/yt/library/profiling/sensor.cpp @@ -182,7 +182,7 @@ void TGaugeHistogram::Remove(double value, int count) const noexcept Histogram_->Remove(value, count); } -void TGaugeHistogram::Reset() noexcept +void TGaugeHistogram::Reset() const noexcept { if (!Histogram_) { return; @@ -200,7 +200,7 @@ THistogramSnapshot TGaugeHistogram::GetSnapshot() const return Histogram_->GetSnapshot(false); } -void TGaugeHistogram::LoadSnapshot(THistogramSnapshot snapshot) +void TGaugeHistogram::LoadSnapshot(THistogramSnapshot snapshot) const { if (!Histogram_) { return; diff --git a/yt/yt/library/profiling/sensor.h b/yt/yt/library/profiling/sensor.h index 3d81605ed1..209ac43f41 100644 --- a/yt/yt/library/profiling/sensor.h +++ b/yt/yt/library/profiling/sensor.h @@ -142,10 +142,10 @@ class TGaugeHistogram public: void Add(double value, int count = 1) const noexcept; void Remove(double value, int count = 1) const noexcept; - void Reset() noexcept; + void Reset() const noexcept; THistogramSnapshot GetSnapshot() const; - void LoadSnapshot(THistogramSnapshot snapshot); + void LoadSnapshot(THistogramSnapshot snapshot) const; explicit operator bool() const; |