diff options
21 files changed, 1220 insertions, 107 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index 9780b831403..26ae8b84813 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: hypothesis -Version: 6.98.6 +Version: 6.98.8 Summary: A library for property-based testing Home-page: https://hypothesis.works Author: David R. MacIver and Zac Hatfield-Dodds diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py b/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py index 3404088ed00..8917d5bd870 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py @@ -482,7 +482,6 @@ def _get_params(func: Callable) -> Dict[str, inspect.Parameter]: kind = inspect.Parameter.KEYWORD_ONLY continue # we omit *varargs, if there are any if _iskeyword(arg.lstrip("*")) or not arg.lstrip("*").isidentifier(): - print(repr(args)) break # skip all subsequent params if this name is invalid params.append(inspect.Parameter(name=arg, kind=kind)) @@ -588,6 +587,8 @@ def _imports_for_object(obj): """Return the imports for `obj`, which may be empty for e.g. lambdas""" if isinstance(obj, (re.Pattern, re.Match)): return {"re"} + if isinstance(obj, st.SearchStrategy): + return _imports_for_strategy(obj) try: if is_generic_type(obj): if isinstance(obj, TypeVar): @@ -606,19 +607,19 @@ def _imports_for_strategy(strategy): # If we have a lazy from_type strategy, because unwrapping it gives us an # error or invalid syntax, import that type and we're done. if isinstance(strategy, LazyStrategy): - if strategy.function.__name__ in ( - st.from_type.__name__, - st.from_regex.__name__, - ): - return { - imp - for arg in set(strategy._LazyStrategy__args) - | set(strategy._LazyStrategy__kwargs.values()) - for imp in _imports_for_object(arg) - } + imports = { + imp + for arg in set(strategy._LazyStrategy__args) + | set(strategy._LazyStrategy__kwargs.values()) + for imp in _imports_for_object(_strip_typevars(arg)) + } + if re.match(r"from_(type|regex)\(", repr(strategy)): + if repr(strategy).startswith("from_type("): + return {module for module, _ in imports} + return imports elif _get_module(strategy.function).startswith("hypothesis.extra."): module = _get_module(strategy.function).replace("._array_helpers", ".numpy") - return {(module, strategy.function.__name__)} + return {(module, strategy.function.__name__)} | imports imports = set() with warnings.catch_warnings(): @@ -672,6 +673,9 @@ def _valid_syntax_repr(strategy): if isinstance(strategy, OneOfStrategy): seen = set() elems = [] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SmallSearchSpaceWarning) + strategy.element_strategies # might warn on first access for s in strategy.element_strategies: if isinstance(s, SampledFromStrategy) and s.elements == (os.environ,): continue @@ -694,7 +698,11 @@ def _valid_syntax_repr(strategy): # Return a syntactically-valid strategy repr, including fixing some # strategy reprs and replacing invalid syntax reprs with `"nothing()"`. # String-replace to hide the special case in from_type() for Decimal('snan') - r = repr(strategy).replace(".filter(_can_hash)", "") + r = ( + repr(strategy) + .replace(".filter(_can_hash)", "") + .replace("hypothesis.strategies.", "") + ) # Replace <unknown> with ... in confusing lambdas r = re.sub(r"(lambda.*?: )(<unknown>)([,)])", r"\1...\3", r) compile(r, "<string>", "eval") @@ -724,9 +732,10 @@ def _get_module_helper(obj): dots = [i for i, c in enumerate(module_name) if c == "."] + [None] for idx in dots: - if getattr(sys.modules.get(module_name[:idx]), obj.__name__, None) is obj: - KNOWN_FUNCTION_LOCATIONS[obj] = module_name[:idx] - return module_name[:idx] + for candidate in (module_name[:idx].lstrip("_"), module_name[:idx]): + if getattr(sys.modules.get(candidate), obj.__name__, None) is obj: + KNOWN_FUNCTION_LOCATIONS[obj] = candidate + return candidate return module_name @@ -755,7 +764,7 @@ def _get_qualname(obj, *, include_module=False): def _write_call( - func: Callable, *pass_variables: str, except_: Except, assign: str = "" + func: Callable, *pass_variables: str, except_: Except = Exception, assign: str = "" ) -> str: """Write a call to `func` with explicit and implicit arguments. @@ -1000,6 +1009,9 @@ def _parameter_to_annotation(parameter: Any) -> Optional[_AnnotationData]: else: type_name = str(parameter) + if type_name.startswith("hypothesis.strategies."): + return _AnnotationData(type_name.replace("hypothesis.strategies", "st"), set()) + origin_type = get_origin(parameter) # if not generic or no generic arguments @@ -1045,9 +1057,6 @@ def _make_test(imports: ImportSet, body: str) -> str: # Discarding "builtins." and "__main__" probably isn't particularly useful # for user code, but important for making a good impression in demos. body = body.replace("builtins.", "").replace("__main__.", "") - body = body.replace("hypothesis.strategies.", "st.") - if "st.from_type(typing." in body: - imports.add("typing") imports |= {("hypothesis", "given"), ("hypothesis", "strategies as st")} if " reject()\n" in body: imports.add(("hypothesis", "reject")) @@ -1260,11 +1269,29 @@ def magic( hints = get_type_hints(func) hints.pop("return", None) params = _get_params(func) - if len(hints) == len(params) == 2: - a, b = hints.values() + if (len(hints) == len(params) == 2) or ( + _get_module(func) == "operator" + and "item" not in func.__name__ + and tuple(params) in [("a", "b"), ("x", "y")] + ): + a, b = hints.values() or [Any, Any] arg1, arg2 = params if a == b and len(arg1) == len(arg2) <= 3: - make_(_make_binop_body, func, annotate=annotate) + # https://en.wikipedia.org/wiki/Distributive_property#Other_examples + known = { + "mul": "add", + "matmul": "add", + "or_": "and_", + "and_": "or_", + }.get(func.__name__, "") + distributes_over = getattr(sys.modules[_get_module(func)], known, None) + make_( + _make_binop_body, + func, + commutative=func.__name__ != "matmul", + distributes_over=distributes_over, + annotate=annotate, + ) del by_name[name] # Look for Numpy ufuncs or gufuncs, and write array-oriented tests for them. @@ -1469,10 +1496,17 @@ def roundtrip( return _make_test(*_make_roundtrip_body(funcs, except_, style, annotate)) -def _make_equiv_body(funcs, except_, style, annotate): +def _get_varnames(funcs): var_names = [f"result_{f.__name__}" for f in funcs] if len(set(var_names)) < len(var_names): - var_names = [f"result_{i}_{ f.__name__}" for i, f in enumerate(funcs)] + var_names = [f"result_{f.__name__}_{_get_module(f)}" for f in funcs] + if len(set(var_names)) < len(var_names): + var_names = [f"result_{i}_{f.__name__}" for i, f in enumerate(funcs)] + return var_names + + +def _make_equiv_body(funcs, except_, style, annotate): + var_names = _get_varnames(funcs) test_lines = [ _write_call(f, assign=vname, except_=except_) for vname, f in zip(var_names, funcs) @@ -1512,10 +1546,7 @@ else: def _make_equiv_errors_body(funcs, except_, style, annotate): - var_names = [f"result_{f.__name__}" for f in funcs] - if len(set(var_names)) < len(var_names): - var_names = [f"result_{i}_{ f.__name__}" for i, f in enumerate(funcs)] - + var_names = _get_varnames(funcs) first, *rest = funcs first_call = _write_call(first, assign=var_names[0], except_=except_) extra_imports, suppress = _exception_string(except_) @@ -1715,18 +1746,11 @@ def _make_binop_body( maker( "associative", "abc", + _write_call(func, "a", _write_call(func, "b", "c"), assign="left"), _write_call( func, - "a", - _write_call(func, "b", "c", except_=Exception), - except_=Exception, - assign="left", - ), - _write_call( - func, - _write_call(func, "a", "b", except_=Exception), + _write_call(func, "a", "b"), "c", - except_=Exception, assign="right", ), ) @@ -1734,8 +1758,8 @@ def _make_binop_body( maker( "commutative", "ab", - _write_call(func, "a", "b", except_=Exception, assign="left"), - _write_call(func, "b", "a", except_=Exception, assign="right"), + _write_call(func, "a", "b", assign="left"), + _write_call(func, "b", "a", assign="right"), ) if identity is not None: # Guess that the identity element is the minimal example from our operands @@ -1757,34 +1781,42 @@ def _make_binop_body( compile(repr(identity), "<string>", "exec") except SyntaxError: identity = repr(identity) # type: ignore - maker( - "identity", - "a", + identity_parts = [ + f"{identity = }", _assert_eq( style, "a", - _write_call(func, "a", repr(identity), except_=Exception), + _write_call(func, "a", "identity"), ), - ) + _assert_eq( + style, + "a", + _write_call(func, "identity", "a"), + ), + ] + maker("identity", "a", "\n".join(identity_parts)) if distributes_over: - maker( - distributes_over.__name__ + "_distributes_over", - "abc", + do = distributes_over + dist_parts = [ + _write_call(func, "a", _write_call(do, "b", "c"), assign="left"), _write_call( - distributes_over, - _write_call(func, "a", "b", except_=Exception), - _write_call(func, "a", "c", except_=Exception), - except_=Exception, - assign="left", + do, + _write_call(func, "a", "b"), + _write_call(func, "a", "c"), + assign="ldist", ), + _assert_eq(style, "ldist", "left"), + "\n", + _write_call(func, _write_call(do, "a", "b"), "c", assign="right"), _write_call( - func, - "a", - _write_call(distributes_over, "b", "c", except_=Exception), - except_=Exception, - assign="right", + do, + _write_call(func, "a", "c"), + _write_call(func, "b", "c"), + assign="rdist", ), - ) + _assert_eq(style, "rdist", "right"), + ] + maker(do.__name__ + "_distributes_over", "abc", "\n".join(dist_parts)) _, operands_repr = _valid_syntax_repr(operands) operands_repr = _st_strategy_names(operands_repr) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py index 99a170ca647..2a011a8b118 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py @@ -47,6 +47,13 @@ MUTATION_POOL_SIZE = 100 MIN_TEST_CALLS = 10 BUFFER_SIZE = 8 * 1024 +# If the shrinking phase takes more than five minutes, abort it early and print +# a warning. Many CI systems will kill a build after around ten minutes with +# no output, and appearing to hang isn't great for interactive use either - +# showing partially-shrunk examples is better than quitting with no examples! +# (but make it monkeypatchable, for the rare users who need to keep on shrinking) +MAX_SHRINKING_SECONDS = 300 + @attr.s class HealthCheckState: @@ -811,9 +818,8 @@ class ConjectureRunner: ) assert ex1.end <= ex2.start - replacements = [data.buffer[e.start : e.end] for e in [ex1, ex2]] - - replacement = self.random.choice(replacements) + e = self.random.choice([ex1, ex2]) + replacement = data.buffer[e.start : e.end] try: # We attempt to replace both the the examples with @@ -822,7 +828,7 @@ class ConjectureRunner: # wrong - labels matching are only a best guess as to # whether the two are equivalent - but it doesn't # really matter. It may not achieve the desired result - # but it's still a perfectly acceptable choice sequence. + # but it's still a perfectly acceptable choice sequence # to try. new_data = self.cached_test_function( data.buffer[: ex1.start] @@ -922,7 +928,7 @@ class ConjectureRunner: ) def new_conjecture_data_for_buffer(self, buffer): - return ConjectureData.for_buffer(buffer, observer=self.tree.new_observer()) + return self.new_conjecture_data(buffer, max_length=len(buffer)) def shrink_interesting_examples(self): """If we've found interesting examples, try to replace each of them @@ -935,12 +941,7 @@ class ConjectureRunner: return self.debug("Shrinking interesting examples") - - # If the shrinking phase takes more than five minutes, abort it early and print - # a warning. Many CI systems will kill a build after around ten minutes with - # no output, and appearing to hang isn't great for interactive use either - - # showing partially-shrunk examples is better than quitting with no examples! - self.finish_shrinking_deadline = time.perf_counter() + 300 + self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS for prev_data in sorted( self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 43ec273cc1d..6aafd5421cc 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 98, 6) +__version_info__ = (6, 98, 8) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index 5da3870659b..1678e159c96 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.98.6) +VERSION(6.98.8) LICENSE(MPL-2.0) diff --git a/yt/python/yt/logger.py b/yt/python/yt/logger.py index c885fa9f524..7b876312b91 100644 --- a/yt/python/yt/logger.py +++ b/yt/python/yt/logger.py @@ -5,7 +5,10 @@ try: except ImportError: yatest_common = None +import functools import logging +import os +import re def set_log_level_from_config(logger): @@ -20,20 +23,47 @@ def set_log_level_from_config(logger): logger.setLevel(level=logging.__dict__[logger_config.LOG_LEVEL.upper()]) -logging.getLogger("yt.packages.requests.packages.urllib3").setLevel(logging.WARNING) +class SimpleColorizedStreamHandler(logging.StreamHandler): + C_LCYAN = "\033[96m" + C_LBLUE = "\033[94m" + C_LGREEN = "\033[92m" + C_LYELLOW = "\033[93m" + C_LGRAY = "\033[37m" + C_BOLD = "\033[1m" + C_END = "\033[0m" -LOGGER = logging.getLogger("Yt") + KW = C_LBLUE + URL = C_LCYAN + C_BOLD + PARAM = C_LGRAY + YSON_PARAM = C_LYELLOW -LOGGER.propagate = False + RE_KW = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"(Perform HTTP \S+ request|Response received)"), r"{}\1{}".format(KW, C_END)) + RE_HTTP = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"(https?://\S+)"), r"{}\1{}".format(URL, C_END)) + RE_JSON = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"([\w'-]+): "), r"{}\1{}: ".format(PARAM, C_END)) + RE_YSON = functools.partial(lambda p, r, m: p.sub(r, m), re.compile(r"\"([^\";]+?)\"="), "\"{}\\1{}\"=".format(YSON_PARAM, C_END)) -set_log_level_from_config(LOGGER) + ENABLED = os.environ.get("YT_LOG_LEVEL") == "Debug" -if logger_config.LOG_PATH is None: - LOGGER.addHandler(logging.StreamHandler()) -else: - LOGGER.addHandler(logging.FileHandler(logger_config.LOG_PATH)) + terminator = '\n' # py2 compat + + def _colorize(self, msg): + msg = self.RE_KW(msg) + msg = self.RE_HTTP(msg) + msg = self.RE_JSON(msg) + msg = self.RE_YSON(msg) + return msg + + def emit(self, record): + try: + msg = self.format(record) + stream = self.stream + if stream.isatty() and record.levelno == logging.DEBUG and self.ENABLED: + msg = self._colorize(msg) + stream.write(msg + self.terminator) + self.flush() + except Exception: + self.handleError(record) -BASIC_FORMATTER = logging.Formatter(logger_config.LOG_PATTERN) formatter = None @@ -45,6 +75,21 @@ def set_formatter(new_formatter): handler.setFormatter(new_formatter) +logging.getLogger("yt.packages.requests.packages.urllib3").setLevel(logging.WARNING) + +LOGGER = logging.getLogger("Yt") + +LOGGER.propagate = False + +set_log_level_from_config(LOGGER) + +if logger_config.LOG_PATH is None: + LOGGER.addHandler(SimpleColorizedStreamHandler()) +else: + LOGGER.addHandler(logging.FileHandler(logger_config.LOG_PATH)) + +BASIC_FORMATTER = logging.Formatter(logger_config.LOG_PATTERN) + set_formatter(BASIC_FORMATTER) diff --git a/yt/yt/client/table_client/unittests/unversioned_row_ut.cpp b/yt/yt/client/table_client/unittests/unversioned_row_ut.cpp new file mode 100644 index 00000000000..a766846c656 --- /dev/null +++ b/yt/yt/client/table_client/unittests/unversioned_row_ut.cpp @@ -0,0 +1,32 @@ +#include <yt/yt/client/table_client/unversioned_row.h> +#include <yt/yt/client/table_client/row_buffer.h> + +#include <yt/yt/core/test_framework/framework.h> + +namespace NYT::NTableClient { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TUnversionedOwningRowTest, DefaultCtor) +{ + TUnversionedOwningRow owningRow; + ASSERT_EQ(owningRow.GetSpaceUsed(), 0ull); +} + +TEST(TUnversionedOwningRowTest, ConstructFromUnversionedRow) +{ + auto buffer = New<TRowBuffer>(); + TUnversionedRowBuilder rowBuilder; + rowBuilder.AddValue(MakeUnversionedInt64Value(123, 0)); + TUnversionedRow row = rowBuilder.GetRow(); + + TUnversionedOwningRow owningRow(row); + ASSERT_EQ(owningRow.GetCount(), 1); + ASSERT_GT(owningRow.GetSpaceUsed(), 0ull); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT::NTableClient diff --git a/yt/yt/client/table_client/unittests/ya.make b/yt/yt/client/table_client/unittests/ya.make index 911aa88179e..f7594f2b9aa 100644 --- a/yt/yt/client/table_client/unittests/ya.make +++ b/yt/yt/client/table_client/unittests/ya.make @@ -8,6 +8,7 @@ SRCS( columnar_statistics_ut.cpp columnar_ut.cpp serialization_ut.cpp + unversioned_row_ut.cpp ) INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc) diff --git a/yt/yt/client/table_client/unversioned_row.h b/yt/yt/client/table_client/unversioned_row.h index 6ca7db3dabf..e24172331f8 100644 --- a/yt/yt/client/table_client/unversioned_row.h +++ b/yt/yt/client/table_client/unversioned_row.h @@ -772,11 +772,16 @@ public: size_t GetSpaceUsed() const { - return StringData_.GetHolder()->GetTotalByteSize().value_or(StringData_.Size()) + - RowData_.GetHolder()->GetTotalByteSize().value_or(RowData_.Size()); + size_t size = 0; + if (StringData_) { + size += StringData_.GetHolder()->GetTotalByteSize().value_or(StringData_.Size()); + } + if (RowData_) { + size += RowData_.GetHolder()->GetTotalByteSize().value_or(RowData_.Size()); + } + return size; } - friend void swap(TUnversionedOwningRow& lhs, TUnversionedOwningRow& rhs) { using std::swap; diff --git a/yt/yt/core/misc/arithmetic_formula.cpp b/yt/yt/core/misc/arithmetic_formula.cpp index b8a4f733926..6aa84df4656 100644 --- a/yt/yt/core/misc/arithmetic_formula.cpp +++ b/yt/yt/core/misc/arithmetic_formula.cpp @@ -12,6 +12,8 @@ #include <util/generic/hash.h> +#include <algorithm> + namespace NYT { using namespace NYson; @@ -31,11 +33,8 @@ bool IsSymbolAllowedInName(char c, EEvaluationContext context, bool isFirst) if (std::isalpha(c) || c == '_') { return true; } - if (isFirst) { - return false; - } if (std::isdigit(c)) { - return true; + return !isFirst || context == EEvaluationContext::Boolean; } if (context == EEvaluationContext::Boolean && extraAllowedBooleanVariableTokens.contains(c)) { return true; @@ -49,11 +48,16 @@ void ValidateFormulaVariable(const TString& variable, EEvaluationContext context THROW_ERROR_EXCEPTION("Variable should not be empty"); } for (char c : variable) { - if (!IsSymbolAllowedInName(c, context, false)) { + if (!IsSymbolAllowedInName(c, context, /*isFirst*/ false)) { THROW_ERROR_EXCEPTION("Invalid character %Qv in variable %Qv", c, variable); } } - if (!IsSymbolAllowedInName(variable[0], context, true)) { + if (context == EEvaluationContext::Boolean) { + if (std::all_of(variable.begin(), variable.end(), [] (char c) {return std::isdigit(c);})) { + THROW_ERROR_EXCEPTION("All digits characters are prohibited for boolean variable %Qv", variable); + } + } + if (!IsSymbolAllowedInName(variable[0], context, /*isFirst*/ true)) { THROW_ERROR_EXCEPTION("Invalid first character in variable %Qv", variable); } if (variable == "in") { @@ -416,7 +420,7 @@ std::vector<TFormulaToken> TGenericFormulaImpl::Tokenize(const TString& formula, char second = pos + 1 < formula.size() ? formula[pos + 1] : '\0'; if (first == 'i' && second == 'n') { char third = pos + 2 < formula.size() ? formula[pos + 2] : '\0'; - if (IsSymbolAllowedInName(third, context, false)) { + if (IsSymbolAllowedInName(third, context, /*isFirst*/ false)) { return std::nullopt; } else { pos += 2; @@ -523,9 +527,10 @@ std::vector<TFormulaToken> TGenericFormulaImpl::Tokenize(const TString& formula, auto extractVariable = [&] { TString name; - while (pos < formula.size() && IsSymbolAllowedInName(formula[pos], context, name.empty())) { + while (pos < formula.size() && IsSymbolAllowedInName(formula[pos], context, /*isFirst*/ name.empty())) { name += formula[pos++]; } + ValidateFormulaVariable(name, context); return name; }; @@ -564,7 +569,7 @@ std::vector<TFormulaToken> TGenericFormulaImpl::Tokenize(const TString& formula, TFormulaToken token; token.Position = pos; - if (std::isdigit(c) || (c == '-' && !expectBinaryOperator)) { + if (context == EEvaluationContext::Arithmetic && (std::isdigit(c) || (c == '-' && !expectBinaryOperator))) { token.Type = EFormulaTokenType::Number; token.Number = extractNumber(); expectBinaryOperator = true; diff --git a/yt/yt/core/misc/unittests/boolean_formula_ut.cpp b/yt/yt/core/misc/unittests/boolean_formula_ut.cpp index c4faaecca06..0bae531297b 100644 --- a/yt/yt/core/misc/unittests/boolean_formula_ut.cpp +++ b/yt/yt/core/misc/unittests/boolean_formula_ut.cpp @@ -148,8 +148,9 @@ TEST(TBooleanFormulaTest, ValidateVariable) ValidateBooleanFormulaVariable("tablet_common/news-queue"); ValidateBooleanFormulaVariable("VAR123VAR"); ValidateBooleanFormulaVariable("IN"); + ValidateBooleanFormulaVariable("2var"); - EXPECT_THROW(ValidateBooleanFormulaVariable("2var"), TErrorException); + EXPECT_THROW(ValidateBooleanFormulaVariable("2"), TErrorException); EXPECT_THROW(ValidateBooleanFormulaVariable("foo bar"), TErrorException); EXPECT_THROW(ValidateBooleanFormulaVariable(""), TErrorException); EXPECT_THROW(ValidateBooleanFormulaVariable("a+b"), TErrorException); diff --git a/yt/yt/core/rpc/service_detail.cpp b/yt/yt/core/rpc/service_detail.cpp index cce7b8f625d..87093d5295b 100644 --- a/yt/yt/core/rpc/service_detail.cpp +++ b/yt/yt/core/rpc/service_detail.cpp @@ -1532,10 +1532,10 @@ void TRequestQueue::IncrementQueueSize(const TServiceBase::TServiceContextPtr& c void TRequestQueue::DecrementQueueSize(const TServiceBase::TServiceContextPtr& context) { auto newQueueSize = --QueueSize_; - auto oldQueueBytesSize = QueueByteSize_.fetch_sub(GetTotalRequestSize(context)); + auto oldQueueByteSize = QueueByteSize_.fetch_sub(GetTotalRequestSize(context)); YT_ASSERT(newQueueSize >= 0); - YT_ASSERT(oldQueueBytesSize >= 0); + YT_ASSERT(oldQueueByteSize >= 0); } i64 TRequestQueue::GetTotalRequestSize(const TServiceBase::TServiceContextPtr& context) diff --git a/yt/yt/core/ya.make b/yt/yt/core/ya.make index cac3d606353..8d449fdc30c 100644 --- a/yt/yt/core/ya.make +++ b/yt/yt/core/ya.make @@ -251,6 +251,7 @@ SRCS( yson/writer.cpp yson/string_merger.cpp yson/ypath_designated_consumer.cpp + yson/ypath_filtering_consumer.cpp yson/depth_limiting_yson_consumer.cpp yson/list_verb_lazy_yson_consumer.cpp yson/attributes_stripper.cpp diff --git a/yt/yt/core/ypath/tokenizer.cpp b/yt/yt/core/ypath/tokenizer.cpp index 86a2183dc9f..45fa0f40208 100644 --- a/yt/yt/core/ypath/tokenizer.cpp +++ b/yt/yt/core/ypath/tokenizer.cpp @@ -148,7 +148,7 @@ int TTokenizer::ParseHexDigit(char ch, TStringBuf context) YT_ABORT(); } -void TTokenizer::Expect(ETokenType expectedType) +void TTokenizer::Expect(ETokenType expectedType) const { if (expectedType != Type_) { if (Type_ == ETokenType::EndOfStream) { @@ -169,7 +169,7 @@ void TTokenizer::Expect(ETokenType expectedType) } } -void TTokenizer::ExpectListIndex() +void TTokenizer::ExpectListIndex() const { Expect(NYPath::ETokenType::Literal); i64 index; @@ -190,7 +190,7 @@ bool TTokenizer::Skip(ETokenType expectedType) return false; } -void TTokenizer::ThrowUnexpected() +void TTokenizer::ThrowUnexpected() const { if (Type_ == ETokenType::EndOfStream) { if (PreviousType_ == ETokenType::Slash) { diff --git a/yt/yt/core/ypath/tokenizer.h b/yt/yt/core/ypath/tokenizer.h index 27725db10ac..46cf2a058d2 100644 --- a/yt/yt/core/ypath/tokenizer.h +++ b/yt/yt/core/ypath/tokenizer.h @@ -14,6 +14,9 @@ public: TTokenizer(const TTokenizer&) = delete; TTokenizer& operator=(const TTokenizer&) = delete; + TTokenizer(TTokenizer&&) = default; + TTokenizer& operator=(TTokenizer&&) = default; + void Reset(TYPathBuf path); ETokenType Advance(); @@ -27,10 +30,10 @@ public: TStringBuf GetPath() const; const TString& GetLiteralValue() const; - void Expect(ETokenType expectedType); - void ExpectListIndex(); + void Expect(ETokenType expectedType) const; + void ExpectListIndex() const; bool Skip(ETokenType expectedType); - [[noreturn]] void ThrowUnexpected(); + [[noreturn]] void ThrowUnexpected() const; private: TYPathBuf Path_; diff --git a/yt/yt/core/yson/unittests/ya.make b/yt/yt/core/yson/unittests/ya.make index 5bb15cf408a..a153d12ce7a 100644 --- a/yt/yt/core/yson/unittests/ya.make +++ b/yt/yt/core/yson/unittests/ya.make @@ -17,6 +17,7 @@ SRCS( protobuf_yson_schema_ut.cpp protobuf_yson_ut.cpp ypath_designated_yson_consumer_ut.cpp + ypath_filtering_yson_consumer_ut.cpp yson_parser_ut.cpp yson_pull_parser_ut.cpp yson_token_writer_ut.cpp diff --git a/yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp b/yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp new file mode 100644 index 00000000000..3173d603dcb --- /dev/null +++ b/yt/yt/core/yson/unittests/ypath_filtering_yson_consumer_ut.cpp @@ -0,0 +1,499 @@ +#include <yt/yt/core/test_framework/framework.h> + +#include <yt/yt/core/ytree/fluent.h> + +#include <yt/yt/core/yson/ypath_filtering_consumer.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +class TYPathFilteringConsumerTest + : public ::testing::Test +{ +public: + TYPathFilteringConsumerTest() + : Output_(ValueString_) + , Consumer_(CreateYsonWriter(&Output_, EYsonFormat::Text, EYsonType::Node, /*enableRaw*/ false)) + { } + + IYsonConsumer* GetConsumer() + { + return Consumer_.get(); + } + + TString FlushOutput() + { + auto result = std::move(ValueString_); + ValueString_.clear(); + return result; + } + +private: + TString ValueString_; + TStringOutput Output_; + std::unique_ptr<IYsonConsumer> Consumer_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +TEST_F(TYPathFilteringConsumerTest, BlacklistMap1) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistMap2) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forbidden_key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), "{}"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistMap3) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistList) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/1/key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginList() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndList(); + + ASSERT_EQ(FlushOutput(), R"([{"key"="value";};{};{"key"="value";};])"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistListNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/1/key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginList() + .Item() + .BeginList() + .Item().Entity() + .Item().Entity() + .Item().Entity() + .EndList() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .Item() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndList(); + + ASSERT_EQ(FlushOutput(), R"([[#;#;#;];{};{"key"="value";};])"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistAttributes) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/@"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginAttributes() + .Item("some_attr").Entity() + .EndAttributes() + .Entity(); + + ASSERT_EQ(FlushOutput(), R"(#)"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistAttribute) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/@forbidden_attr"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginAttributes() + .Item("forbidden_attr").Entity() + .Item("allowed_attr").Entity() + .EndAttributes() + .Entity(); + + ASSERT_EQ(FlushOutput(), R"(<"allowed_attr"=#;>#)"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistNestedMap) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forbidden_key", "/nested_map/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .Item("nested_map") + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";"nested_map"={"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, BlacklistNestedAsterisk) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/*/forbidden_key"}, + EPathFilteringMode::Blacklist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("nested_map1") + .BeginMap() + .Item("key").Value("value") + .Item("forbidden_key").Value("value") + .EndMap() + .Item("nested_map2") + .BeginMap() + .Item("forbidden_key").Value("value") + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"nested_map1"={"key"="value";};"nested_map2"={"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, WhitelistMap1) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {""}, + EPathFilteringMode::Whitelist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, WhitelistMap2) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/allowed_key"}, + EPathFilteringMode::Whitelist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("allowed_key").Value("value") + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"allowed_key"="value";})"); +} + +TEST_F(TYPathFilteringConsumerTest, WhitelistNestedMap) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/allowed_key", "/nested/allowed_key"}, + EPathFilteringMode::Whitelist); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .Item("allowed_key").Value("value") + .Item("nested") + .BeginMap() + .Item("key").Value("value") + .Item("allowed_key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"allowed_key"="value";"nested"={"allowed_key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedSimple) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedWhitelistMixedNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedMixedNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"key"="value";"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedAttribute) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/@forced_attr"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginAttributes() + .EndAttributes() + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"=<"forced_attr"=#;>{"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedAttributes1) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/@"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"=<>{"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedAttributes2) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/@forced_key"}, + EPathFilteringMode::ForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"=<"forced_key"=#;>{"key"="value";};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedMultiplePaths) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key1", "/forced_key2"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key1"=#;"forced_key2"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedMultiplePathsNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key","/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};"forced_key"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringSimple) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"=#;})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringSimplePrefix) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("forced_key") + .BeginMap() + .Item("inner").Entity() + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"={"inner"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .Item("data") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"data"={"forced_key"=#;};})"); +} + +TEST_F(TYPathFilteringConsumerTest, ForcedFilteringMultiplePathsNested) +{ + auto consumer = CreateYPathFilteringConsumer( + GetConsumer(), + /*paths*/ {"/forced_key", "/data/forced_key"}, + EPathFilteringMode::WhitelistWithForcedEntities); + + NYTree::BuildYsonFluently(consumer.get()) + .BeginMap() + .Item("key").Value("value") + .Item("some_map") + .BeginMap() + .Item("key").Value("value") + .EndMap() + .EndMap(); + + ASSERT_EQ(FlushOutput(), R"({"forced_key"=#;"data"={"forced_key"=#;};})"); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/yt/yt/core/yson/ypath_filtering_consumer.cpp b/yt/yt/core/yson/ypath_filtering_consumer.cpp new file mode 100644 index 00000000000..ec1b610623b --- /dev/null +++ b/yt/yt/core/yson/ypath_filtering_consumer.cpp @@ -0,0 +1,446 @@ +#include "ypath_filtering_consumer.h" + +#include "null_consumer.h" + +#include <yt/yt/core/misc/error.h> + +#include <yt/yt/core/ypath/tokenizer.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +struct TAtTokenTag +{ }; + +//////////////////////////////////////////////////////////////////////////////// + +class TYPathFilteringConsumerBase + : public NYson::TYsonConsumerBase +{ +public: + void OnStringScalar(TStringBuf value) override + { + BeforeValue(); + Forward_->OnStringScalar(value); + } + + void OnInt64Scalar(i64 value) override + { + BeforeValue(); + Forward_->OnInt64Scalar(value); + } + + void OnUint64Scalar(ui64 value) override + { + BeforeValue(); + Forward_->OnUint64Scalar(value); + } + + void OnDoubleScalar(double value) override + { + BeforeValue(); + Forward_->OnDoubleScalar(value); + } + + void OnBooleanScalar(bool value) override + { + BeforeValue(); + Forward_->OnBooleanScalar(value); + } + + void OnEntity() override + { + BeforeValue(); + Forward_->OnEntity(); + } + + void OnBeginMap() override + { + BeforeValue(); + AfterCollectionBegin_ = true; + + Forward_->OnBeginMap(); + } + + void OnKeyedItem(TStringBuf key) override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + AppendPath(key); + + Forward_->OnKeyedItem(key); + } + + void OnEndMap() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + BeforeEndDictionary(); + Forward_->OnEndMap(); + } + + void OnBeginList() override + { + BeforeValue(); + AfterCollectionBegin_ = true; + ListIndexes_.push_back(0); + + Forward_->OnBeginList(); + } + + void OnListItem() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + AppendPath(ListIndexes_.back()); + ++ListIndexes_.back(); + + Forward_->OnListItem(); + } + + void OnEndList() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + AfterCollectionBegin_ = false; + ListIndexes_.pop_back(); + + Forward_->OnEndList(); + + } + + void OnBeginAttributes() override + { + AppendPath(TAtTokenTag{}); + + AfterCollectionBegin_ = true; + + Forward_->OnBeginAttributes(); + } + + void OnEndAttributes() override + { + if (!AfterCollectionBegin_) { + RollbackPath(); + } + + BeforeEndDictionary(); + Forward_->OnEndAttributes(); + RollbackPath(); + } + +protected: + virtual void AppendPath(TStringBuf literal) = 0; + virtual void AppendPath(int listIndex) = 0; + virtual void AppendPath(TAtTokenTag) = 0; + + virtual void RollbackPath() = 0; + + virtual void BeforeValue() = 0; + virtual void BeforeEndDictionary() = 0; + + void SetForward(NYson::IYsonConsumer* consumer) + { + Forward_ = consumer; + } + + void ResetForward() + { + Forward_ = GetNullYsonConsumer(); + } + + NYson::IYsonConsumer* GetForward() + { + return Forward_; + } + +private: + NYson::IYsonConsumer* Forward_; + bool AfterCollectionBegin_ = false; + std::vector<int> ListIndexes_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TYPathFilteringConsumer + : public TYPathFilteringConsumerBase +{ +public: + TYPathFilteringConsumer( + NYson::IYsonConsumer* underlying, + std::vector<NYPath::TYPath> paths, + EPathFilteringMode mode) + : Underlying_(underlying) + , Paths_(std::move(paths)) + , Mode_(mode) + { + SetForward(Underlying_); + PerPathFilteringStates_.reserve(Paths_.size()); + for (int i = 0; i < std::ssize(Paths_); ++i) { + PerPathFilteringStates_.push_back(TPathFilteringState{ + .Tokenizer = NYPath::TTokenizer(Paths_[i]), + .MaxMatchedDepth = 0, + .Fulfilled = false, + .Index = i + }); + } + + for (auto& state : PerPathFilteringStates_) { + state.Tokenizer.Expect(NYPath::ETokenType::StartOfStream); + if (!ToNextLiteral(state.Tokenizer)) { + state.Fulfilled = true; + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + } + } + } + +private: + struct TPathFilteringState + { + NYPath::TTokenizer Tokenizer; + int MaxMatchedDepth; + bool Fulfilled; + int Index; + }; + + NYson::IYsonConsumer* Underlying_; + const std::vector<NYPath::TYPath> Paths_; + const EPathFilteringMode Mode_; + bool SubtreeFiltering_ = false; + int FilteringDepth_ = -1; + std::vector<TPathFilteringState> PerPathFilteringStates_; + int Depth_ = 0; + + // For asterisk (*) matching support only. + std::vector<TPathFilteringState> SavedFilteringStates_; + + template <typename TTokenType> + void AppendPathImpl(TTokenType token) + { + ++Depth_; + + bool advancedAnyPath = false; + + for (auto& state : PerPathFilteringStates_) { + if (Depth_ != state.MaxMatchedDepth + 1 || state.Fulfilled) { + continue; + } + + if (DoesMatch(state.Tokenizer, token)) { + if (state.Tokenizer.GetType() == NYPath::ETokenType::Asterisk) { + SaveFilteringState(state); + } + + ++state.MaxMatchedDepth; + advancedAnyPath = true; + if (!ToNextLiteral(state.Tokenizer)) { + state.Fulfilled = true; + + if (!SubtreeFiltering_) { + if (IsBlacklistMode()) { + ResetForward(); + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + } else if (IsWhitelistMode()) { + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + } + } + } + } + } + + if (IsWhitelistMode() && !SubtreeFiltering_) { + if (!advancedAnyPath) { + SubtreeFiltering_ = true; + FilteringDepth_ = Depth_; + ResetForward(); + } + } + } + + void AppendPath(TStringBuf literal) override + { + AppendPathImpl(literal); + } + + void AppendPath(int listIndex) override + { + AppendPathImpl<TStringBuf>(NYPath::ToYPathLiteral(listIndex)); + } + + void AppendPath(TAtTokenTag tag) override + { + AppendPathImpl(tag); + } + + void RollbackPath() override + { + if (SubtreeFiltering_ && Depth_ == FilteringDepth_) { + SubtreeFiltering_ = false; + SetForward(Underlying_); + } + + --Depth_; + + if (!IsForcedEntitiesMode()) { + while (!SavedFilteringStates_.empty() && SavedFilteringStates_.back().MaxMatchedDepth == Depth_) { + PerPathFilteringStates_[SavedFilteringStates_.back().Index] = std::move(SavedFilteringStates_.back()); + SavedFilteringStates_.pop_back(); + } + } + } + + void BeforeEndDictionary() override + { + if (!IsForcedEntitiesMode()) { + return; + } + + for (auto& state : PerPathFilteringStates_) { + if (Depth_ != state.MaxMatchedDepth || state.Fulfilled) { + continue; + } + + auto& tokenizer = state.Tokenizer; + std::vector<bool> isAttributesStack; + tokenizer.Expect(NYPath::ETokenType::Literal); + GetForward()->OnKeyedItem(tokenizer.GetLiteralValue()); + while(ToNextLiteral(tokenizer)) { + if (tokenizer.GetType() == NYPath::ETokenType::At) { + isAttributesStack.push_back(true); + GetForward()->OnBeginAttributes(); + tokenizer.Advance(); + } else { + isAttributesStack.push_back(false); + GetForward()->OnBeginMap(); + } + tokenizer.Expect(NYPath::ETokenType::Literal); + GetForward()->OnKeyedItem(tokenizer.GetLiteralValue()); + } + GetForward()->OnEntity(); + tokenizer.Expect(NYPath::ETokenType::EndOfStream); + while(!isAttributesStack.empty()) { + if (isAttributesStack.back()) { + GetForward()->OnEndAttributes(); + } else { + GetForward()->OnEndMap(); + } + isAttributesStack.pop_back(); + } + + state.Fulfilled = true; + } + } + + void BeforeValue() override + { + for (auto& state : PerPathFilteringStates_) { + if (Depth_ != state.MaxMatchedDepth || state.Fulfilled) { + continue; + } + + if (state.Tokenizer.GetType() == NYPath::ETokenType::At) { + OnBeginAttributes(); + OnEndAttributes(); + return; + } + } + } + + bool ToNextLiteral(NYPath::TTokenizer& tokenizer) + { + if (tokenizer.GetType() == NYPath::ETokenType::At) { + return tokenizer.Advance() != NYPath::ETokenType::EndOfStream; + } + tokenizer.Advance(); + if (tokenizer.GetType() == NYPath::ETokenType::EndOfStream) { + return false; + } else { + tokenizer.Expect(NYPath::ETokenType::Slash); + tokenizer.Advance(); + return true; + } + } + + bool IsBlacklistMode() const + { + return Mode_ == EPathFilteringMode::Blacklist; + } + + bool IsWhitelistMode() const + { + return Mode_ == EPathFilteringMode::Whitelist || Mode_ == EPathFilteringMode::WhitelistWithForcedEntities; + } + + bool IsForcedEntitiesMode() const + { + return Mode_ == EPathFilteringMode::ForcedEntities || Mode_ == EPathFilteringMode::WhitelistWithForcedEntities; + } + + template <typename TTokenType> + bool DoesMatch(const NYPath::TTokenizer& tokenizer, TTokenType) const + { + if (tokenizer.GetType() == NYPath::ETokenType::At) { + return std::is_same_v<TTokenType, TAtTokenTag>; + } + + return false; + } + + template <> + bool DoesMatch<TStringBuf>(const NYPath::TTokenizer& tokenizer, TStringBuf token) const + { + if (tokenizer.GetType() == NYPath::ETokenType::Asterisk) { + THROW_ERROR_EXCEPTION_IF(IsForcedEntitiesMode(), + "YPathFilteringConsumer does not allow asterisk matching in forced entities modes"); + return true; + } + + tokenizer.Expect(NYPath::ETokenType::Literal); + return tokenizer.GetLiteralValue() == token; + } + + void SaveFilteringState(const TPathFilteringState& state) + { + SavedFilteringStates_.push_back(TPathFilteringState{ + .Tokenizer = NYPath::TTokenizer(state.Tokenizer.GetInput()), + .MaxMatchedDepth = state.MaxMatchedDepth, + .Fulfilled = state.Fulfilled, + .Index = state.Index, + }); + + SavedFilteringStates_.back().Tokenizer.Skip(NYPath::ETokenType::StartOfStream); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +std::unique_ptr<NYson::IYsonConsumer> CreateYPathFilteringConsumer( + NYson::IYsonConsumer* underlying, + std::vector<NYPath::TYPath> paths, + EPathFilteringMode mode) +{ + return std::make_unique<TYPathFilteringConsumer>(underlying, std::move(paths), mode); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/yt/yt/core/yson/ypath_filtering_consumer.h b/yt/yt/core/yson/ypath_filtering_consumer.h new file mode 100644 index 00000000000..afbd0c02364 --- /dev/null +++ b/yt/yt/core/yson/ypath_filtering_consumer.h @@ -0,0 +1,41 @@ +#pragma once + +#include <yt/yt/core/ypath/public.h> + +#include <library/cpp/yt/yson/consumer.h> + +namespace NYT::NYson { + +/////////////////////////////////////////////////////////////////////////////// + +DEFINE_ENUM(EPathFilteringMode, + (Blacklist) + (Whitelist) + (WhitelistWithForcedEntities) + (ForcedEntities) +); + +//////////////////////////////////////////////////////////////////////////////// + +// Creates consumer that wraps underlying consumer providing +// filtering and missing value handling support. +// Several filtering modes are supported: +// - In `Blacklist` mode all nodes inside matched paths are omitted. +// - In `Whitelist` mode all nodes outside matched paths are omitted. +// - In `ForcedEntities` mode an entity node is inserted in case +// no value at given path is consumed. +// - The `WhitelistWithForcedEntities` values are consumed only at provided +// paths and entities are inserted in case of missing values. +// Attribute handling is available in all modes. For `ForcedEntities` modes +// `/@` path stands for inserting empty attribute dictionary, `/@ATTRIBUTE_NAME` for +// inserting the certain attribute. +// Asterisk matching is allowed only for `Blacklist` and `Whitelist` modes. +// Only the `EYsonType::Node` consuming is supported. +std::unique_ptr<NYson::IYsonConsumer> CreateYPathFilteringConsumer( + NYson::IYsonConsumer* underlying, + std::vector<NYPath::TYPath> paths, + EPathFilteringMode mode); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/yt/yt/library/profiling/sensor.cpp b/yt/yt/library/profiling/sensor.cpp index 0e37acf218a..beb76248ccd 100644 --- a/yt/yt/library/profiling/sensor.cpp +++ b/yt/yt/library/profiling/sensor.cpp @@ -182,7 +182,7 @@ void TGaugeHistogram::Remove(double value, int count) const noexcept Histogram_->Remove(value, count); } -void TGaugeHistogram::Reset() noexcept +void TGaugeHistogram::Reset() const noexcept { if (!Histogram_) { return; @@ -200,7 +200,7 @@ THistogramSnapshot TGaugeHistogram::GetSnapshot() const return Histogram_->GetSnapshot(false); } -void TGaugeHistogram::LoadSnapshot(THistogramSnapshot snapshot) +void TGaugeHistogram::LoadSnapshot(THistogramSnapshot snapshot) const { if (!Histogram_) { return; diff --git a/yt/yt/library/profiling/sensor.h b/yt/yt/library/profiling/sensor.h index 3d81605ed10..209ac43f415 100644 --- a/yt/yt/library/profiling/sensor.h +++ b/yt/yt/library/profiling/sensor.h @@ -142,10 +142,10 @@ class TGaugeHistogram public: void Add(double value, int count = 1) const noexcept; void Remove(double value, int count = 1) const noexcept; - void Reset() noexcept; + void Reset() const noexcept; THistogramSnapshot GetSnapshot() const; - void LoadSnapshot(THistogramSnapshot snapshot); + void LoadSnapshot(THistogramSnapshot snapshot) const; explicit operator bool() const; |
