summaryrefslogtreecommitdiffstats
path: root/yt/python
diff options
context:
space:
mode:
authoruzhas <[email protected]>2023-11-18 13:15:37 +0300
committeruzhas <[email protected]>2023-11-18 13:33:08 +0300
commit2c2b40cc0e738d6a07d3f713e50f5dc660ef7426 (patch)
tree3f904d3b0409e924810fd80d4f8c30bdd0f9e089 /yt/python
parente5f17f4337ead20a6fd1f4948e3ebe4e75c5b7f6 (diff)
to fix build in github
Diffstat (limited to 'yt/python')
-rw-r--r--yt/python/yt/__init__.py6
-rw-r--r--yt/python/yt/common.py846
-rw-r--r--yt/python/yt/json_wrapper.py24
-rw-r--r--yt/python/yt/logger.py72
-rw-r--r--yt/python/yt/logger_config.py7
-rw-r--r--yt/python/yt/subprocess_wrapper.py19
-rw-r--r--yt/python/yt/type_info/README.md2
-rw-r--r--yt/python/yt/type_info/__init__.py11
-rw-r--r--yt/python/yt/type_info/test/lib/conftest.py30
-rw-r--r--yt/python/yt/type_info/test/lib/helpers.py30
-rw-r--r--yt/python/yt/type_info/test/lib/test_common.py49
-rw-r--r--yt/python/yt/type_info/test/lib/test_helpers.py43
-rw-r--r--yt/python/yt/type_info/test/lib/test_io.py167
-rw-r--r--yt/python/yt/type_info/test/lib/test_typing.py228
-rw-r--r--yt/python/yt/type_info/test/lib/ya.make22
-rw-r--r--yt/python/yt/type_info/test/py2/ya.make7
-rw-r--r--yt/python/yt/type_info/test/py3/ya.make7
-rw-r--r--yt/python/yt/type_info/test/ya.make5
-rw-r--r--yt/python/yt/type_info/type_base.py109
-rw-r--r--yt/python/yt/type_info/typing.py443
-rw-r--r--yt/python/yt/type_info/ya.make19
-rw-r--r--yt/python/yt/ya.make26
-rw-r--r--yt/python/yt/yson/__init__.py86
-rw-r--r--yt/python/yt/yson/common.py69
-rw-r--r--yt/python/yt/yson/convert.py162
-rw-r--r--yt/python/yt/yson/lexer.py392
-rw-r--r--yt/python/yt/yson/parser.py287
-rw-r--r--yt/python/yt/yson/tokenizer.py21
-rw-r--r--yt/python/yt/yson/writer.py442
-rw-r--r--yt/python/yt/yson/ya.make23
-rw-r--r--yt/python/yt/yson/yson_token.py155
-rw-r--r--yt/python/yt/yson/yson_types.py331
32 files changed, 4140 insertions, 0 deletions
diff --git a/yt/python/yt/__init__.py b/yt/python/yt/__init__.py
new file mode 100644
index 00000000000..b7cffee5e3c
--- /dev/null
+++ b/yt/python/yt/__init__.py
@@ -0,0 +1,6 @@
+"""Python interfaces for YT system.
+
+YT is reliable and performant distributed storage and computation platform.
+There are Python interfaces for low level YT client API: binding to C++ Driver API, wrapper for HTTP-interface and
+some pretty "binaries" for interactive usage in command line.
+"""
diff --git a/yt/python/yt/common.py b/yt/python/yt/common.py
new file mode 100644
index 00000000000..b2ee8fb8db0
--- /dev/null
+++ b/yt/python/yt/common.py
@@ -0,0 +1,846 @@
+try:
+ from yt.packages.six import iteritems, PY3, text_type, binary_type, string_types
+ from yt.packages.six.moves import map as imap
+except ImportError:
+ from six import iteritems, PY3, text_type, binary_type, string_types
+ from six.moves import map as imap
+
+import yt.json_wrapper as json
+
+try:
+ from library.python.prctl import prctl
+except ImportError:
+ prctl = None
+
+# Fix for thread unsafety of datetime module.
+# See http://bugs.python.org/issue7980 for more details.
+import _strptime # noqa
+
+# Python3 compatibility
+try:
+ from collections.abc import Mapping
+except ImportError:
+ from collections import Mapping
+from datetime import datetime, timedelta
+from itertools import chain
+from functools import wraps
+
+import calendar
+import copy
+import ctypes
+import errno
+import functools
+import inspect
+import os
+import re
+import signal
+import socket
+import sys
+import time
+import types
+import string
+import warnings
+
+# Standard YT time representation
+
+YT_DATETIME_FORMAT_STRING = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+YT_NULL_TRANSACTION_ID = "0-0-0-0"
+
+
+# Deprecation stuff.
+class YtDeprecationWarning(DeprecationWarning):
+ """Custom warnings category, because built-in category is ignored by default."""
+
+
+warnings.simplefilter("default", category=YtDeprecationWarning)
+
+DEFAULT_DEPRECATION_MESSAGE = "{0} is deprecated and will be removed in the next major release, " \
+ "use {1} instead"
+
+ERROR_TEXT_MATCHING_DEPRECATION_MESSAGE = "Matching errors by their messages using string patterns is highly " \
+ "discouraged. It is recommended to use contains_code(code) method instead. " \
+ "If there is no suitable error code for your needs, ask yt@ for creating one."
+
+
+def declare_deprecated(functional_name, alternative_name, condition=None, message=None):
+ if condition or condition is None:
+ message = get_value(message, DEFAULT_DEPRECATION_MESSAGE.format(functional_name, alternative_name))
+ warnings.warn(message, YtDeprecationWarning)
+
+
+def deprecated_with_message(message):
+ def function_decorator(func):
+ @wraps(func)
+ def deprecated_function(*args, **kwargs):
+ warnings.warn(message, YtDeprecationWarning)
+ return func(*args, **kwargs)
+ return deprecated_function
+ return function_decorator
+
+
+def deprecated(alternative):
+ def function_decorator(func):
+ warn_message = DEFAULT_DEPRECATION_MESSAGE.format(func.__name__, alternative)
+ return deprecated_with_message(warn_message)(func)
+ return function_decorator
+
+
+def get_fqdn():
+ fqdn = socket.getfqdn()
+ if fqdn == "localhost.localdomain":
+ fqdn = "localhost"
+ return fqdn
+
+
+class YtError(Exception):
+ """Base class for all YT errors."""
+ def __init__(self, message="", code=1, inner_errors=None, attributes=None):
+ self.message = message
+ self.code = code
+ self.inner_errors = inner_errors if inner_errors is not None else []
+ self.attributes = attributes if attributes else {}
+ if "host" not in self.attributes:
+ self.attributes["host"] = self._get_fqdn()
+ if "datetime" not in self.attributes:
+ self.attributes["datetime"] = datetime_to_string(datetime.utcnow())
+
+ def simplify(self):
+ """Transforms error (with inner errors) to standard python dict."""
+ result = {"message": self.message, "code": self.code}
+ if self.attributes:
+ result["attributes"] = self.attributes
+ if self.inner_errors:
+ result["inner_errors"] = []
+ for error in self.inner_errors:
+ result["inner_errors"].append(
+ error.simplify() if isinstance(error, YtError) else
+ error)
+ return result
+
+ @classmethod
+ def from_dict(cls, dict_):
+ """Restores YtError instance from standard python dict. Reverses simplify()."""
+ inner_errors = [cls.from_dict(inner) for inner in dict_.get("inner_errors", [])]
+ return cls(message=dict_["message"], code=dict_["code"], attributes=dict_.get("attributes"),
+ inner_errors=inner_errors)
+
+ def find_matching_error(self, code=None, predicate=None):
+ """
+ Find a suberror contained in the error (possibly the error itself) which is either:
+ - having error code equal to `code';
+ - or satisfying custom predicate `predicate'.
+
+ Exactly one condition should be specified.
+
+ Returns either first error matching the condition or None if no matching found.
+ """
+
+ if sum(argument is not None for argument in (code, predicate)) != 1:
+ raise ValueError("Exactly one condition should be specified")
+
+ if code is not None:
+ predicate = lambda error: int(error.code) == code # noqa
+
+ def find_recursive(error):
+ # error may be Python dict; if so, transform it to YtError.
+ if not isinstance(error, YtError):
+ error = YtError(**error)
+
+ if predicate(error):
+ return error
+ for inner_error in error.inner_errors:
+ inner_result = find_recursive(inner_error)
+ if inner_result:
+ return inner_result
+ return None
+
+ return find_recursive(self)
+
+ def contains_code(self, code):
+ """Check if error or one of its inner errors contains specified error code."""
+ return self.find_matching_error(code=code) is not None
+
+ def _contains_text(self, text):
+ """Inner method, do not call explicitly."""
+ return self.find_matching_error(predicate=lambda error: text in error.message) is not None
+
+ @deprecated_with_message(ERROR_TEXT_MATCHING_DEPRECATION_MESSAGE)
+ def contains_text(self, text):
+ """
+ Check if error or one of its inner errors contains specified substring in message.
+
+ It is not recommended to use this helper; consider using contains_code instead.
+ If the error you are seeking is not distinguishable by code, please send a message to yt@
+ and we will fix that.
+ """
+
+ return self._contains_text(text)
+
+ def _matches_regexp(self, pattern):
+ """Inner method, do not call explicitly."""
+ return self.find_matching_error(predicate=lambda error: re.match(pattern, error.message) is not None) is not None
+
+ @deprecated_with_message(ERROR_TEXT_MATCHING_DEPRECATION_MESSAGE)
+ def matches_regexp(self, pattern):
+ """
+ Check if error message or one of its inner error messages matches given regexp.
+
+ It is not recommended to use this helper; consider using contains_code instead.
+ If the error you are seeking is not distinguishable by code, please send a message to yt@
+ and we will fix that.
+ """
+
+ return self._matches_regexp(pattern)
+
+ def __str__(self):
+ return format_error(self)
+
+ def __repr__(self):
+ return "%s(%s)" % (
+ self.__class__.__name__,
+ _pretty_format_messages_flat(self))
+
+ @staticmethod
+ def _get_fqdn():
+ if not hasattr(YtError, "_cached_fqdn"):
+ YtError._cached_fqdn = get_fqdn()
+ return YtError._cached_fqdn
+
+ # Error differentiation methods.
+ def is_retriable_archive_error(self):
+ """
+ Operation progress in Cypress is outdated or some attributes are archive only
+ while archive request failed
+ """
+ return self.contains_code(1911)
+
+ def is_resolve_error(self):
+ """Resolution error."""
+ return self.contains_code(500)
+
+ def is_already_exists(self):
+ """Already exists."""
+ return self.contains_code(501)
+
+ def is_access_denied(self):
+ """Access denied."""
+ return self.contains_code(901)
+
+ def is_account_limit_exceeded(self):
+ """Access denied."""
+ return self.contains_code(902)
+
+ def is_concurrent_transaction_lock_conflict(self):
+ """Deprecated! Transaction lock conflict."""
+ return self.contains_code(402)
+
+ def is_cypress_transaction_lock_conflict(self):
+ """Transaction lock conflict."""
+ return self.contains_code(402)
+
+ def is_tablet_transaction_lock_conflict(self):
+ """Transaction lock conflict."""
+ return self.contains_code(1700)
+
+ @deprecated(alternative='use is_request_queue_size_limit_exceeded')
+ def is_request_rate_limit_exceeded(self):
+ """Request rate limit exceeded."""
+ return self.contains_code(904)
+
+ def is_safe_mode_enabled(self):
+ """Safe mode enabled."""
+ return self.contains_code(906)
+
+ def is_request_queue_size_limit_exceeded(self):
+ """Request rate limit exceeded."""
+ return self.contains_code(108) or self.contains_code(904)
+
+ def is_rpc_unavailable(self):
+ """Rpc unavailable."""
+ return self.contains_code(105)
+
+ def is_master_communication_error(self):
+ """Master communication error."""
+ return self.contains_code(712)
+
+ def is_chunk_unavailable(self):
+ """Chunk unavailable."""
+ return self.contains_code(716)
+
+ def is_request_timed_out(self):
+ """Request timed out."""
+ return self.contains_code(3)
+
+ def is_concurrent_operations_limit_reached(self):
+ """Too many concurrent operations."""
+ return self.contains_code(202)
+
+ def is_master_disconnected(self):
+ """Master disconnected error."""
+ return self.contains_code(218)
+
+ def is_no_such_transaction(self):
+ """No such transaction."""
+ return self.contains_code(11000)
+
+ def is_no_such_job(self):
+ """No such job."""
+ return self.contains_code(203)
+
+ def is_no_such_operation(self):
+ """No such operation."""
+ return self.contains_code(1915)
+
+ def is_shell_exited(self):
+ """Shell exited."""
+ return self.contains_code(1800) or self.contains_code(1801)
+
+ def is_no_such_service(self):
+ """No such service."""
+ return self.contains_code(102)
+
+ def is_transport_error(self):
+ """Transport error."""
+ return self.contains_code(100)
+
+ def is_tablet_in_intermediate_state(self):
+ """Tablet is in intermediate state."""
+ # TODO(ifsmirnov) migrate to error code, YT-10993
+ return self._matches_regexp("Tablet .* is in state .*")
+
+ def is_no_such_tablet(self):
+ """No such tablet."""
+ return self.contains_code(1701)
+
+ def is_tablet_not_mounted(self):
+ """Tablet is not mounted."""
+ return self.contains_code(1702)
+
+ def is_no_such_cell(self):
+ """No such cell."""
+ return self.contains_code(1721)
+
+ def is_all_target_nodes_failed(self):
+ """Failed to write chunk since all target nodes have failed."""
+ return self.contains_code(700)
+
+ def is_no_such_attribute(self, attributes_list=None):
+ """Operation attribute is not supported."""
+ if attributes_list is None:
+ pred_new = lambda err: err.code == 1920 # noqa
+ else:
+ pred_new = lambda err: (err.attributes.get("attribute_name") in attributes_list) and (err.code == 1920) # noqa
+ pred_old = lambda err: ("Attribute" in err.message) and ("is not allowed" in err.message) # noqa
+ # COMPAT: remove old version
+ return self.find_matching_error(predicate=pred_new) or self.find_matching_error(predicate=pred_old)
+
+ def is_row_is_blocked(self):
+ """Row is blocked"""
+ return self.contains_code(1712)
+
+ def is_blocked_row_wait_timeout(self):
+ """Timed out waiting on blocked row"""
+ return self.contains_code(1713)
+
+ def is_chunk_not_preloaded(self):
+ """Chunk data is not preloaded yet"""
+ return self.contains_code(1735)
+
+ def is_already_present_in_group(self):
+ """Member is already present in group"""
+ return self.contains_code(908)
+
+
+class YtResponseError(YtError):
+ """Represents an error in YT response."""
+ def __init__(self, underlying_error):
+ super(YtResponseError, self).__init__()
+ self.message = "Received response with error"
+ self._underlying_error = underlying_error
+ self.inner_errors = [self._underlying_error]
+
+ # Common response error properties.
+ @property
+ def params(self):
+ return self.attributes.get("params")
+
+ # HTTP response interface.
+ @property
+ def url(self):
+ """ Returns url for HTTP response error"""
+ return self.attributes.get("url")
+
+ @property
+ def headers(self):
+ """ COMPAT: Returns request headers for HTTP response error"""
+ return self.attributes.get("request_headers")
+
+ @property
+ def error(self):
+ """ COMPAT: Returns underlying error"""
+ return self._underlying_error
+
+ @property
+ def request_headers(self):
+ """ Returns request headers for HTTP response error"""
+ return self.attributes.get("request_headers")
+
+ @property
+ def response_headers(self):
+ """ Returns response headers for HTTP response error"""
+ return self.attributes.get("response_headers")
+
+ def __reduce__(self):
+ return (_reconstruct_yt_response_error, (type(self), self.message, self.attributes, self._underlying_error, self.inner_errors))
+
+
+def _reconstruct_yt_response_error(error_class, message, attributes, underlying_error, inner_errors):
+ error = error_class(underlying_error)
+ error.message = message
+ error.inner_errors = inner_errors
+ error.attributes = attributes
+ return error
+
+
+class PrettyPrintableDict(dict):
+ pass
+
+
+def _pretty_format_escape(value):
+ def escape(char):
+ if char in string.printable:
+ return char
+ return "\\x{0:02x}".format(ord(char))
+ value = value.replace("\n", "\\n").replace("\t", "\\t")
+ try:
+ value.encode("utf-8")
+ return value
+ except UnicodeDecodeError:
+ return "".join(imap(escape, value))
+
+
+def _pretty_format_attribute(name, value, attribute_length_limit):
+ name = to_native_str(name)
+ if isinstance(value, PrettyPrintableDict):
+ value = json.dumps(value, indent=2)
+ value = value.replace("\n", "\n" + " " * (15 + 1 + 4))
+ else:
+ if isinstance(value, string_types):
+ value = to_native_str(value)
+ else:
+ value = str(value)
+ value = _pretty_format_escape(value)
+ if attribute_length_limit is not None and len(value) > attribute_length_limit:
+ value = value[:attribute_length_limit] + "...message truncated..."
+ return " " * 4 + "%-15s %s" % (name, value)
+
+
+def _pretty_simplify_error(error):
+ if isinstance(error, YtError):
+ error = error.simplify()
+ elif isinstance(error, (Exception, KeyboardInterrupt)):
+ error = {"code": 1, "message": str(error)}
+ return error
+
+
+def _pretty_extract_messages(error, depth=0):
+ """
+ YtError -> [(depth: int, message: str), ...], in tree order.
+ """
+ error = _pretty_simplify_error(error)
+
+ if not error.get("attributes", {}).get("transparent", False):
+ yield (depth, to_native_str(error["message"]))
+ depth += 1
+
+ for inner_error in error.get("inner_errors", []):
+ for subitem in _pretty_extract_messages(inner_error, depth=depth):
+ yield subitem
+
+
+def _pretty_format_messages_flat(error):
+ prev_depth = 0
+ result = []
+ for depth, message in _pretty_extract_messages(error):
+ if depth > prev_depth:
+ result.append(" ")
+ result.append("(" * (depth - prev_depth))
+ elif prev_depth > depth:
+ result.append(")" * (prev_depth - depth))
+ elif result:
+ result.append(", ")
+ result.append(repr(message))
+ prev_depth = depth
+
+ result.append(")" * prev_depth)
+ return "".join(result)
+
+
+def _pretty_format_messages(error, indent=0, indent_step=4):
+ result = []
+ for depth, message in _pretty_extract_messages(error):
+ result.append("{indent}{message}".format(
+ indent=" " * (indent + depth * indent_step),
+ message=message))
+
+ return "\n".join(result)
+
+
+def _pretty_format_full_errors(error, attribute_length_limit):
+ error = _pretty_simplify_error(error)
+
+ lines = []
+ if "message" in error:
+ lines.append(to_native_str(error["message"]))
+
+ if "code" in error and int(error["code"]) != 1:
+ lines.append(_pretty_format_attribute(
+ "code", error["code"], attribute_length_limit=attribute_length_limit))
+
+ attributes = error.get("attributes", {})
+
+ origin_keys = ["host", "datetime"]
+ origin_cpp_keys = ["pid", "tid", "fid"]
+ if all(key in attributes for key in origin_keys):
+ date = attributes["datetime"]
+ if isinstance(date, datetime):
+ date = date.strftime("%y-%m-%dT%H:%M:%S.%fZ")
+ value = "{0} on {1}".format(attributes["host"], date)
+ if all(key in attributes for key in origin_cpp_keys):
+ value += " (pid %(pid)d, tid %(tid)x, fid %(fid)x)" % attributes
+ lines.append(_pretty_format_attribute(
+ "origin", value, attribute_length_limit=attribute_length_limit))
+
+ location_keys = ["file", "line"]
+ if all(key in attributes for key in location_keys):
+ lines.append(_pretty_format_attribute(
+ "location",
+ "%(file)s:%(line)d" % attributes,
+ attribute_length_limit=attribute_length_limit))
+
+ for key, value in iteritems(attributes):
+ if key in origin_keys or key in location_keys or key in origin_cpp_keys:
+ continue
+ lines.append(_pretty_format_attribute(
+ key, value, attribute_length_limit=attribute_length_limit))
+
+ result = (" " * 4 + "\n").join(lines)
+ if "inner_errors" in error:
+ for inner_error in error["inner_errors"]:
+ result += "\n" + _pretty_format_full_errors(
+ inner_error, attribute_length_limit=attribute_length_limit)
+
+ return result
+
+
+def _pretty_format(error, attribute_length_limit=None):
+ return "{}\n\n***** Details:\n{}\n".format(
+ _pretty_format_messages(error),
+ _pretty_format_full_errors(error, attribute_length_limit=attribute_length_limit))
+
+
+def _pretty_format_fake(error, attribute_length_limit=None):
+ return _pretty_format(error, attribute_length_limit)
+
+
+def _pretty_format_for_logging(error, attribute_length_limit=None):
+ return _pretty_format_full_errors(error, attribute_length_limit=attribute_length_limit).replace("\n", "\\n")
+
+
+def format_error(error, attribute_length_limit=300):
+ return _pretty_format(error, attribute_length_limit)
+
+
+def join_exceptions(*args):
+ result = []
+ for exception in args:
+ if isinstance(exception, tuple):
+ result += exception
+ else:
+ result.append(exception)
+ return tuple(result)
+
+
+def which(name, flags=os.X_OK, custom_paths=None):
+ """Return list of files in system paths with given name."""
+ # TODO: check behavior when dealing with symlinks
+ result = []
+ paths = os.environ.get("PATH", "").split(os.pathsep)
+ if custom_paths is not None:
+ paths = custom_paths + paths
+ for dir in paths:
+ path = os.path.join(dir, name)
+ if os.access(path, flags):
+ result.append(path)
+ return result
+
+
+def unlist(list):
+ try:
+ return list[0] if len(list) == 1 else list
+ except TypeError: # cannot calculate len
+ return list
+
+
+def require(condition, exception_func):
+ if not condition:
+ raise exception_func()
+
+
+def update_inplace(object, patch):
+ """Apply patch to object inplace"""
+ if isinstance(patch, Mapping) and isinstance(object, Mapping):
+ for key, value in iteritems(patch):
+ if key in object:
+ object[key] = update_inplace(object[key], value)
+ else:
+ object[key] = value
+ elif isinstance(patch, list) and isinstance(object, list):
+ for index, value in enumerate(patch):
+ if index < len(object):
+ object[index] = update_inplace(object[index], value)
+ else:
+ object.append(value)
+ else:
+ object = patch
+ return object
+
+
+def update(object, patch):
+ """Apply patch to object without modifying original object or patch"""
+ if patch is None:
+ return copy.deepcopy(object)
+ elif object is None:
+ return copy.deepcopy(patch)
+ else:
+ return update_inplace(copy.deepcopy(object), patch)
+
+
+def flatten(obj, list_types=(list, tuple, set, frozenset, types.GeneratorType)):
+ """Create flat list from all elements."""
+ if isinstance(obj, list_types):
+ return list(chain(*imap(flatten, obj)))
+ return [obj]
+
+
+def update_from_env(variables):
+ """Update variables dict from environment."""
+ for key, value in iteritems(os.environ):
+ prefix = "YT_"
+ if not key.startswith(prefix):
+ continue
+
+ key = key[len(prefix):]
+ if key not in variables:
+ continue
+
+ var_type = type(variables[key])
+ # Using int we treat "0" as false, "1" as "true"
+ if var_type == bool:
+ try:
+ value = int(value)
+ except: # noqa
+ pass
+ # None type is treated as str
+ if isinstance(None, var_type):
+ var_type = str
+
+ variables[key] = var_type(value)
+
+
+def get_value(value, default):
+ if value is None:
+ return default
+ else:
+ return value
+
+
+def filter_dict(predicate, dictionary):
+ return dict([(k, v) for (k, v) in iteritems(dictionary) if predicate(k, v)])
+
+
+def set_pdeathsig(signum=None):
+ if sys.platform.startswith("linux"):
+ if signum is None:
+ signum = signal.SIGTERM
+ if prctl:
+ prctl.set_pdeathsig(signum)
+ else:
+ ctypes.cdll.LoadLibrary("libc.so.6")
+ libc = ctypes.CDLL("libc.so.6")
+ PR_SET_PDEATHSIG = 1
+ libc.prctl(PR_SET_PDEATHSIG, signum)
+
+
+def remove_file(path, force=False):
+ try:
+ os.remove(path)
+ except OSError:
+ if not force:
+ raise
+
+
+def makedirp(path):
+ try:
+ os.makedirs(path)
+ except OSError as err:
+ if err.errno != errno.EEXIST:
+ raise
+
+
+def touch(path):
+ if not os.path.exists(path):
+ makedirp(os.path.dirname(path))
+ with open(path, "w"):
+ pass
+
+
+def date_string_to_datetime(date):
+ return datetime.strptime(date, YT_DATETIME_FORMAT_STRING)
+
+
+def date_string_to_timestamp(date):
+ return calendar.timegm(date_string_to_datetime(date).timetuple())
+
+
+def date_string_to_timestamp_mcs(time_str):
+ dt = date_string_to_datetime(time_str)
+ return int(calendar.timegm(dt.timetuple()) * (10 ** 6) + dt.microsecond)
+
+
+def datetime_to_string(date, is_local=False):
+ if is_local:
+ date = datetime.utcfromtimestamp(time.mktime(date.timetuple()))
+ return date.strftime(YT_DATETIME_FORMAT_STRING)
+
+
+def make_non_blocking(fd):
+ # Use local import to support Windows.
+ import fcntl
+ flags = fcntl.fcntl(fd, fcntl.F_GETFL)
+ fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
+
+
+def to_native_str(string, encoding="utf-8", errors="strict"):
+ if not PY3 and isinstance(string, text_type):
+ return string.encode(encoding)
+ if PY3 and isinstance(string, binary_type):
+ return string.decode(encoding, errors=errors)
+ return string
+
+
+def copy_docstring_from(documented_function):
+ """Decorator that copies docstring from one function to another.
+
+ :param documented_function: function that provides docstring.
+
+ Usage::
+
+ def foo(...):
+ "USEFUL DOCUMENTATION"
+ ...
+
+ @copy_docstring_from(foo)
+ def bar(...)
+ # docstring will be copied from `foo' function
+ ...
+ """
+ return functools.wraps(documented_function, assigned=("__doc__",), updated=())
+
+
+def is_process_alive(pid):
+ try:
+ os.kill(pid, 0)
+ except OSError as err:
+ if err.errno == errno.ESRCH:
+ return False
+ elif err.errno == errno.EPERM:
+ return True
+ else:
+ # According to "man 2 kill" possible error values are
+ # (EINVAL, EPERM, ESRCH)
+ raise
+ return True
+
+
+def uuid_to_parts(guid):
+ id_parts = guid.split("-")
+ id_hi = int(id_parts[2], 16) << 32 | int(id_parts[3], 16)
+ id_lo = int(id_parts[0], 16) << 32 | int(id_parts[1], 16)
+ return id_hi, id_lo
+
+
+def parts_to_uuid(id_hi, id_lo):
+ guid = id_lo << 64 | id_hi
+ mask = 0xFFFFFFFF
+
+ parts = []
+ for i in range(4):
+ parts.append((guid & mask) >> (i * 32))
+ mask <<= 32
+
+ return "-".join(reversed(["{:x}".format(part) for part in parts]))
+
+
+# TODO(asaitgalin): Remove copy-paste from YP.
+def underscore_case_to_camel_case(str):
+ result = []
+ first = True
+ upper = True
+ for c in str:
+ if c == "_":
+ upper = True
+ else:
+ if upper:
+ if c not in string.ascii_letters and not first:
+ result.append("_")
+ c = c.upper()
+ result.append(c)
+ upper = False
+ first = False
+ return "".join(result)
+
+
+class WaitFailed(Exception):
+ pass
+
+
+def wait(predicate, error_message=None, iter=None, sleep_backoff=None, timeout=None, ignore_exceptions=False):
+ # 30 seconds by default
+ if sleep_backoff is None:
+ sleep_backoff = 0.3
+
+ if ignore_exceptions:
+ def check_predicate():
+ try:
+ return predicate()
+ # Do not catch BaseException because pytest exceptions are inherited from it
+ # pytest.fail raises exception inherited from BaseException.
+ except Exception:
+ return False
+ else:
+ check_predicate = predicate
+
+ if timeout is None:
+ if iter is None:
+ iter = 100
+ index = 0
+ while index < iter:
+ if check_predicate():
+ return
+ index += 1
+ time.sleep(sleep_backoff)
+ else:
+ start_time = datetime.now()
+ while datetime.now() - start_time < timedelta(seconds=timeout):
+ if check_predicate():
+ return
+ time.sleep(sleep_backoff)
+
+ if inspect.isfunction(error_message):
+ error_message = error_message()
+ if error_message is None:
+ error_message = "Wait failed"
+ error_message += " (timeout = {0})".format(timeout if timeout is not None else iter * sleep_backoff)
+ raise WaitFailed(error_message)
diff --git a/yt/python/yt/json_wrapper.py b/yt/python/yt/json_wrapper.py
new file mode 100644
index 00000000000..2e18c98ca9a
--- /dev/null
+++ b/yt/python/yt/json_wrapper.py
@@ -0,0 +1,24 @@
+try:
+ from yt.packages.six import iteritems, text_type
+except ImportError:
+ from six import iteritems, text_type
+
+try:
+ from simplejson import load, dump, loads, dumps, JSONDecodeError # noqa
+except ImportError:
+ # This version of simplejson has no compiled speedup module.
+ from yt.packages.simplejson import load, dump, loads, dumps, JSONDecodeError # noqa
+
+
+def loads_as_bytes(*args, **kwargs):
+ def encode(value):
+ if isinstance(value, dict):
+ return dict([(encode(k), encode(v)) for k, v in iteritems(value)])
+ elif isinstance(value, list):
+ return [encode(item) for item in value]
+ elif isinstance(value, text_type):
+ return value.encode("utf-8")
+ else:
+ return value
+
+ return encode(loads(*args, **kwargs))
diff --git a/yt/python/yt/logger.py b/yt/python/yt/logger.py
new file mode 100644
index 00000000000..c885fa9f524
--- /dev/null
+++ b/yt/python/yt/logger.py
@@ -0,0 +1,72 @@
+from . import logger_config
+
+try:
+ import yatest.common as yatest_common
+except ImportError:
+ yatest_common = None
+
+import logging
+
+
+def set_log_level_from_config(logger):
+ if not logger_config.LOG_LEVEL:
+ logger.setLevel(level=logging.__dict__["INFO"])
+ else:
+ if logger_config.LOG_LEVEL.upper() == "NOTSET":
+ raise Exception("LOG_LEVEL couldn't be defined as NOTSET")
+ # Intentionally override trace with debug for compatibility with C++ logging library.
+ if logger_config.LOG_LEVEL.upper() == "TRACE":
+ logger_config.LOG_LEVEL = "DEBUG"
+ logger.setLevel(level=logging.__dict__[logger_config.LOG_LEVEL.upper()])
+
+
+logging.getLogger("yt.packages.requests.packages.urllib3").setLevel(logging.WARNING)
+
+LOGGER = logging.getLogger("Yt")
+
+LOGGER.propagate = False
+
+set_log_level_from_config(LOGGER)
+
+if logger_config.LOG_PATH is None:
+ LOGGER.addHandler(logging.StreamHandler())
+else:
+ LOGGER.addHandler(logging.FileHandler(logger_config.LOG_PATH))
+
+BASIC_FORMATTER = logging.Formatter(logger_config.LOG_PATTERN)
+
+formatter = None
+
+
+def set_formatter(new_formatter):
+ global formatter
+ formatter = new_formatter
+ for handler in LOGGER.handlers:
+ handler.setFormatter(new_formatter)
+
+
+set_formatter(BASIC_FORMATTER)
+
+
+def debug(msg, *args, **kwargs):
+ LOGGER.debug(msg, *args, **kwargs)
+
+
+def info(msg, *args, **kwargs):
+ LOGGER.info(msg, *args, **kwargs)
+
+
+def warning(msg, *args, **kwargs):
+ LOGGER.warning(msg, *args, **kwargs)
+
+
+def error(msg, *args, **kwargs):
+ LOGGER.error(msg, *args, **kwargs)
+
+
+def exception(msg, *args, **kwargs):
+ LOGGER.exception(msg, *args, **kwargs)
+
+
+def log(level, msg, *args, **kwargs):
+ LOGGER.log(level, msg, *args, **kwargs)
diff --git a/yt/python/yt/logger_config.py b/yt/python/yt/logger_config.py
new file mode 100644
index 00000000000..45d1bdd1a7c
--- /dev/null
+++ b/yt/python/yt/logger_config.py
@@ -0,0 +1,7 @@
+from .common import update_from_env
+
+LOG_LEVEL = None
+LOG_PATTERN = "%(asctime)-15s\t%(levelname)s\t%(message)s"
+LOG_PATH = None
+
+update_from_env(globals())
diff --git a/yt/python/yt/subprocess_wrapper.py b/yt/python/yt/subprocess_wrapper.py
new file mode 100644
index 00000000000..b766cb11fe5
--- /dev/null
+++ b/yt/python/yt/subprocess_wrapper.py
@@ -0,0 +1,19 @@
+from yt.common import to_native_str
+
+try:
+ import subprocess32 as subprocess
+except ImportError:
+ import subprocess
+
+Popen = subprocess.Popen
+PIPE = subprocess.PIPE
+STDOUT = subprocess.STDOUT
+CalledProcessError = subprocess.CalledProcessError
+
+
+def check_call(*args, **kwargs):
+ return subprocess.check_call(*args, **kwargs)
+
+
+def check_output(*args, **kwargs):
+ return to_native_str(subprocess.check_output(*args, **kwargs))
diff --git a/yt/python/yt/type_info/README.md b/yt/python/yt/type_info/README.md
new file mode 100644
index 00000000000..65be5d6a5b5
--- /dev/null
+++ b/yt/python/yt/type_info/README.md
@@ -0,0 +1,2 @@
+# Description
+Python library for representing types of Common Yandex Typesystem (complementary to C++ [library/type_info](https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/type_info)).
diff --git a/yt/python/yt/type_info/__init__.py b/yt/python/yt/type_info/__init__.py
new file mode 100644
index 00000000000..aa7268843c5
--- /dev/null
+++ b/yt/python/yt/type_info/__init__.py
@@ -0,0 +1,11 @@
+from .type_base import ( # noqa
+ Type, is_valid_type, validate_type,
+)
+
+from .typing import ( # noqa
+ Bool, Int8, Uint8, Int16, Uint16, Int32, Uint32, Int64, Uint64, Float,
+ Double, String, Utf8, Yson, Json, Uuid, Date, Datetime, Timestamp,
+ Interval, TzDate, TzDatetime, TzTimestamp, Void, Null, Optional, List,
+ Tuple, Dict, Struct, Variant, Tagged, Decimal, EmptyTuple, EmptyStruct,
+ serialize_yson, deserialize_yson,
+)
diff --git a/yt/python/yt/type_info/test/lib/conftest.py b/yt/python/yt/type_info/test/lib/conftest.py
new file mode 100644
index 00000000000..4fe86467afd
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/conftest.py
@@ -0,0 +1,30 @@
+import cyson
+
+import six
+
+import sys
+
+
+# Emulate module `yt.yson` in tests to get rid of the dependency on yt/python
+class FakeYsonModule:
+ YsonError = ValueError
+
+ @staticmethod
+ def loads(yson):
+ reader = cyson.Reader if six.text_type not in six.string_types else cyson.UnicodeReader
+ return cyson.loads(yson, Reader=reader)
+
+ @staticmethod
+ def dumps(yson, yson_format="text"):
+ if hasattr(yson, "to_yson_type"):
+ yson = yson.to_yson_type()
+
+ return cyson.dumps(yson, format=yson_format)
+
+
+class FakeYtModule:
+ yson = FakeYsonModule
+
+
+sys.modules["yt"] = FakeYtModule
+sys.modules["yt.yson"] = FakeYtModule.yson
diff --git a/yt/python/yt/type_info/test/lib/helpers.py b/yt/python/yt/type_info/test/lib/helpers.py
new file mode 100644
index 00000000000..1f4c54a14fb
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/helpers.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+NO_ARGUMENT_TYPES = [
+ "Bool",
+ "Int8",
+ "Uint8",
+ "Int16",
+ "Uint16",
+ "Int32",
+ "Uint32",
+ "Int64",
+ "Uint64",
+ "Float",
+ "Double",
+ "String",
+ "Utf8",
+ "Yson",
+ "Json",
+ "Uuid",
+ "Date",
+ "Datetime",
+ "Timestamp",
+ "Interval",
+ "TzDate",
+ "TzDatetime",
+ "TzTimestamp",
+
+ "Void",
+ "Null",
+]
diff --git a/yt/python/yt/type_info/test/lib/test_common.py b/yt/python/yt/type_info/test/lib/test_common.py
new file mode 100644
index 00000000000..e4b7e7a585c
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/test_common.py
@@ -0,0 +1,49 @@
+import yt.type_info as ti
+
+import pytest
+import pkgutil
+
+
+def load_and_parse(path):
+ testdata = pkgutil.get_data(__package__, path)
+ testlines = [line for line in testdata.split(b"\n") if not line.startswith(b"#")]
+ tests = (b"\n".join(testlines)).split(b";;")
+
+ parsed_tests = [tuple(test_arg.strip() for test_arg in test.split(b"::"))
+ for test in tests]
+
+ return [test for test in parsed_tests if any(test)]
+
+
+def pytest_generate_tests(metafunc):
+ if metafunc.cls is not TestCommon:
+ return
+
+ TEST_DATA_PATH_PREFIX = "library/cpp/type_info/ut/test-data"
+
+ if "test_good" in metafunc.function.__name__:
+ tests = load_and_parse(TEST_DATA_PATH_PREFIX + "/good-types.txt")
+ argnames = ["yson_type", "string_type"]
+ else:
+ tests = load_and_parse(TEST_DATA_PATH_PREFIX + "/bad-types.txt")
+ argnames = ["yson_type", "error_text"]
+
+ argvalues = [tuple(test[:len(argnames)]) for test in tests]
+ ids = [str(id) for id in range(1, len(tests) + 1)]
+
+ metafunc.parametrize(argnames, argvalues, ids=ids)
+
+
+class TestCommon:
+ def test_good(self, yson_type, string_type):
+ py_type = ti.deserialize_yson(yson_type)
+
+ assert str(py_type) == string_type.decode("utf-8")
+
+ yson_type2 = ti.serialize_yson(py_type, human_readable=True)
+ py_type2 = ti.deserialize_yson(yson_type2)
+ assert py_type == py_type2
+
+ def test_bad(self, yson_type, error_text):
+ with pytest.raises(ValueError, match=error_text.decode("utf-8")):
+ ti.deserialize_yson(yson_type)
diff --git a/yt/python/yt/type_info/test/lib/test_helpers.py b/yt/python/yt/type_info/test/lib/test_helpers.py
new file mode 100644
index 00000000000..86bb85cabdc
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/test_helpers.py
@@ -0,0 +1,43 @@
+import pytest
+
+import yt.type_info.typing as typing
+
+from yt.type_info.type_base import (
+ is_valid_type, make_primitive_type, validate_type, quote_string, Type
+)
+
+
+def test_valid_type():
+ assert is_valid_type(Type)
+ assert not is_valid_type(int)
+ assert not is_valid_type(3)
+ assert is_valid_type(typing.List[typing.Int8])
+
+
+def test_check_type():
+ validate_type(Type)
+ validate_type(typing.Int64)
+ with pytest.raises(ValueError):
+ validate_type(int)
+ with pytest.raises(ValueError):
+ validate_type(3)
+ validate_type(typing.List[typing.Int8])
+
+
+def test_quoute_string():
+ assert quote_string("a'b'c'd") == "'a\\'b\\'c\\'d'"
+ assert quote_string("abc") == "'abc'"
+ assert quote_string("") == "''"
+ assert quote_string("\\bcd\\") == "'\\\\bcd\\\\'"
+ assert quote_string("\\b'c'd\\") == "'\\\\b\\'c\\'d\\\\'"
+ assert quote_string(u"хэ\\л'ло") == u"'хэ\\\\л\\'ло'"
+
+
+def test_make_primitive_type():
+ MyType = make_primitive_type("MyTypeName")
+ assert is_valid_type(MyType)
+ assert str(MyType) == "MyTypeName"
+ assert MyType.name == "MyTypeName"
+ # Check no exception is thrown
+ ListOfMyType = typing.List[MyType]
+ assert is_valid_type(ListOfMyType)
diff --git a/yt/python/yt/type_info/test/lib/test_io.py b/yt/python/yt/type_info/test/lib/test_io.py
new file mode 100644
index 00000000000..1b6cca89977
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/test_io.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+
+from . import helpers
+import yt.type_info.typing as typing
+from yt.type_info import is_valid_type, serialize_yson, deserialize_yson
+
+from yt import yson
+
+import pytest
+import six
+
+
+def to_snake_case(s):
+ res = []
+ first = True
+ for c in s:
+ if c.isupper():
+ if first:
+ res.append(c.lower())
+ else:
+ res += ["_", c.lower()]
+ else:
+ res.append(c)
+ first = False
+ return "".join(res)
+
+
[email protected]("human_readable", [True, False])
+def test_primitive_types(human_readable):
+ for type_name in helpers.NO_ARGUMENT_TYPES:
+ assert hasattr(typing, type_name)
+ type_ = getattr(typing, type_name)
+ serialized = serialize_yson(type_, human_readable=human_readable)
+ assert deserialize_yson(serialized) == type_
+ obj = yson.loads(serialized)
+ assert obj == to_snake_case(type_name)
+
+
[email protected]("human_readable", [True, False])
+def test_compound_types(human_readable):
+ def check(type_, expected_obj):
+ serialized = serialize_yson(type_, human_readable=human_readable)
+ deserialized = deserialize_yson(serialized)
+ assert is_valid_type(deserialized)
+ obj = yson.loads(serialized)
+ assert obj == expected_obj
+ assert deserialized == type_
+
+ check(typing.Optional[typing.Int32], {
+ "type_name": "optional",
+ "item": "int32",
+ })
+
+ check(typing.Dict[typing.Double, typing.Bool], {
+ "type_name": "dict",
+ "key": "double",
+ "value": "bool",
+ })
+
+ check(typing.Tuple[typing.Double, typing.Uuid, typing.Json], {
+ "type_name": "tuple",
+ "elements": [
+ {"type": "double"},
+ {"type": "uuid"},
+ {"type": "json"},
+ ],
+ })
+
+ check(typing.EmptyTuple, {"type_name": "tuple", "elements": []})
+
+ if six.PY2:
+ russian_name = u"ой".encode("utf-8")
+ else:
+ russian_name = u"ой"
+
+ check(
+ typing.Struct[
+ "a": typing.Uint8,
+ "b": typing.Yson,
+ u"ой": typing.Uint64,
+ ],
+ {
+ "type_name": "struct",
+ "members": [
+ {"name": "a", "type": "uint8"},
+ {"name": "b", "type": "yson"},
+ {"name": russian_name, "type": "uint64"},
+ ],
+ },
+ )
+
+ check(typing.EmptyStruct, {"type_name": "struct", "members": []})
+
+ variant_struct = typing.Variant[
+ "to_be": typing.Null,
+ "not_to_be": typing.Void,
+ ]
+ check(variant_struct, {
+ "type_name": "variant",
+ "members": [
+ {"name": "to_be", "type": "null"},
+ {"name": "not_to_be", "type": "void"},
+ ],
+ })
+
+ check(typing.Variant[typing.Uint8, typing.Int8], {
+ "type_name": "variant",
+ "elements": [
+ {"type": "uint8"},
+ {"type": "int8"},
+ ],
+ })
+
+ check(typing.Tagged[variant_struct, "my_tag"], {
+ "type_name": "tagged",
+ "tag": "my_tag",
+ "item": {
+ "type_name": "variant",
+ "members": [
+ {"name": "to_be", "type": "null"},
+ {"name": "not_to_be", "type": "void"},
+ ],
+ },
+ })
+
+ if six.PY2:
+ russian_tag = u"мой_тэг".encode("utf-8")
+ else:
+ russian_tag = u"мой_тэг"
+ check(typing.Tagged[typing.String, u"мой_тэг"], {
+ "type_name": "tagged",
+ "tag": russian_tag,
+ "item": "string",
+ })
+
+ check(typing.Decimal[5, 3], {
+ "type_name": "decimal",
+ "precision": 5,
+ "scale": 3,
+ })
+
+ check(typing.Decimal(5, 3), {
+ "type_name": "decimal",
+ "precision": 5,
+ "scale": 3,
+ })
+
+
+def test_errors():
+ with pytest.raises(TypeError):
+ serialize_yson("I'm not a type")
+
+ # Malformed YSON.
+ with pytest.raises(ValueError):
+ deserialize_yson("}}O_o{{")
+
+ # Nonexistent type.
+ with pytest.raises(ValueError):
+ deserialize_yson("{type_name=\"I actually dont exist\"}")
+
+ # More subtle problem with types.
+ with pytest.raises(ValueError):
+ deserialize_yson("""{
+ type_name=decimal;
+ precision=10;
+ scale=10.0;
+ }""")
diff --git a/yt/python/yt/type_info/test/lib/test_typing.py b/yt/python/yt/type_info/test/lib/test_typing.py
new file mode 100644
index 00000000000..34d7ece2aac
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/test_typing.py
@@ -0,0 +1,228 @@
+from . import helpers
+import yt.type_info.typing as typing
+from yt.type_info import is_valid_type
+
+import pytest
+import six
+
+
+def test_primitive_types():
+ for type_name in helpers.NO_ARGUMENT_TYPES:
+ assert hasattr(typing, type_name)
+ type_ = getattr(typing, type_name)
+ assert type_.name == type_name
+ assert str(type_) == type_name
+ assert is_valid_type(type_)
+
+
+def test_primitive_type_equality():
+ for i, type_name in enumerate(helpers.NO_ARGUMENT_TYPES):
+ type_ = getattr(typing, type_name)
+ assert type_ == type_
+ assert not(type_ != type_)
+ type_name_next = helpers.NO_ARGUMENT_TYPES[(i + 1) % len(helpers.NO_ARGUMENT_TYPES)]
+ type_next = getattr(typing, type_name_next)
+ assert type_ != type_next
+ assert not(type_ == type_next)
+
+
+def test_compound_types():
+ optional = typing.Optional[typing.Int32]
+ assert is_valid_type(optional)
+ assert str(optional) == "Optional<Int32>"
+ assert optional.name == "Optional"
+ assert optional.item == typing.Int32
+
+ list_ = typing.List[typing.String]
+ assert is_valid_type(list_)
+ assert str(list_) == "List<String>"
+ assert list_.name == "List"
+ assert list_.item == typing.String
+
+ dict_ = typing.Dict[typing.Double, typing.Bool]
+ assert is_valid_type(dict_)
+ assert str(dict_) == "Dict<Double, Bool>"
+ assert dict_.name == "Dict"
+ assert dict_.key == typing.Double
+ assert dict_.value == typing.Bool
+
+ tuple_ = typing.Tuple[typing.Double, typing.Uuid, typing.Json]
+ assert is_valid_type(tuple_)
+ assert str(tuple_) == "Tuple<Double, Uuid, Json>"
+ assert tuple_.name == "Tuple"
+ assert tuple_.items == (typing.Double, typing.Uuid, typing.Json)
+
+ assert is_valid_type(typing.EmptyTuple)
+ assert str(typing.EmptyTuple) == "Tuple<>"
+ assert typing.EmptyTuple.name == "Tuple"
+ assert typing.EmptyTuple.items == tuple()
+
+ struct = typing.Struct[
+ "a": typing.Uint8,
+ "b": typing.Yson,
+ u"ой": typing.Uint64,
+ ]
+ assert is_valid_type(struct)
+ assert six.ensure_text(str(struct)) == u"Struct<'a': Uint8, 'b': Yson, 'ой': Uint64>"
+ assert struct.name == "Struct"
+ assert struct.items == (("a", typing.Uint8), ("b", typing.Yson), (u"ой", typing.Uint64))
+
+ assert is_valid_type(typing.EmptyStruct)
+ assert str(typing.EmptyStruct) == "Struct<>"
+ assert typing.EmptyStruct.name == "Struct"
+ assert typing.EmptyStruct.items == tuple()
+
+ variant_struct = typing.Variant[
+ "to_be": typing.Null,
+ "not_to_be": typing.Void,
+ ]
+ assert is_valid_type(variant_struct)
+ assert str(variant_struct) == "Variant<'to_be': Null, 'not_to_be': Void>"
+ assert variant_struct.name == "Variant"
+ assert variant_struct.items == (("to_be", typing.Null), ("not_to_be", typing.Void))
+
+ variant_tuple = typing.Variant[typing.Uint8, typing.Int8]
+ assert is_valid_type(variant_tuple)
+ assert str(variant_tuple) == "Variant<Uint8, Int8>"
+ assert variant_tuple.name == "Variant"
+ assert variant_tuple.items == (typing.Uint8, typing.Int8)
+
+ tagged = typing.Tagged[typing.String, "my_tag"]
+ assert is_valid_type(tagged)
+ assert str(tagged) == "Tagged<String, 'my_tag'>"
+ assert tagged.name == "Tagged"
+ assert tagged.tag == "my_tag"
+ assert tagged.item == typing.String
+
+ tagged_ru = typing.Tagged[typing.String, u"мой_тэг"]
+ assert is_valid_type(tagged_ru)
+ assert six.ensure_text(str(tagged_ru)) == u"Tagged<String, 'мой_тэг'>"
+ assert tagged_ru.name == "Tagged"
+ assert tagged_ru.tag == u"мой_тэг"
+ assert tagged_ru.item == typing.String
+
+ decimal_getitem = typing.Decimal[5, 3]
+ assert is_valid_type(decimal_getitem)
+ assert str(decimal_getitem) == "Decimal(5, 3)"
+ assert decimal_getitem.name == "Decimal"
+ assert decimal_getitem.precision == 5
+ assert decimal_getitem.scale == 3
+
+ decimal_call = typing.Decimal(5, 3)
+ assert is_valid_type(decimal_call)
+ assert str(decimal_call) == "Decimal(5, 3)"
+ assert decimal_call.name == "Decimal"
+ assert decimal_call.precision == 5
+ assert decimal_call.scale == 3
+
+
+def test_compound_type_equality():
+ optional1 = typing.Optional[typing.Int32]
+ optional2 = typing.Optional[typing.Int32]
+ optional3 = typing.Optional[typing.Int64]
+ assert optional1 == optional1
+ assert optional1 == optional2
+ assert optional1 != optional3
+
+ list1 = typing.List[typing.String]
+ list2 = typing.List[typing.String]
+ list3 = typing.List[typing.Optional[typing.String]]
+ assert list1 == list1
+ assert list1 == list2
+ assert list1 != list3
+
+ dict1 = typing.Dict[typing.Double, typing.Bool]
+ dict2 = typing.Dict[typing.Double, typing.Bool]
+ dict3 = typing.Dict[typing.Optional[typing.Double], typing.Bool]
+ assert dict1 == dict1
+ assert dict1 == dict2
+ assert dict1 != dict3
+
+ tuple1 = typing.Tuple[typing.Double, typing.Uuid, typing.Json]
+ tuple2 = typing.Tuple[typing.Double, typing.Uuid, typing.Json]
+ tuple3 = typing.Tuple[typing.Double, typing.Json, typing.Uuid]
+ assert tuple1 == tuple1
+ assert tuple1 == tuple2
+ assert tuple1 != tuple3
+
+ assert typing.EmptyTuple == typing.EmptyTuple
+
+ struct1 = typing.Struct["a": typing.Uint8, "b": typing.Yson, u"ой": typing.Uint64]
+ struct2 = typing.Struct["a": typing.Uint8, "b": typing.Yson, u"ой": typing.Uint64]
+ struct3 = typing.Struct["a": typing.Uint8, "B": typing.Yson, u"ой": typing.Uint64]
+ assert struct1 == struct1
+ assert struct1 == struct2
+ assert struct1 != struct3
+
+ assert typing.EmptyStruct == typing.EmptyStruct
+ assert typing.EmptyStruct != typing.EmptyTuple
+
+ variant_struct1 = typing.Variant["to_be": typing.Null, "not_to_be": typing.Void]
+ variant_struct2 = typing.Variant["to_be": typing.Null, "not_to_be": typing.Void]
+ variant_struct3 = typing.Variant["to_be": typing.Void, "not_to_be": typing.Void]
+ assert variant_struct1 == variant_struct1
+ assert variant_struct1 == variant_struct2
+ assert variant_struct1 != variant_struct3
+
+ variant_tuple1 = typing.Variant[typing.Uint8, typing.Int8]
+ variant_tuple2 = typing.Variant[typing.Uint8, typing.Int8]
+ variant_tuple3 = typing.Variant[typing.Uint8, typing.Int8, typing.Int16]
+ assert variant_tuple1 == variant_tuple1
+ assert variant_tuple1 == variant_tuple2
+ assert variant_tuple1 != variant_tuple3
+
+ tagged1 = typing.Tagged[typing.String, "my_tag"]
+ tagged2 = typing.Tagged[typing.String, "my_tag"]
+ tagged3 = typing.Tagged[typing.String, "my_tag1"]
+ assert tagged1 == tagged1
+ assert tagged1 == tagged2
+ assert tagged1 != tagged3
+
+ tagged_ru1 = typing.Tagged[typing.String, u"мой_тэг"]
+ tagged_ru2 = typing.Tagged[typing.String, u"мой_тэг"]
+ tagged_ru3 = typing.Tagged[typing.String, u"мой_тэг1"]
+ assert tagged_ru1 == tagged_ru1
+ assert tagged_ru1 == tagged_ru2
+ assert tagged_ru1 != tagged_ru3
+
+ decimal_getitem1 = typing.Decimal[5, 3]
+ decimal_getitem2 = typing.Decimal[5, 3]
+ decimal_getitem3 = typing.Decimal[5, 4]
+ assert decimal_getitem1 == decimal_getitem1
+ assert decimal_getitem1 == decimal_getitem2
+ assert decimal_getitem1 != decimal_getitem3
+
+ decimal_call1 = typing.Decimal(5, 3)
+ decimal_call2 = typing.Decimal(5, 3)
+ decimal_call3 = typing.Decimal(5, 4)
+ assert decimal_call1 == decimal_call1
+ assert decimal_call1 == decimal_call2
+ assert decimal_call1 != decimal_call3
+
+ assert decimal_call1 == decimal_getitem1
+ assert decimal_call2 == decimal_getitem2
+ assert decimal_call3 == decimal_getitem3
+
+
+def test_errors():
+ for T in [typing.Struct, typing.Variant]:
+ with pytest.raises(ValueError):
+ T[b"\xff": typing.Int8]
+
+ with pytest.raises(ValueError):
+ T[10: typing.Int8]
+
+ with pytest.raises(ValueError):
+ T["a": typing.Int8, "a": typing.Int8]
+
+ with pytest.raises(ValueError):
+ T["": typing.Int8]
+
+ with pytest.raises(ValueError):
+ typing.Tagged[typing.Int8, 10]
+
+ with pytest.raises(ValueError):
+ typing.Tagged[typing.Int8, b"\xff"]
+
+ with pytest.raises(ValueError):
+ typing.Decimal(10, 10.0)
diff --git a/yt/python/yt/type_info/test/lib/ya.make b/yt/python/yt/type_info/test/lib/ya.make
new file mode 100644
index 00000000000..ffbccab8508
--- /dev/null
+++ b/yt/python/yt/type_info/test/lib/ya.make
@@ -0,0 +1,22 @@
+PY23_LIBRARY()
+
+TEST_SRCS(
+ conftest.py
+ test_common.py
+ test_helpers.py
+ test_typing.py
+ test_io.py
+ helpers.py
+)
+
+PEERDIR(
+ library/python/cyson
+ yt/python/yt/type_info
+)
+
+RESOURCE_FILES(
+ library/cpp/type_info/ut/test-data/good-types.txt
+ library/cpp/type_info/ut/test-data/bad-types.txt
+)
+
+END()
diff --git a/yt/python/yt/type_info/test/py2/ya.make b/yt/python/yt/type_info/test/py2/ya.make
new file mode 100644
index 00000000000..0233403a735
--- /dev/null
+++ b/yt/python/yt/type_info/test/py2/ya.make
@@ -0,0 +1,7 @@
+PY2TEST()
+
+PEERDIR(
+ yt/python/yt/type_info/test/lib
+)
+
+END()
diff --git a/yt/python/yt/type_info/test/py3/ya.make b/yt/python/yt/type_info/test/py3/ya.make
new file mode 100644
index 00000000000..e1210482c08
--- /dev/null
+++ b/yt/python/yt/type_info/test/py3/ya.make
@@ -0,0 +1,7 @@
+PY3TEST()
+
+PEERDIR(
+ yt/python/yt/type_info/test/lib
+)
+
+END()
diff --git a/yt/python/yt/type_info/test/ya.make b/yt/python/yt/type_info/test/ya.make
new file mode 100644
index 00000000000..1363b76581c
--- /dev/null
+++ b/yt/python/yt/type_info/test/ya.make
@@ -0,0 +1,5 @@
+RECURSE(
+ lib
+ py2
+ py3
+)
diff --git a/yt/python/yt/type_info/type_base.py b/yt/python/yt/type_info/type_base.py
new file mode 100644
index 00000000000..57b5452e296
--- /dev/null
+++ b/yt/python/yt/type_info/type_base.py
@@ -0,0 +1,109 @@
+import six
+
+from abc import ABCMeta, abstractmethod
+
+
+def _with_type(x):
+ return "<type: {!s}>: {!r}".format(type(x), x)
+
+
+def _is_utf8(s):
+ if not isinstance(s, six.string_types):
+ return False
+
+ if isinstance(s, six.binary_type):
+ try:
+ s.decode("utf-8", errors="strict")
+ return True
+ except UnicodeDecodeError:
+ return False
+ return isinstance(s, six.text_type)
+
+
+def _as_utf8(s):
+ if isinstance(s, six.text_type):
+ return s
+ elif isinstance(s, six.binary_type):
+ return s.decode("utf-8", errors="strict")
+ else:
+ raise TypeError("expected string or binary type, but got {}".format(type(s)))
+
+
+def is_valid_type(x):
+ return isinstance(x, Type) or x is Type
+
+
+def validate_type(x):
+ if not is_valid_type(x):
+ raise ValueError("Expected type, but got {}".format(_with_type(x)))
+
+
+def quote_string(s):
+ return u"'{}'".format(s.replace("\\", "\\\\").replace("'", "\\'"))
+
+
+class Type(six.with_metaclass(ABCMeta)):
+ REQUIRED_ATTRS = ["name", "yt_type_name"]
+
+ def __init__(self, attrs):
+ assert all(key in attrs for key in self.REQUIRED_ATTRS), \
+ "One or more of required arguments not found in attributes"
+
+ for name, value in attrs.items():
+ setattr(self, name, value)
+
+ def __eq__(self, other):
+ assert isinstance(self, Type)
+
+ if not isinstance(other, Type):
+ return False
+
+ return str(self) == str(other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return hash(str(self))
+
+ @abstractmethod
+ def __str__(self):
+ pass
+
+ @abstractmethod
+ def to_yson_type(self):
+ pass
+
+
+class Primitive(Type):
+ def __str__(self):
+ return self.name
+
+ def to_yson_type(self):
+ return self.yt_type_name
+
+
+class Generic(six.with_metaclass(ABCMeta)):
+ def __init__(self, name, yt_type_name=None):
+ assert _is_utf8(name), "Name must be UTF-8, got {}".format(_with_type(name))
+ self.name = name
+ self.yt_type_name = name.lower()
+
+ @abstractmethod
+ def __getitem__(self, param):
+ pass
+
+ @abstractmethod
+ def from_dict(self):
+ pass
+
+
+def make_primitive_type(name, yt_type_name=None):
+ assert _is_utf8(name), "Name of primitive type must be UTF-8, got {}".format(_with_type(name))
+ assert yt_type_name is None or _is_utf8(yt_type_name), \
+ "YT type name of primitive type must be UTF-8, got {}".format(_with_type(name))
+
+ if yt_type_name is None:
+ yt_type_name = name.lower()
+
+ return Primitive({"name": name, "yt_type_name": yt_type_name})
diff --git a/yt/python/yt/type_info/typing.py b/yt/python/yt/type_info/typing.py
new file mode 100644
index 00000000000..07cd5c189cb
--- /dev/null
+++ b/yt/python/yt/type_info/typing.py
@@ -0,0 +1,443 @@
+from . import type_base
+
+# Backward compatibility
+from .type_base import ( # noqa
+ is_valid_type, validate_type, Type
+)
+
+try:
+ import yt.yson
+ _TI_SERIALIZATION_AVAILABLE = True
+except ImportError:
+ _TI_SERIALIZATION_AVAILABLE = False
+
+import six
+
+
[email protected]_2_unicode_compatible
+class _SingleArgumentGenericAlias(type_base.Type):
+ REQUIRED_ATTRS = type_base.Type.REQUIRED_ATTRS + ["item"]
+
+ def __str__(self):
+ return "{}<{}>".format(self.name, self.item)
+
+ def to_yson_type(self):
+ return {"type_name": self.name.lower(), "item": self.item.to_yson_type()}
+
+
+class _SingleArgumentGeneric(type_base.Generic):
+ def __getitem__(self, param):
+ type_base.validate_type(param)
+
+ attrs = {
+ "name": self.name,
+ "yt_type_name": self.yt_type_name,
+ "item": param,
+ }
+
+ return _SingleArgumentGenericAlias(attrs)
+
+ def from_dict(self, type_):
+ _validate_contains(type_, "item")
+ param = _parse_type(type_["item"])
+ return self.__getitem__(param)
+
+
+def _make_tuple_attrs(items, type_name, yt_type_name):
+ for item in items:
+ type_base.validate_type(item)
+
+ attrs = {
+ "name": type_name,
+ "yt_type_name": yt_type_name,
+ "items": items,
+ }
+
+ return attrs
+
+
[email protected]_2_unicode_compatible
+class _TupleAlias(type_base.Type):
+ REQUIRED_ATTRS = type_base.Type.REQUIRED_ATTRS + ["items"]
+
+ def __str__(self):
+ return "{}<{}>".format(self.name, ", ".join(str(x) for x in self.items))
+
+ def to_yson_type(self):
+ yson_repr = {
+ "type_name": self.yt_type_name,
+ "elements": [{"type": item.to_yson_type()} for item in self.items]
+ }
+ return yson_repr
+
+
+class _GenericTuple(type_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, tuple):
+ params = (params,)
+ return _TupleAlias(_make_tuple_attrs(params, self.name, self.yt_type_name))
+
+ def from_dict(self, type_):
+ _validate_contains(type_, "elements")
+
+ elements = type_["elements"]
+ _validate(isinstance(elements, list), "\"elements\" must contain a list")
+ _validate(all(isinstance(elem, dict) for elem in elements), "\"elements\" must contain a list of maps")
+
+ params = []
+ for elem in elements:
+ _validate_contains(elem, "type")
+ params.append(_parse_type(elem["type"]))
+
+ return self.__getitem__(tuple(params))
+
+
[email protected]_2_unicode_compatible
+class _DictAlias(type_base.Type):
+ REQUIRED_ATTRS = type_base.Type.REQUIRED_ATTRS + ["key", "value"]
+
+ def __str__(self):
+ return "{}<{}, {}>".format(self.name, self.key, self.value)
+
+ def to_yson_type(self):
+ return {"type_name": self.yt_type_name, "key": self.key.to_yson_type(), "value": self.value.to_yson_type()}
+
+
+class _GenericDict(type_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, tuple) or len(params) != 2:
+ raise ValueError("Expected two types in Dict, but got {}".format(type_base._with_type(params)))
+ key, value = params
+ type_base.validate_type(key)
+ type_base.validate_type(value)
+
+ attrs = {
+ "name": self.name,
+ "yt_type_name": self.yt_type_name,
+ "key": key,
+ "value": value,
+ }
+
+ return _DictAlias(attrs)
+
+ def from_dict(self, type_):
+ _validate_contains(type_, "key")
+ _validate_contains(type_, "value")
+ key = _parse_type(type_["key"])
+ value = _parse_type(type_["value"])
+ return self.__getitem__((key, value))
+
+
+def _validate_struct_fields(items):
+ names = set()
+ for item_name, type_ in items:
+ if not type_base._is_utf8(item_name):
+ raise ValueError("Name of struct field must be UTF-8, got {}".format(type_base._with_type(item_name)))
+
+ if not item_name:
+ raise ValueError("Empty field name is not allowed")
+
+ if item_name in names:
+ raise ValueError("Duplicate fields are not allowed: {}".format(item_name))
+
+ names.add(item_name)
+
+
+def _make_struct_attrs(params, type_name, yt_type_name):
+ for x in params:
+ if not (
+ isinstance(x, slice) and
+ type_base._is_utf8(x.start) and
+ type_base.is_valid_type(x.stop) and
+ x.step is None
+ ):
+ raise ValueError("Expected slice in form of field:type, but got {}".format(type_base._with_type(x)))
+
+ items = tuple((type_base._as_utf8(x.start), x.stop) for x in params)
+ _validate_struct_fields(items)
+
+ attrs = {
+ "name": type_name,
+ "yt_type_name": yt_type_name,
+ "items": items,
+ }
+
+ return attrs
+
+
[email protected]_2_unicode_compatible
+class _StructAlias(type_base.Type):
+ REQUIRED_ATTRS = type_base.Type.REQUIRED_ATTRS + ["items"]
+
+ def __str__(self):
+ params_str = ", ".join(
+ u"{}: {}".format(type_base.quote_string(name), str(type_))
+ for name, type_ in self.items
+ )
+ return u"{}<{}>".format(self.name, params_str)
+
+ def to_yson_type(self):
+ yson_repr = {
+ "type_name": self.yt_type_name,
+ "members": [
+ {"name": name, "type": type_.to_yson_type()}
+ for name, type_ in self.items
+ ]
+ }
+ return yson_repr
+
+
+class _GenericStruct(type_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, tuple):
+ params = (params,)
+ return _StructAlias(_make_struct_attrs(params, self.name, self.yt_type_name))
+
+ def from_dict(self, type_):
+ _validate_contains(type_, "members")
+
+ members = type_["members"]
+ _validate(isinstance(members, list), "\"members\" must contain a list")
+ _validate(all(isinstance(member, dict) for member in members), "\"members\" must contain a list of maps")
+
+ params = []
+ for member in members:
+ _validate_contains(member, "name")
+ _validate_contains(member, "type")
+
+ _validate(type_base._is_utf8(member["name"]), "\"name\" must contain a string")
+ params.append(slice(member["name"], _parse_type(member["type"])))
+
+ return self.__getitem__(tuple(params))
+
+
+class _GenericVariant(type_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, tuple):
+ params = (params,)
+
+ if isinstance(params[0], slice):
+ attrs = _make_struct_attrs(params, self.name, self.yt_type_name)
+ attrs["underlying"] = "Struct"
+ return _StructAlias(attrs)
+ else:
+ attrs = _make_tuple_attrs(params, self.name, self.yt_type_name)
+ attrs["underlying"] = "Tuple"
+ return _TupleAlias(attrs)
+
+ def from_dict(self, type_):
+ _validate("elements" in type_ or "members" in type_, "missing both keys \"members\" and \"elements\"")
+ _validate(not ("elements" in type_ and "members" in type_), "both keys \"members\" and \"elements\" are present")
+
+ if "elements" in type_:
+ cls = Tuple.from_dict(type_)
+ cls.underlying = Tuple.name
+ else:
+ cls = Struct.from_dict(type_)
+ cls.underlying = Struct.name
+
+ cls.name = self.name
+ cls.yt_type_name = self.yt_type_name
+ return cls
+
+
[email protected]_2_unicode_compatible
+class _TaggedAlias(type_base.Type):
+ REQUIRED_ATTRS = type_base.Type.REQUIRED_ATTRS + ["item", "tag"]
+
+ def __str__(self):
+ return u"{}<{}, {}>".format(self.name, str(self.item), type_base.quote_string(self.tag))
+
+ def to_yson_type(self):
+ yson_repr = {
+ "type_name": self.yt_type_name,
+ "item": self.item.to_yson_type(),
+ "tag": self.tag,
+ }
+ return yson_repr
+
+
+class _GenericTagged(type_base.Generic):
+ def __getitem__(self, params):
+ if not (
+ isinstance(params, tuple) and
+ len(params) == 2 and
+ type_base.is_valid_type(params[0]) and
+ (isinstance(params[1], six.string_types) or isinstance(params[1], bytes))
+ ):
+ raise ValueError("Expected type and tag, but got {}".format(type_base._with_type(params)))
+ item, tag = params
+ if not type_base._is_utf8(tag):
+ raise ValueError("Tag must be UTF-8, got {}".format(type_base._with_type(tag)))
+ tag = type_base._as_utf8(tag)
+
+ attrs = {
+ "name": self.name,
+ "yt_type_name": self.yt_type_name,
+ "item": item,
+ "tag": tag,
+ }
+
+ return _TaggedAlias(attrs)
+
+ def from_dict(self, type_):
+ _validate_contains(type_, "tag")
+ _validate_contains(type_, "item")
+
+ tag = type_["tag"]
+ _validate(type_base._is_utf8(tag), "\"tag\" must contain a string")
+
+ item = _parse_type(type_["item"])
+ return self.__getitem__((item, tag,))
+
+
[email protected]_2_unicode_compatible
+class _DecimalAlias(type_base.Type):
+ REQUIRED_ATTRS = type_base.Type.REQUIRED_ATTRS + ["precision", "scale"]
+
+ def __str__(self):
+ return "{}({}, {})".format(self.name, self.precision, self.scale)
+
+ def to_yson_type(self):
+ yson_repr = {
+ "type_name": self.yt_type_name,
+ "precision": self.precision,
+ "scale": self.scale,
+ }
+ return yson_repr
+
+
+class _GenericDecimal(type_base.Generic):
+ def _create_alias(self, precision, scale):
+ if not isinstance(precision, six.integer_types):
+ raise ValueError("Expected integer, but got {}".format(type_base._with_type(precision)))
+
+ if not isinstance(scale, six.integer_types):
+ raise ValueError("Expected integer, but got {}".format(type_base._with_type(scale)))
+
+ attrs = {
+ "name": self.name,
+ "yt_type_name": self.yt_type_name,
+ "precision": precision,
+ "scale": scale,
+ }
+
+ return _DecimalAlias(attrs)
+
+ def __call__(self, precision, scale):
+ return self._create_alias(precision, scale)
+
+ def __getitem__(self, params):
+ if not isinstance(params, tuple) or len(params) != 2:
+ raise ValueError("Expected precision and scale integer parameters")
+
+ precision, scale = params
+ return self._create_alias(precision, scale)
+
+ def from_dict(self, type_):
+ _validate_contains(type_, "precision")
+ _validate_contains(type_, "scale")
+
+ return self.__getitem__((type_["precision"], type_["scale"],))
+
+
+Bool = type_base.make_primitive_type("Bool")
+Int8 = type_base.make_primitive_type("Int8")
+Uint8 = type_base.make_primitive_type("Uint8")
+Int16 = type_base.make_primitive_type("Int16")
+Uint16 = type_base.make_primitive_type("Uint16")
+Int32 = type_base.make_primitive_type("Int32")
+Uint32 = type_base.make_primitive_type("Uint32")
+Int64 = type_base.make_primitive_type("Int64")
+Uint64 = type_base.make_primitive_type("Uint64")
+Float = type_base.make_primitive_type("Float")
+Double = type_base.make_primitive_type("Double")
+String = type_base.make_primitive_type("String")
+Utf8 = type_base.make_primitive_type("Utf8")
+Yson = type_base.make_primitive_type("Yson")
+Json = type_base.make_primitive_type("Json")
+Uuid = type_base.make_primitive_type("Uuid")
+Date = type_base.make_primitive_type("Date")
+Datetime = type_base.make_primitive_type("Datetime")
+Timestamp = type_base.make_primitive_type("Timestamp")
+Interval = type_base.make_primitive_type("Interval")
+TzDate = type_base.make_primitive_type("TzDate", yt_type_name="tz_date")
+TzDatetime = type_base.make_primitive_type("TzDatetime", yt_type_name="tz_datetime")
+TzTimestamp = type_base.make_primitive_type("TzTimestamp", yt_type_name="tz_timestamp")
+
+Void = type_base.make_primitive_type("Void")
+Null = type_base.make_primitive_type("Null")
+
+Optional = _SingleArgumentGeneric("Optional")
+List = _SingleArgumentGeneric("List")
+Tuple = _GenericTuple("Tuple")
+Dict = _GenericDict("Dict")
+Struct = _GenericStruct("Struct")
+Variant = _GenericVariant("Variant")
+Tagged = _GenericTagged("Tagged")
+Decimal = _GenericDecimal("Decimal")
+
+
+EmptyTuple = Tuple.__getitem__(tuple())
+EmptyStruct = Struct.__getitem__(tuple())
+
+PRIMITIVES = {type_.yt_type_name: type_ for type_ in locals().values() if isinstance(type_, type_base.Primitive)}
+GENERICS = {type_.yt_type_name: type_ for type_ in locals().values() if isinstance(type_, type_base.Generic)}
+
+
+def _validate(condition, *error_args):
+ if not condition:
+ raise ValueError(*error_args)
+
+
+def _validate_contains(dict, key):
+ _validate(key in dict, "missing required key \"{}\"".format(key))
+
+
+def _parse_type(type_description):
+ if isinstance(type_description, six.string_types):
+ _validate(type_description in PRIMITIVES, "unknown type \"{}\"".format(type_description))
+ return PRIMITIVES[type_description]
+
+ _validate(isinstance(type_description, dict),
+ "type must be either a string or a map, got {}".format(type_base._with_type(type_description)))
+
+ _validate_contains(type_description, "type_name")
+
+ type_name = type_description["type_name"]
+ _validate(isinstance(type_name, six.string_types), "\"type_name\" must contain a string")
+
+ _validate(type_name in PRIMITIVES or type_name in GENERICS, "unknown type \"{}\"".format(type_name))
+
+ if type_name in PRIMITIVES:
+ return PRIMITIVES[type_name]
+
+ return GENERICS[type_name].from_dict(type_description)
+
+
+def _check_serialization_available():
+ if not _TI_SERIALIZATION_AVAILABLE:
+ raise ImportError("Module `yt.yson` is required to use type_info serialization. "
+ "Make sure you have yt/python/yt/yson in your PEERDIR")
+
+
+def serialize_yson(type_, human_readable=False):
+ _check_serialization_available()
+
+ if not type_base.is_valid_type(type_):
+ raise TypeError("serialize_yson can be called only for types")
+
+ return yt.yson.dumps(type_, yson_format="pretty" if human_readable else "binary")
+
+
+def deserialize_yson(yson):
+ _check_serialization_available()
+
+ if type(yson) is six.text_type and six.PY3:
+ yson = yson.encode("utf-8")
+
+ try:
+ type_description = yt.yson.loads(yson)
+ return _parse_type(type_description)
+ except (ValueError, yt.yson.YsonError) as e:
+ six.raise_from(ValueError("deserialization failed: {}".format(e)), e)
diff --git a/yt/python/yt/type_info/ya.make b/yt/python/yt/type_info/ya.make
new file mode 100644
index 00000000000..22777dca01d
--- /dev/null
+++ b/yt/python/yt/type_info/ya.make
@@ -0,0 +1,19 @@
+PY23_LIBRARY()
+
+PY_SRCS(
+ NAMESPACE yt.type_info
+
+ __init__.py
+ typing.py
+ type_base.py
+)
+
+PEERDIR(
+ contrib/python/six
+)
+
+END()
+
+RECURSE(
+ test
+)
diff --git a/yt/python/yt/ya.make b/yt/python/yt/ya.make
new file mode 100644
index 00000000000..7d72655d65d
--- /dev/null
+++ b/yt/python/yt/ya.make
@@ -0,0 +1,26 @@
+PY23_LIBRARY()
+
+PEERDIR(
+ contrib/python/simplejson
+ contrib/python/six
+ yt/python/yt/type_info
+)
+
+IF(LINUX)
+ PEERDIR(
+ library/python/prctl
+ )
+ENDIF()
+
+PY_SRCS(
+ NAMESPACE yt
+
+ __init__.py
+ common.py
+ json_wrapper.py
+ logger.py
+ logger_config.py
+ subprocess_wrapper.py
+)
+
+END()
diff --git a/yt/python/yt/yson/__init__.py b/yt/python/yt/yson/__init__.py
new file mode 100644
index 00000000000..1bfcd8595e7
--- /dev/null
+++ b/yt/python/yt/yson/__init__.py
@@ -0,0 +1,86 @@
+"""
+YSON library.
+
+Package supports `YT YSON format <https://ytsaurus.tech/docs/en/user-guide/storage/yson>`_.
+
+Package provides special classes for all yson types, see :mod:`yson_types <yt.yson.yson_types>` module.
+Also it provides methods for serialization and deserialization yson data:
+:func:`load <yt.yson.parser.load>`, :func:`loads <yt.yson.parser.loads>`,
+:func:`dump <yt.yson.writer.dump>`, :func:`dumps <yt.yson.writer.dumps>`.
+And finally it provides method :func:`to_yson_type <yt.yson.convert.to_yson_type>` for conversion
+python objects to special yson types.
+
+In special variable `TYPE` you can find implementation type of the library.
+In equals "BINARY" if c++ bindings found and "PYTHON" otherwise.
+
+Examples:
+
+>>> import yt.yson as yson
+>>> yson.loads("{a=10}")
+{'a': 10}
+
+>>> yson.dumps(True)
+'"true"'
+
+>>> number = yson.YsonInteger(10)
+>>> number.attributes["my_attr"] = "hello"
+>>> yson.dumps(number)
+'<"attr"="hello">10'
+
+>>> boolean = to_yson_type(False, attributes={"my_attr": "my_value"})
+
+"""
+
+from __future__ import print_function
+
+from . import writer # noqa
+from . import parser # noqa
+from . import yson_types # noqa
+
+TYPE = None
+try:
+ from yt_yson_bindings import load, loads, dump, dumps, dump_parquete # noqa
+ TYPE = "BINARY"
+except ImportError as error:
+ # XXX(asaitgalin): Sometimes module can't be imported because
+ # it depends on missing dynamic libraries (e.g. libatomic). In this case
+ # diagnostic is printed to stderr.
+ message = str(error)
+ if "No module named" not in message:
+ import sys as _sys
+ print("Warning! Failed to import YSON bindings: " + message, file=_sys.stderr)
+
+if TYPE is None:
+ from .parser import load, loads # noqa
+ from .writer import dump, dumps # noqa
+ TYPE = "PYTHON"
+
+from .yson_types import ( # noqa
+ YsonString, YsonUnicode, YsonInt64, YsonUint64, YsonDouble,
+ YsonBoolean, YsonList, YsonMap, YsonEntity, YsonType, YsonStringProxy,
+ is_unicode, get_bytes, make_byte_key)
+
+from .convert import to_yson_type, yson_to_json, json_to_yson # noqa
+from .common import YsonError # noqa
+
+
+def _loads_from_native_str(string, encoding="utf-8", **kwargs):
+ import sys
+
+ if sys.version_info[0] < 3:
+ return loads(string, **kwargs)
+
+ if isinstance(string, str):
+ string = string.encode(encoding)
+
+ return loads(string, encoding=encoding, **kwargs)
+
+
+def _dumps_to_native_str(obj, encoding="utf-8", **kwargs):
+ import sys
+
+ if sys.version_info[0] < 3:
+ return dumps(obj, **kwargs)
+
+ s = dumps(obj, encoding=encoding, **kwargs)
+ return s.decode(encoding)
diff --git a/yt/python/yt/yson/common.py b/yt/python/yt/yson/common.py
new file mode 100644
index 00000000000..a50526c0557
--- /dev/null
+++ b/yt/python/yt/yson/common.py
@@ -0,0 +1,69 @@
+from yt.common import YtError
+
+try:
+ from yt.packages.six import int2byte, indexbytes
+except ImportError:
+ from six import int2byte, indexbytes
+
+
+class YsonError(YtError):
+ pass
+
+
+def raise_yson_error(message, position_info):
+ line_index, position, offset = position_info
+ raise YsonError(message, attributes={"line": line_index, "position": position, "offset": offset})
+
+
+class StreamWrap(object):
+ def __init__(self, stream, header, footer):
+ self.stream = stream
+ self.header = header
+ self.footer = footer
+
+ self.pos = 0
+ self.state = 0
+
+ def read(self, n):
+ if n == 0:
+ return self.stream.read(0)
+
+ assert n == 1
+
+ if self.state == 0:
+ if self.pos == len(self.header):
+ self.state += 1
+ else:
+ res = int2byte(indexbytes(self.header, self.pos))
+ self.pos += 1
+ return res
+
+ if self.state == 1:
+ sym = self.stream.read(1)
+ if sym:
+ return sym
+ else:
+ self.state += 1
+ self.pos = 0
+
+ if self.state == 2:
+ if self.pos == len(self.footer):
+ self.state += 1
+ else:
+ res = int2byte(indexbytes(self.footer, self.pos))
+ self.pos += 1
+ return res
+
+ if self.state == 3:
+ return b""
+
+
+_ENCODING_SENTINEL = object()
+
+# Binary literals markers
+STRING_MARKER = int2byte(1)
+INT64_MARKER = int2byte(2)
+DOUBLE_MARKER = int2byte(3)
+FALSE_MARKER = int2byte(4)
+TRUE_MARKER = int2byte(5)
+UINT64_MARKER = int2byte(6)
diff --git a/yt/python/yt/yson/convert.py b/yt/python/yt/yson/convert.py
new file mode 100644
index 00000000000..3868fa42441
--- /dev/null
+++ b/yt/python/yt/yson/convert.py
@@ -0,0 +1,162 @@
+from .yson_types import (
+ YsonType, YsonString, YsonUnicode, YsonBoolean, YsonInt64, YsonUint64, YsonDouble,
+ YsonList, YsonMap, YsonEntity)
+from .common import YsonError
+
+try:
+ from yt.packages.six import text_type, binary_type, integer_types, iteritems, PY3
+ from yt.packages.six.moves import map as imap
+except ImportError:
+ from six import text_type, binary_type, integer_types, iteritems, PY3
+ from six.moves import map as imap
+
+import copy
+
+
+def to_yson_type(value, attributes=None, always_create_attributes=True, encoding="utf-8"):
+ """Wraps value with YSON type."""
+ if not always_create_attributes and attributes is None:
+ if isinstance(value, text_type) and not PY3:
+ return value.encode("utf-8")
+ return value
+
+ if isinstance(value, YsonType):
+ if attributes is not None:
+ value = copy.deepcopy(value)
+ value.attributes = attributes
+ return value
+
+ if isinstance(value, text_type):
+ if PY3:
+ result = YsonUnicode(value)
+ else: # COMPAT
+ result = YsonString(value.encode("utf-8"))
+ elif isinstance(value, binary_type):
+ result = YsonString(value)
+ elif value is False or value is True:
+ result = YsonBoolean(value)
+ elif isinstance(value, integer_types):
+ if value < -2 ** 63 or value >= 2 ** 64:
+ raise TypeError("Integer {0} cannot be represented in YSON "
+ "since it is out of range [-2^63, 2^64 - 1])".format(value))
+ greater_than_max_int64 = value >= 2 ** 63
+ if greater_than_max_int64 or isinstance(value, YsonUint64):
+ result = YsonUint64(value)
+ else:
+ result = YsonInt64(value)
+ elif isinstance(value, float):
+ result = YsonDouble(value)
+ elif isinstance(value, list):
+ result = YsonList(value)
+ elif isinstance(value, dict):
+ result = YsonMap(value)
+ else:
+ result = YsonEntity()
+
+ if attributes is not None:
+ result.attributes = attributes
+ else:
+ result.attributes = {}
+
+ return result
+
+
+# TODO(ignat): Should we make auto-detection for use_byte_strings?
+def json_to_yson(json_tree, use_byte_strings=None):
+ """Converts json representation to YSON representation."""
+ def to_literal(string):
+ if use_byte_strings:
+ return string.encode("ascii")
+ else:
+ return string
+
+ def decode_key(string):
+ # In yt wrapper we expect here correct keys, but other usages in arcadia could not give this guarantee.
+ # TODO(ignat): fix this usages.
+ if use_byte_strings:
+ if not isinstance(string, binary_type):
+ string = string.encode("ascii")
+ else:
+ if not isinstance(string, text_type):
+ string = string.decode("ascii")
+
+ if string.startswith(to_literal("$")):
+ if not string.startswith(to_literal("$$")):
+ raise YsonError("Keys should not start with single dollar sign")
+ string = string[1:]
+ return string
+
+ if use_byte_strings is None:
+ use_byte_strings = not PY3
+
+ has_attrs = isinstance(json_tree, dict) and to_literal("$value") in json_tree
+ value = json_tree[to_literal("$value")] if has_attrs else json_tree
+ if isinstance(value, text_type):
+ result = YsonUnicode(value)
+ elif isinstance(value, binary_type):
+ result = YsonString(value)
+ elif value is False or value is True:
+ result = YsonBoolean(value)
+ elif isinstance(value, integer_types):
+ greater_than_max_int64 = value >= 2 ** 63
+ if greater_than_max_int64:
+ result = YsonUint64(value)
+ else:
+ result = YsonInt64(value)
+ elif isinstance(value, float):
+ result = YsonDouble(value)
+ elif isinstance(value, list):
+ result = YsonList(imap(lambda item: json_to_yson(item, use_byte_strings=use_byte_strings), value))
+ elif isinstance(value, dict):
+ result = YsonMap((decode_key(k), json_to_yson(v, use_byte_strings=use_byte_strings)) for k, v in iteritems(YsonMap(value)))
+ elif value is None:
+ result = YsonEntity()
+ else:
+ raise YsonError("Unknown type:", type(value))
+
+ if has_attrs and json_tree.get(to_literal("$attributes"), {}):
+ result.attributes = json_to_yson(json_tree[to_literal("$attributes")], use_byte_strings=use_byte_strings)
+ return result
+
+
+def yson_to_json(yson_tree, print_attributes=True):
+ def encode_key(key):
+ if PY3 and isinstance(key, binary_type):
+ key = key.decode("ascii")
+ if key and key[0] == "$":
+ return "$" + key
+ return key
+
+ def process_dict(d):
+ return dict((encode_key(k), yson_to_json(v)) for k, v in iteritems(d))
+
+ if hasattr(yson_tree, "attributes") and yson_tree.attributes and print_attributes:
+ return {"$attributes": process_dict(yson_tree.attributes),
+ "$value": yson_to_json(yson_tree, print_attributes=False)}
+ if isinstance(yson_tree, list):
+ return list(imap(yson_to_json, yson_tree))
+ elif isinstance(yson_tree, dict):
+ return process_dict(yson_tree)
+ elif isinstance(yson_tree, YsonEntity):
+ return None
+ elif PY3 and (isinstance(yson_tree, YsonString) or isinstance(yson_tree, binary_type)):
+ return yson_tree.decode("utf-8")
+ elif isinstance(yson_tree, bool) or isinstance(yson_tree, YsonBoolean):
+ return True if yson_tree else False
+ else:
+ if type(yson_tree) is YsonEntity:
+ return None
+
+ bases = type(yson_tree).__bases__
+ iter = 0
+ while len(bases) == 1 and YsonType not in bases:
+ bases = bases[0].__bases__
+ iter += 1
+
+ if YsonType in bases:
+ other_types = list(set(bases) - set([YsonType]))
+ if not other_types:
+ raise RuntimeError("Failed to perform yson to json conversion of {!r}".format(yson_tree))
+ other = other_types[0]
+ return other(yson_tree)
+ return yson_tree
diff --git a/yt/python/yt/yson/lexer.py b/yt/python/yt/yson/lexer.py
new file mode 100644
index 00000000000..a6b70cd41e2
--- /dev/null
+++ b/yt/python/yt/yson/lexer.py
@@ -0,0 +1,392 @@
+from . import yson_types
+from .yson_token import (
+ YsonToken,
+ TOKEN_STRING,
+ TOKEN_INT64,
+ TOKEN_UINT64,
+ TOKEN_DOUBLE,
+ TOKEN_BOOLEAN,
+ TOKEN_HASH,
+ TOKEN_LEFT_PARENTHESIS,
+ TOKEN_RIGHT_PARENTHESIS,
+ TOKEN_COMMA,
+ TOKEN_COLON,
+ TOKEN_SEMICOLON,
+ TOKEN_LEFT_ANGLE,
+ TOKEN_EQUALS,
+ TOKEN_RIGHT_ANGLE,
+ TOKEN_LEFT_BRACKET,
+ TOKEN_RIGHT_BRACKET,
+ TOKEN_LEFT_BRACE,
+ TOKEN_RIGHT_BRACE)
+
+from .common import (
+ raise_yson_error, _ENCODING_SENTINEL,
+ STRING_MARKER, INT64_MARKER, DOUBLE_MARKER,
+ FALSE_MARKER, TRUE_MARKER, UINT64_MARKER)
+
+try:
+ from yt.packages.six.moves import xrange
+ from yt.packages.six import int2byte, iterbytes
+except ImportError:
+ from six.moves import xrange
+ from six import int2byte, iterbytes
+
+import struct
+
+_SEEMS_INT64 = int2byte(0)
+_SEEMS_UINT64 = int2byte(1)
+_SEEMS_DOUBLE = int2byte(2)
+
+PERCENT_LITERALS = [b"true", b"false", b"nan", b"inf", b"-inf", b"+inf"]
+PERCENT_LITERAL_LENGTH = dict((s[0:1], len(s)) for s in PERCENT_LITERALS)
+assert len(PERCENT_LITERALS) == len(PERCENT_LITERAL_LENGTH)
+
+
+def _get_numeric_type(string):
+ for code in iterbytes(string):
+ ch = int2byte(code)
+ if ch == b"E" or ch == b"e" or ch == b".":
+ return _SEEMS_DOUBLE
+ elif ch == b"u":
+ return _SEEMS_UINT64
+ return _SEEMS_INT64
+
+
+def _zig_zag_decode(value):
+ return (value >> 1) ^ -(value & 1)
+
+
+class YsonLexer(object):
+ def __init__(self, stream, encoding=None, output_buffer=None):
+ assert (encoding is _ENCODING_SENTINEL) != (output_buffer is None), \
+ "Exactly one of encoding and output_buffer parameters must be specified"
+
+ self._line_index = 1
+ self._position = 1
+ self._offset = 0
+ self._stream = stream
+ self._lookahead = None
+ self._encoding = encoding
+ self._output_buffer = output_buffer
+
+ def _get_start_state(self, ch):
+ tokens = {
+ b"#": TOKEN_HASH,
+ b"(": TOKEN_LEFT_PARENTHESIS,
+ b")": TOKEN_RIGHT_PARENTHESIS,
+ b",": TOKEN_COMMA,
+ b":": TOKEN_COLON,
+ b";": TOKEN_SEMICOLON,
+ b"<": TOKEN_LEFT_ANGLE,
+ b"=": TOKEN_EQUALS,
+ b">": TOKEN_RIGHT_ANGLE,
+ b"[": TOKEN_LEFT_BRACKET,
+ b"]": TOKEN_RIGHT_BRACKET,
+ b"{": TOKEN_LEFT_BRACE,
+ b"}": TOKEN_RIGHT_BRACE,
+ }
+ return tokens.get(ch)
+
+ def get_next_token(self):
+ self._skip_whitespaces()
+ ch = self._peek_char()
+ if not ch:
+ return YsonToken()
+
+ if ch == STRING_MARKER:
+ return YsonToken(value=self._parse_string(), type=TOKEN_STRING)
+
+ elif ch == b"_" or ch == b'"' or ch.isalpha():
+ return YsonToken(value=self._parse_string(), type=TOKEN_STRING)
+ elif ch == INT64_MARKER:
+ return YsonToken(value=self._parse_binary_int64(), type=TOKEN_INT64)
+
+ elif ch == UINT64_MARKER:
+ return YsonToken(value=self._parse_binary_uint64(), type=TOKEN_UINT64)
+
+ elif ch == DOUBLE_MARKER:
+ return YsonToken(value=self._parse_binary_double(), type=TOKEN_DOUBLE)
+
+ elif ch == FALSE_MARKER:
+ self._expect_char(ch)
+ return YsonToken(value=self._maybe_value(False), type=TOKEN_BOOLEAN)
+
+ elif ch == TRUE_MARKER:
+ self._expect_char(ch)
+ return YsonToken(value=self._maybe_value(True), type=TOKEN_BOOLEAN)
+
+ elif ch == b"%":
+ value, token_type = self._parse_percent_literal()
+ return YsonToken(value=self._maybe_value(value), type=token_type)
+
+ elif ch == b"#":
+ return YsonToken(value=self._parse_entity(), type=TOKEN_HASH)
+
+ elif ch == b"+" or ch == b"-" or ch.isdigit():
+ value, token_type = self._parse_numeric()
+ return YsonToken(value=self._maybe_value(value), type=token_type)
+
+ state = self._get_start_state(ch)
+ self._read_char()
+ return YsonToken(value=self._maybe_value(ch), type=state)
+
+ def get_position_info(self):
+ return self._line_index, self._position, self._offset
+
+ def _maybe_value(self, value):
+ if self._output_buffer is None:
+ return value
+ return None
+
+ def _read_char(self, binary_input=False):
+ if self._lookahead is None:
+ result = self._stream.read(1)
+ else:
+ result = self._lookahead
+ self._lookahead = None
+
+ self._offset += 1
+ if not binary_input and result == b"\n":
+ self._line_index += 1
+ self._position = 1
+ else:
+ self._position += 1
+
+ if self._output_buffer is not None:
+ self._output_buffer.append(ord(result))
+
+ return result
+
+ def _peek_char(self):
+ if self._lookahead is not None:
+ return self._lookahead
+ self._lookahead = self._stream.read(1)
+ return self._lookahead
+
+ def _read_binary_chars(self, char_count):
+ if self._output_buffer is not None:
+ string = self._stream.read(char_count)
+ self._position += len(string)
+ if len(string) != char_count:
+ raise_yson_error(
+ "Premature end-of-stream while reading byte {0} out of {1}".format(len(string) + 1, char_count),
+ self.get_position_info())
+ self._output_buffer += string
+ return string
+
+ result = []
+ for i in xrange(char_count):
+ ch = self._read_char(True)
+ if not ch:
+ raise_yson_error(
+ "Premature end-of-stream while reading byte {0} out of {1}".format(i + 1, char_count),
+ self.get_position_info())
+ result.append(ch)
+ return b"".join(result)
+
+ def _expect_char(self, expected_ch):
+ read_ch = self._read_char()
+ if not read_ch:
+ raise_yson_error(
+ 'Premature end-of-stream expecting "{0}" in Yson'.format(expected_ch),
+ self.get_position_info())
+ if read_ch != expected_ch:
+ raise_yson_error(
+ 'Found "{0}" while expecting "{1}" in Yson'.format(read_ch, expected_ch),
+ self.get_position_info())
+
+ def _skip_whitespaces(self):
+ while self._peek_char().isspace():
+ self._read_char()
+
+ def _read_string(self):
+ ch = self._peek_char()
+ if not ch:
+ raise_yson_error(
+ "Premature end-of-stream while expecting string literal in Yson",
+ self.get_position_info())
+ if ch == STRING_MARKER:
+ return self._read_binary_string()
+ if ch == b'"':
+ return self._read_quoted_string()
+ if not ch.isalpha() and not ch == b"_" and not ch == b"%":
+ raise_yson_error(
+ "Expecting string literal but found {0} in Yson".format(ch),
+ self.get_position_info())
+ return self._read_unquoted_string()
+
+ def _read_binary_string(self):
+ self._expect_char(STRING_MARKER)
+ length = _zig_zag_decode(self._read_varint())
+ string = self._read_binary_chars(length)
+ if self._output_buffer is None:
+ return self._decode_string(string)
+
+ def _read_varint(self):
+ count = 0
+ result = 0
+ read_next = True
+ while read_next:
+ ch = self._read_char()
+ if not ch:
+ raise_yson_error(
+ "Premature end-of-stream while reading varinteger in Yson",
+ self.get_position_info())
+ byte = ord(ch)
+ result |= (byte & 0x7F) << (7 * count)
+ if result > 2 ** 64 - 1:
+ raise_yson_error(
+ "Varinteger is too large for Int64 in Yson",
+ self.get_position_info())
+ count += 1
+ read_next = byte & 0x80 != 0
+
+ return yson_types._YsonIntegerBase(result)
+
+ def _read_quoted_string(self):
+ self._expect_char(b'"')
+ if self._output_buffer is None:
+ result = []
+ pending_next_char = False
+ while True:
+ ch = self._read_char()
+ if not ch:
+ raise_yson_error(
+ "Premature end-of-stream while reading string literal in Yson",
+ self.get_position_info())
+ if ch == b'"' and not pending_next_char:
+ break
+ if self._output_buffer is None:
+ result.append(ch)
+ if pending_next_char:
+ pending_next_char = False
+ elif ch == b"\\":
+ pending_next_char = True
+ if self._output_buffer is None:
+ return self._decode_string(self._unescape(b"".join(result)))
+
+ def _unescape(self, string):
+ return string.decode("unicode_escape").encode("latin1")
+
+ def _decode_string(self, string):
+ assert self._encoding is not _ENCODING_SENTINEL
+ if self._encoding is not None:
+ try:
+ return string.decode(self._encoding)
+ except UnicodeDecodeError:
+ proxy = yson_types.YsonStringProxy()
+ proxy._bytes = string
+ return proxy
+ else:
+ return string
+
+ def _read_unquoted_string(self):
+ if self._output_buffer is None:
+ result = []
+ while True:
+ ch = self._peek_char()
+ if ch and (ch.isalpha() or ch.isdigit() or ch in b"_%-."):
+ self._read_char()
+ if self._output_buffer is None:
+ result.append(ch)
+ else:
+ break
+ if self._output_buffer is None:
+ return self._decode_string(b"".join(result))
+
+ def _read_numeric(self):
+ result = []
+ while True:
+ ch = self._peek_char()
+ if not ch or not (ch.isdigit() or ch in b"+-.eEu"):
+ break
+ self._read_char()
+ result.append(ch)
+ if not result:
+ raise_yson_error(
+ "Premature end-of-stream while parsing numeric literal in Yson",
+ self.get_position_info())
+ return b"".join(result)
+
+ def _parse_percent_literal(self):
+ def raise_unexpected(string):
+ expected = [b"%" + literal for literal in PERCENT_LITERALS]
+ raise_yson_error(
+ "Incorrect percent-preceded literal %s, expected one of %s" % (b"%" + string, expected),
+ self.get_position_info())
+ self._expect_char(b"%")
+ ch = self._peek_char()
+ if ch not in PERCENT_LITERAL_LENGTH:
+ raise_unexpected(ch)
+ string = self._read_binary_chars(PERCENT_LITERAL_LENGTH[ch])
+ if string == b"true":
+ return True, TOKEN_BOOLEAN
+ elif string == b"false":
+ return False, TOKEN_BOOLEAN
+ elif string in PERCENT_LITERALS:
+ return float(string), TOKEN_DOUBLE
+ else:
+ raise_unexpected(string)
+
+ def _parse_entity(self):
+ self._expect_char(b"#")
+ return None
+
+ def _parse_string(self):
+ return self._read_string()
+
+ def _parse_binary_int64(self):
+ self._expect_char(INT64_MARKER)
+ varint = self._read_varint()
+ if self._output_buffer is None:
+ return _zig_zag_decode(varint)
+
+ def _parse_binary_uint64(self):
+ self._expect_char(UINT64_MARKER)
+ varint = self._read_varint()
+ if self._output_buffer is None:
+ return yson_types.YsonUint64(varint)
+
+ def _parse_binary_double(self):
+ self._expect_char(DOUBLE_MARKER)
+ bytes_ = self._read_binary_chars(struct.calcsize(b"<d"))
+ if self._output_buffer is None:
+ return struct.unpack(b"<d", bytes_)[0]
+
+ def _parse_numeric(self):
+ string = self._read_numeric()
+ numeric_type = _get_numeric_type(string)
+ if numeric_type == _SEEMS_INT64:
+ try:
+ result = yson_types._YsonIntegerBase(string)
+ token_type = TOKEN_INT64
+ if result > 2 ** 63 - 1 or result < -(2 ** 63):
+ raise ValueError()
+ except ValueError:
+ raise_yson_error(
+ "Failed to parse Int64 literal {0} in Yson".format(string),
+ self.get_position_info())
+ elif numeric_type == _SEEMS_UINT64:
+ try:
+ if string.endswith(b"u"):
+ string = string[:-1]
+ else:
+ raise ValueError()
+ result = yson_types.YsonUint64(int(string))
+ token_type = TOKEN_UINT64
+ if result > 2 ** 64 - 1:
+ raise ValueError()
+ except ValueError:
+ raise_yson_error(
+ "Failed to parse Uint64 literal {0} in Yson".format(string),
+ self.get_position_info())
+ else:
+ try:
+ result = float(string)
+ token_type = TOKEN_DOUBLE
+ except ValueError:
+ raise_yson_error(
+ "Failed to parse Double literal {0} in Yson".format(string),
+ self.get_position_info())
+ return result, token_type
diff --git a/yt/python/yt/yson/parser.py b/yt/python/yt/yson/parser.py
new file mode 100644
index 00000000000..746ee6806bf
--- /dev/null
+++ b/yt/python/yt/yson/parser.py
@@ -0,0 +1,287 @@
+from . import convert
+from .common import raise_yson_error, YsonError, StreamWrap, _ENCODING_SENTINEL
+from .tokenizer import YsonTokenizer
+from .yson_token import (
+ TOKEN_STRING,
+ TOKEN_INT64,
+ TOKEN_UINT64,
+ TOKEN_DOUBLE,
+ TOKEN_BOOLEAN,
+ TOKEN_HASH,
+ TOKEN_SEMICOLON,
+ TOKEN_LEFT_ANGLE,
+ TOKEN_EQUALS,
+ TOKEN_RIGHT_ANGLE,
+ TOKEN_LEFT_BRACKET,
+ TOKEN_RIGHT_BRACKET,
+ TOKEN_LEFT_BRACE,
+ TOKEN_RIGHT_BRACE,
+ TOKEN_START_OF_STREAM,
+ TOKEN_END_OF_STREAM,
+)
+
+try:
+ from yt.packages.six import PY3, BytesIO, text_type
+except ImportError:
+ from six import PY3, BytesIO, text_type
+
+
+def _is_text_reader(stream):
+ return type(stream.read(0)) is text_type
+
+
+class YsonParser(object):
+ def __init__(self, stream, encoding, always_create_attributes):
+ # COMPAT: Before porting YSON to Python 3 it supported parsing from
+ # unicode strings.
+ if _is_text_reader(stream) and PY3:
+ raise TypeError("Only binary streams are supported by YSON parser")
+ self._tokenizer = YsonTokenizer(stream, encoding)
+ self._always_create_attributes = always_create_attributes
+ self._encoding = encoding
+
+ def _has_attributes(self):
+ try:
+ self._tokenizer.parse_next()
+ except YsonError:
+ return False
+ return self._tokenizer.get_current_type() == TOKEN_LEFT_ANGLE
+
+ def _parse_attributes(self):
+ self._tokenizer.get_current_token().expect_type(TOKEN_LEFT_ANGLE)
+ result = {}
+ while True:
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_ANGLE:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_STRING)
+ key = self._tokenizer.get_current_token().get_value()
+ if not key:
+ raise_yson_error(
+ "Empty attribute name in Yson",
+ self._tokenizer.get_position_info())
+ self._tokenizer.parse_next()
+ self._tokenizer.get_current_token().expect_type(TOKEN_EQUALS)
+ self._tokenizer.parse_next()
+ value = self._parse_any()
+ if key in result:
+ raise_yson_error(
+ 'Repeated attribute "{0}" in Yson'.format(key),
+ self._tokenizer.get_position_info())
+ result[key] = value
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_ANGLE:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_SEMICOLON)
+ self._tokenizer.get_current_token().expect_type(TOKEN_RIGHT_ANGLE)
+ return result
+
+ def _parse_list(self):
+ self._tokenizer.get_current_token().expect_type(TOKEN_LEFT_BRACKET)
+ result = []
+ while True:
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_BRACKET:
+ break
+ value = self._parse_any()
+ result.append(value)
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_BRACKET:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_SEMICOLON)
+ self._tokenizer.get_current_token().expect_type(TOKEN_RIGHT_BRACKET)
+ return result
+
+ def _parse_map(self):
+ self._tokenizer.get_current_token().expect_type(TOKEN_LEFT_BRACE)
+ result = {}
+ while True:
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_BRACE:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_STRING)
+ key = self._tokenizer.get_current_token().get_value()
+ self._tokenizer.parse_next()
+ self._tokenizer.get_current_token().expect_type(TOKEN_EQUALS)
+ self._tokenizer.parse_next()
+ value = self._parse_any()
+ if key in result:
+ raise_yson_error(
+ 'Duplicate map key "{0}" in YSON'.format(key),
+ self._tokenizer.get_position_info())
+ result[key] = value
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_BRACE:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_SEMICOLON)
+ self._tokenizer.get_current_token().expect_type(TOKEN_RIGHT_BRACE)
+ return result
+
+ def _parse_any(self):
+ if self._tokenizer.get_current_type() == TOKEN_START_OF_STREAM:
+ self._tokenizer.parse_next()
+ attributes = None
+ if self._tokenizer.get_current_type() == TOKEN_LEFT_ANGLE:
+ attributes = self._parse_attributes()
+ self._tokenizer.parse_next()
+
+ if self._tokenizer.get_current_type() == TOKEN_END_OF_STREAM:
+ raise_yson_error(
+ "Premature end-of-stream in Yson",
+ self._tokenizer.get_position_info())
+
+ if self._tokenizer.get_current_type() == TOKEN_LEFT_BRACKET:
+ result = self._parse_list()
+
+ elif self._tokenizer.get_current_type() == TOKEN_LEFT_BRACE:
+ result = self._parse_map()
+
+ elif self._tokenizer.get_current_type() == TOKEN_HASH:
+ result = None
+
+ else:
+ self._tokenizer.get_current_token().expect_type((TOKEN_BOOLEAN, TOKEN_INT64, TOKEN_UINT64,
+ TOKEN_STRING, TOKEN_DOUBLE))
+ result = self._tokenizer.get_current_token().get_value()
+
+ return convert.to_yson_type(
+ result,
+ attributes=attributes,
+ always_create_attributes=self._always_create_attributes,
+ encoding=self._encoding,
+ )
+
+ def parse(self):
+ result = self._parse_any()
+ self._tokenizer.parse_next()
+ self._tokenizer.get_current_token().expect_type(TOKEN_END_OF_STREAM)
+ return result
+
+
+class RawYsonParser(object):
+ def __init__(self, stream):
+ if _is_text_reader(stream) and PY3:
+ raise TypeError("Only binary streams are supported by YSON parser")
+ self._buffer = bytearray()
+ self._tokenizer = YsonTokenizer(stream, output_buffer=self._buffer)
+
+ def _parse_mapping(self, end_token):
+ while True:
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == end_token:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_STRING)
+ self._tokenizer.parse_next()
+ self._tokenizer.get_current_token().expect_type(TOKEN_EQUALS)
+ self._tokenizer.parse_next()
+ self._parse_any()
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == end_token:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_SEMICOLON)
+ self._tokenizer.get_current_token().expect_type(end_token)
+
+ def _parse_attributes(self):
+ self._tokenizer.get_current_token().expect_type(TOKEN_LEFT_ANGLE)
+ self._parse_mapping(TOKEN_RIGHT_ANGLE)
+
+ def _parse_map(self):
+ self._tokenizer.get_current_token().expect_type(TOKEN_LEFT_BRACE)
+ self._parse_mapping(TOKEN_RIGHT_BRACE)
+
+ def _parse_list(self):
+ self._tokenizer.get_current_token().expect_type(TOKEN_LEFT_BRACKET)
+ while True:
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_BRACKET:
+ break
+ self._parse_any()
+ self._tokenizer.parse_next()
+ if self._tokenizer.get_current_type() == TOKEN_RIGHT_BRACKET:
+ break
+ self._tokenizer.get_current_token().expect_type(TOKEN_SEMICOLON)
+ self._tokenizer.get_current_token().expect_type(TOKEN_RIGHT_BRACKET)
+
+ def _parse_any(self):
+ if self._tokenizer.get_current_type() == TOKEN_START_OF_STREAM:
+ self._tokenizer.parse_next()
+
+ if self._tokenizer.get_current_type() == TOKEN_LEFT_ANGLE:
+ self._parse_attributes()
+ self._tokenizer.parse_next()
+
+ if self._tokenizer.get_current_type() == TOKEN_END_OF_STREAM:
+ raise_yson_error(
+ "Premature end-of-stream in Yson",
+ self._tokenizer.get_position_info())
+
+ if self._tokenizer.get_current_type() == TOKEN_LEFT_BRACKET:
+ self._parse_list()
+
+ elif self._tokenizer.get_current_type() == TOKEN_LEFT_BRACE:
+ self._parse_map()
+
+ elif self._tokenizer.get_current_type() == TOKEN_HASH:
+ pass
+
+ else:
+ self._tokenizer.get_current_token().expect_type((TOKEN_BOOLEAN, TOKEN_INT64, TOKEN_UINT64,
+ TOKEN_STRING, TOKEN_DOUBLE))
+
+ def _flush_buffer(self):
+ res = bytes(self._buffer)
+ self._buffer[:] = b''
+ return res
+
+ def parse(self):
+ while self._tokenizer.get_current_type() != TOKEN_END_OF_STREAM:
+ self._parse_any()
+ self._tokenizer.parse_next()
+ self._tokenizer.get_current_token().expect_type(TOKEN_SEMICOLON)
+ yield self._flush_buffer()
+ self._tokenizer.parse_next()
+
+
+def load(stream, yson_type=None, always_create_attributes=True, raw=None,
+ encoding=_ENCODING_SENTINEL, lazy=False):
+ """Deserializes object from YSON formatted stream `stream`.
+
+ :param str yson_type: type of YSON, one of ["node", "list_fragment", "map_fragment"].
+ """
+ if lazy:
+ raise YsonError("Lazy parsing is not supported in python parser")
+
+ if raw:
+ if yson_type != "list_fragment":
+ raise YsonError("Raw mode is only supported for list fragments")
+ return RawYsonParser(stream).parse()
+
+ if not PY3 and encoding is not _ENCODING_SENTINEL and encoding is not None:
+ raise YsonError("Encoding parameter is not supported for Python 2")
+
+ if encoding is _ENCODING_SENTINEL:
+ if PY3:
+ encoding = "utf-8"
+ else:
+ encoding = None
+
+ if yson_type == "list_fragment":
+ stream = StreamWrap(stream, b"[", b"]")
+ elif yson_type == "map_fragment":
+ stream = StreamWrap(stream, b"{", b"}")
+ else:
+ if yson_type is not None:
+ raise YsonError("Unexpected yson type: {0!r}".format(yson_type))
+
+ parser = YsonParser(stream, encoding, always_create_attributes)
+ return parser.parse()
+
+
+def loads(string, yson_type=None, always_create_attributes=True, raw=None,
+ encoding=_ENCODING_SENTINEL, lazy=False):
+ """Deserializes object from YSON formatted string `string`. See :func:`load <.load>`."""
+ if type(string) is text_type and PY3:
+ raise TypeError("Only binary streams are supported by YSON parser")
+ return load(BytesIO(string), yson_type=yson_type,
+ always_create_attributes=always_create_attributes,
+ raw=raw, encoding=encoding, lazy=lazy)
diff --git a/yt/python/yt/yson/tokenizer.py b/yt/python/yt/yson/tokenizer.py
new file mode 100644
index 00000000000..eb4d5a77a92
--- /dev/null
+++ b/yt/python/yt/yson/tokenizer.py
@@ -0,0 +1,21 @@
+from .yson_token import YsonToken, TOKEN_START_OF_STREAM
+from .lexer import YsonLexer
+from .common import _ENCODING_SENTINEL
+
+
+class YsonTokenizer(object):
+ def __init__(self, input_str, encoding=_ENCODING_SENTINEL, output_buffer=None):
+ self._token = YsonToken(type=TOKEN_START_OF_STREAM)
+ self._lexer = YsonLexer(input_str, encoding=encoding, output_buffer=output_buffer)
+
+ def parse_next(self):
+ self._token = self._lexer.get_next_token()
+
+ def get_current_token(self):
+ return self._token
+
+ def get_current_type(self):
+ return self.get_current_token().get_type()
+
+ def get_position_info(self):
+ return self._lexer.get_position_info()
diff --git a/yt/python/yt/yson/writer.py b/yt/python/yt/yson/writer.py
new file mode 100644
index 00000000000..5b0511a0236
--- /dev/null
+++ b/yt/python/yt/yson/writer.py
@@ -0,0 +1,442 @@
+# -*- coding: utf-8 -*-
+
+"""``yson`` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules.
+
+Serializable types (rules applied top to bottom):
+int, long -> yson int
+str -> yson string
+unicode -> yson string (using specified encoding)
+any Mapping -> yson dict
+any Iterable -> yson list
+
+Simple examples::
+
+ >>> import yson
+ >>> b = [4, 5, 6]
+ >>> print yson.dumps({"a" : b, "b" : b}, indent=" ")
+ {
+ "a" : [
+ 4;
+ 5;
+ 6;
+ ];
+ "b" : [
+ 4;
+ 5;
+ 6;
+ ];
+ }
+ >>> print yson.dumps(("a", "a"), indent=" ")
+ [
+ "a";
+ "a";
+ ]
+ >>> print yson.dumps(123456)
+ 123456
+ >>> print yson.dumps(u'"Hello world!" -- "Превед, медвед!"')
+ "\"Hello world!\" -- \"\xd0\x9f\xd1\x80\xd0\xb5\xd0\xb2\xd0\xb5\xd0\xb4, \xd0\xbc\xd0\xb5\xd0\xb4\xd0\xb2\xd0\xb5\xd0\xb4!\""
+"""
+
+from .common import (YsonError,
+ STRING_MARKER, INT64_MARKER, DOUBLE_MARKER,
+ FALSE_MARKER, TRUE_MARKER, UINT64_MARKER)
+from . import yson_types
+
+try:
+ from yt.packages.six.moves import map as imap
+ from yt.packages.six import (integer_types, text_type, binary_type,
+ iteritems, iterkeys, iterbytes, PY3)
+except ImportError:
+ from six.moves import map as imap
+ from six import (integer_types, text_type, binary_type,
+ iteritems, iterkeys, iterbytes, PY3)
+
+import math
+import struct
+# Python3 compatibility
+try:
+ from collections.abc import Iterable, Mapping
+except ImportError:
+ from collections import Iterable, Mapping
+
+__all__ = ["dump", "dumps"]
+
+
+def _is_hex_digit(c):
+ return ord(b'0') <= c <= ord(b'9') or ord(b'A') <= c <= ord(b'F') or ord(b'a') <= c <= ord(b'f')
+
+
+def _is_oct_digit(c):
+ return ord(b'0') <= c <= ord(b'7')
+
+
+def _escape_byte(c, nxt, r):
+ if c == ord(b"\""):
+ r += b"\\\""
+ elif c == ord(b"\\"):
+ r += b"\\\\"
+ elif 32 <= c <= 126:
+ r.append(c)
+ elif c == ord(b"\r"):
+ r += b"\\r"
+ elif c == ord(b"\n"):
+ r += b"\\n"
+ elif c == ord(b"\t"):
+ r += b"\\t"
+ elif c < 8 and not _is_oct_digit(nxt):
+ r += '\\{}'.format(c).encode("ascii")
+ elif not _is_hex_digit(nxt):
+ r += '\\x{:02X}'.format(c).encode("ascii")
+ else:
+ r += '\\{:03o}'.format(c).encode("ascii")
+
+
+def _escape_bytes(obj):
+ if len(obj) == 0:
+ return b""
+
+ res = bytearray()
+ iterator = iterbytes(obj)
+ cur = next(iterator)
+ for nxt in iterator:
+ _escape_byte(cur, nxt, res)
+ cur = nxt
+ _escape_byte(cur, ord(b" "), res)
+ return bytes(res)
+
+
+def dump(object, stream, yson_format=None, yson_type=None, indent=None,
+ ignore_inner_attributes=False, encoding="utf-8", sort_keys=False,
+ check_circular=True):
+ """Serializes `object` as a YSON formatted stream to `stream`.
+
+ :param str yson_format: format of YSON, one of ["binary", "text", "pretty"].
+ :param str yson_type: type of YSON, one of ["node", "list_fragment", "map_fragment"].
+ :param int indent: number of indentation spaces in pretty format.
+ :param bool ignore_inner_attributes: skip attributes of non-top-level values.
+ :param str encoding: encoding that uses to encode unicode strings.
+ :param bool sort_keys: if True, mapping items are printed in sorted order.
+ :param bool check_circular: prevent the attempt to serialize an object with reference loop.
+ """
+
+ stream.write(dumps(object, yson_format=yson_format, yson_type=yson_type, indent=indent,
+ ignore_inner_attributes=ignore_inner_attributes,
+ encoding=encoding, sort_keys=sort_keys,
+ check_circular=check_circular))
+
+
+class YsonContext(object):
+ def __init__(self):
+ self.path_parts = []
+ self.row_index = None
+
+ def push(self, key_or_index):
+ self.path_parts.append(key_or_index)
+
+ def pop(self):
+ self.path_parts.pop()
+
+
+def _raise_error_with_context(message, context):
+ attributes = {}
+ if context.row_index is not None:
+ attributes["row_index"] = context.row_index
+
+ path_parts = imap(str, context.path_parts)
+ if context.path_parts:
+ attributes["row_key_path"] = "/" + "/".join(path_parts)
+ raise YsonError(message, attributes=attributes)
+
+
+def _zig_zag_encode(value):
+ return (value >> 63) ^ (value << 1)
+
+
+def _dump_varint(value):
+ assert 0 <= value <= 2 ** 64 - 1
+ result = bytearray()
+ while value >= 0x80:
+ result.append(0x80 | (value & 0x7F))
+ value >>= 7
+ result.append(value)
+ return bytes(result)
+
+
+def dumps(object, yson_format=None, yson_type=None, indent=None,
+ ignore_inner_attributes=False, encoding="utf-8", sort_keys=False,
+ check_circular=True):
+ """Serializes `object` as a YSON formatted stream to string and returns it. See :func:`dump <.dump>`."""
+ if indent is None:
+ indent = 4
+ if isinstance(indent, int):
+ indent = b" " * indent
+ if yson_format is None:
+ yson_format = "text"
+ if yson_format not in ("pretty", "text", "binary"):
+ raise YsonError("{0} format is not supported".format(yson_format))
+ if yson_format in ("text", "binary"):
+ indent = None
+ if yson_type is not None:
+ if yson_type not in ["list_fragment", "map_fragment", "node"]:
+ raise YsonError("YSON type {0} is not supported".format(yson_type))
+ else:
+ yson_type = "node"
+
+ is_text = yson_format in ("text", "pretty")
+ d = Dumper(check_circular, encoding, indent, yson_type, sort_keys,
+ is_text=is_text, ignore_inner_attributes=ignore_inner_attributes)
+ return d.dumps(object, YsonContext())
+
+
+class Dumper(object):
+ def __init__(self, check_circular, encoding, indent, yson_type, sort_keys,
+ is_text, ignore_inner_attributes):
+ self.yson_type = yson_type
+
+ self._seen_objects = None
+ if check_circular:
+ self._seen_objects = {}
+
+ self._encoding = encoding
+ self._format = FormatDetails(indent, sort_keys)
+ if yson_type == "node":
+ self._level = -1
+ else:
+ self._level = -2 # Stream elements are one level deep, but need not be indented
+
+ self._is_text = is_text
+ self._ignore_inner_attributes = ignore_inner_attributes
+
+ def _has_attributes(self, obj):
+ if hasattr(obj, "has_attributes"):
+ return obj.has_attributes()
+ return hasattr(obj, "attributes")
+
+ def dumps(self, obj, context):
+ if hasattr(obj, "to_yson_type") and callable(obj.to_yson_type):
+ return self.dumps(obj.to_yson_type(), context)
+ self._level += 1
+ attributes = b""
+ if self._has_attributes(obj) and (not self._ignore_inner_attributes or self._level == 0):
+ if not isinstance(obj.attributes, dict):
+ _raise_error_with_context('Invalid field "attributes": it must be string or None', context)
+ if obj.attributes:
+ attributes = self._dump_attributes(obj.attributes, context)
+
+ result = None
+ if obj is False or (isinstance(obj, yson_types.YsonBoolean) and not obj):
+ if self._is_text:
+ result = b"%false"
+ else:
+ result = FALSE_MARKER
+ elif obj is True or (isinstance(obj, yson_types.YsonBoolean) and obj):
+ if self._is_text:
+ result = b"%true"
+ else:
+ result = TRUE_MARKER
+ elif isinstance(obj, integer_types):
+ if obj < -2 ** 63 or obj >= 2 ** 64:
+ _raise_error_with_context("Integer {0} cannot be represented in YSON "
+ "since it is out of range [-2^63, 2^64 - 1])".format(obj), context)
+
+ greater_than_max_int64 = obj >= 2 ** 63
+ if isinstance(obj, yson_types.YsonUint64) and obj < 0:
+ _raise_error_with_context("Can not dump negative integer as YSON uint64", context)
+ if isinstance(obj, yson_types.YsonInt64) and greater_than_max_int64:
+ _raise_error_with_context("Can not dump integer greater than 2^63-1 as YSON int64", context)
+
+ result = self._dump_integer(obj, greater_than_max_int64)
+ elif isinstance(obj, float):
+ result = self._dump_float(obj)
+ elif isinstance(obj, (text_type, binary_type, yson_types.YsonStringProxy)):
+ result = self._dump_string(obj, context)
+ elif isinstance(obj, Mapping):
+ result = self._dump_map(obj, context)
+ elif isinstance(obj, Iterable):
+ result = self._dump_list(obj, context)
+ elif isinstance(obj, yson_types.YsonEntity) or obj is None:
+ result = b"#"
+ else:
+ _raise_error_with_context("{0!r} is not Yson serializable".format(obj), context)
+ self._level -= 1
+ return attributes + result
+
+ def _dump_integer(self, obj, force_uint64):
+ if self._is_text:
+ if isinstance(obj, (yson_types.YsonInt64, yson_types.YsonUint64)):
+ obj_str = str(yson_types._YsonIntegerBase(obj))
+ else:
+ obj_str = str(obj)
+
+ result = obj_str.encode("ascii")
+ if not PY3:
+ result = result.rstrip(b"L")
+ if force_uint64 or isinstance(obj, yson_types.YsonUint64):
+ result += b"u"
+ else:
+ if force_uint64 or isinstance(obj, yson_types.YsonUint64):
+ result = UINT64_MARKER + _dump_varint(obj)
+ else:
+ result = INT64_MARKER + _dump_varint(_zig_zag_encode(obj))
+ return result
+
+ def _dump_float(self, obj):
+ if self._is_text:
+ if math.isnan(obj):
+ result = b"%nan"
+ elif math.isinf(obj):
+ if obj > 0:
+ result = b"%inf"
+ else:
+ result = b"%-inf"
+ else:
+ if type(obj) == yson_types.YsonDouble:
+ obj_str = str(float(obj))
+ else:
+ obj_str = str(obj)
+ result = obj_str.encode("ascii")
+ else:
+ result = DOUBLE_MARKER + struct.pack("<d", obj)
+ return result
+
+ def _dump_string(self, obj, context):
+ if isinstance(obj, binary_type):
+ result = obj
+ elif isinstance(obj, text_type):
+ if self._encoding is None:
+ _raise_error_with_context('Cannot encode unicode object {0!r} to bytes since "encoding" '
+ 'parameter is None. Consider using byte strings '
+ 'instead or specify encoding'.format(obj),
+ context)
+ result = obj.encode(self._encoding)
+ elif isinstance(obj, yson_types.YsonStringProxy):
+ result = obj._bytes
+ else:
+ assert False
+ if self._is_text:
+ return b"".join([b'"', _escape_bytes(result), b'"'])
+ else:
+ encoded_len = _zig_zag_encode(len(result))
+ return b"".join([STRING_MARKER, _dump_varint(encoded_len), result])
+
+ def _dump_map(self, obj, context):
+ is_stream = self.yson_type == "map_fragment" and self._level == -1
+ result = []
+ if not is_stream:
+ result += [b"{", self._format.nextline()]
+
+ for k, v in self._format.mapping_iter(obj):
+ if not isinstance(k, (text_type, binary_type, yson_types.YsonStringProxy)):
+ _raise_error_with_context("Only string can be Yson map key. Key: {0!r}".format(k), context)
+
+ @self._circular_check(v)
+ def process_item():
+ context.push(k)
+ item = [self._format.prefix(self._level + 1),
+ self._dump_string(k, context), self._format.space(), b"=",
+ self._format.space(), self.dumps(v, context), b";", self._format.nextline(is_stream)]
+ context.pop()
+ return item
+
+ result += process_item()
+
+ if not is_stream:
+ result += [self._format.prefix(self._level), b"}"]
+
+ return b"".join(result)
+
+ def _dump_list(self, obj, context):
+ is_stream = self.yson_type == "list_fragment" and self._level == -1
+ result = []
+ if not is_stream:
+ result += [b"[", self._format.nextline()]
+
+ for index, v in enumerate(obj):
+ @self._circular_check(v)
+ def process_item():
+ if is_stream:
+ context.row_index = index
+ else:
+ context.push(index)
+ item = [self._format.prefix(self._level + 1),
+ self.dumps(v, context), b";", self._format.nextline(is_stream)]
+ if not is_stream:
+ context.pop()
+ return item
+
+ result += process_item()
+
+ if not is_stream:
+ result += [self._format.prefix(self._level), b"]"]
+
+ return b"".join(result)
+
+ def _dump_attributes(self, obj, context):
+ result = [b"<", self._format.nextline()]
+ for k, v in obj.items():
+ if not isinstance(k, (text_type, binary_type)):
+ _raise_error_with_context("Only string can be Yson map key. Key: {0!r}".format(obj), context)
+
+ @self._circular_check(v)
+ def process_item():
+ context.push("@" + k)
+ item = [self._format.prefix(self._level + 1),
+ self._dump_string(k, context), self._format.space(), b"=",
+ self._format.space(), self.dumps(v, context), b";", self._format.nextline()]
+ context.pop()
+ return item
+
+ result += process_item()
+ result += [self._format.prefix(self._level), b">"]
+ return b"".join(result)
+
+ def _circular_check(self, obj):
+ def decorator(fn):
+ def wrapper(*args, **kwargs):
+ obj_id = None
+ if self._seen_objects is not None:
+ obj_id = id(obj)
+ if obj_id in self._seen_objects:
+ raise YsonError("Circular reference detected. Object: {0!r}".format(obj))
+ else:
+ self._seen_objects[obj_id] = obj
+
+ result = fn(*args, **kwargs)
+
+ if self._seen_objects:
+ del self._seen_objects[obj_id]
+ return result
+
+ return wrapper
+ return decorator
+
+
+class FormatDetails(object):
+ def __init__(self, indent, sort_keys=False):
+ self._indent = indent
+ self._sort_keys = sort_keys
+
+ def prefix(self, level):
+ if self._indent:
+ return b"".join([self._indent] * level)
+ else:
+ return b""
+
+ def nextline(self, force=False):
+ if force or self._indent:
+ return b"\n"
+ else:
+ return b""
+
+ def space(self):
+ if self._indent:
+ return b" "
+ else:
+ return b""
+
+ def mapping_iter(self, mapping):
+ if self._sort_keys:
+ return ((key, mapping[key]) for key in sorted(iterkeys(mapping)))
+ else:
+ return iteritems(mapping)
diff --git a/yt/python/yt/yson/ya.make b/yt/python/yt/yson/ya.make
new file mode 100644
index 00000000000..37bf8a4046a
--- /dev/null
+++ b/yt/python/yt/yson/ya.make
@@ -0,0 +1,23 @@
+PY23_LIBRARY()
+
+PEERDIR(
+ yt/python/yt
+
+ contrib/python/six
+)
+
+PY_SRCS(
+ NAMESPACE yt.yson
+
+ __init__.py
+ common.py
+ convert.py
+ lexer.py
+ parser.py
+ tokenizer.py
+ writer.py
+ yson_token.py
+ yson_types.py
+)
+
+END()
diff --git a/yt/python/yt/yson/yson_token.py b/yt/python/yt/yson/yson_token.py
new file mode 100644
index 00000000000..ef230f72031
--- /dev/null
+++ b/yt/python/yt/yson/yson_token.py
@@ -0,0 +1,155 @@
+from .common import YsonError
+
+from yt.common import flatten
+
+try:
+ from yt.packages.six.moves import map as imap
+ from yt.packages.six import PY3
+except ImportError:
+ from six.moves import map as imap
+ from six import PY3
+
+import string
+
+
+TOKEN_LITERAL = 0
+TOKEN_SLASH = 1
+TOKEN_AMPERSAND = 2
+TOKEN_AT = 3
+TOKEN_ASTERISK = 4
+TOKEN_START_OF_STREAM = 5
+TOKEN_END_OF_STREAM = 6
+TOKEN_RANGE = 7
+TOKEN_SEMICOLON = 8
+TOKEN_EQUALS = 9
+TOKEN_LEFT_BRACE = 10
+TOKEN_RIGHT_BRACE = 11
+TOKEN_HASH = 12
+TOKEN_LEFT_BRACKET = 13
+TOKEN_RIGHT_BRACKET = 14
+TOKEN_LEFT_ANGLE = 15
+TOKEN_RIGHT_ANGLE = 16
+TOKEN_LEFT_PARENTHESIS = 17
+TOKEN_RIGHT_PARENTHESIS = 18
+TOKEN_COLON = 19
+TOKEN_COMMA = 20
+TOKEN_STRING = 21
+TOKEN_INT64 = 22
+TOKEN_UINT64 = 23
+TOKEN_DOUBLE = 24
+TOKEN_BOOLEAN = 25
+TOKEN_SPECIAL = 26
+
+CHAR_TO_TOKEN_TYPE = {
+ ";": TOKEN_SEMICOLON,
+ "=": TOKEN_EQUALS,
+ "{": TOKEN_LEFT_BRACE,
+ "}": TOKEN_RIGHT_BRACE,
+ "#": TOKEN_HASH,
+ "[": TOKEN_LEFT_BRACKET,
+ "]": TOKEN_RIGHT_BRACKET,
+ "<": TOKEN_LEFT_ANGLE,
+ ">": TOKEN_RIGHT_ANGLE,
+ "(": TOKEN_LEFT_PARENTHESIS,
+ ")": TOKEN_RIGHT_PARENTHESIS,
+ ":": TOKEN_COLON,
+ ",": TOKEN_COMMA,
+ "/": TOKEN_SLASH,
+ "@": TOKEN_AT,
+ "&": TOKEN_AMPERSAND,
+ "*": TOKEN_ASTERISK
+}
+
+
+def char_to_token_type(char_or_byte):
+ if PY3:
+ char_or_byte = chr(char_or_byte)
+ if char_or_byte not in CHAR_TO_TOKEN_TYPE:
+ return TOKEN_END_OF_STREAM
+ return CHAR_TO_TOKEN_TYPE[char_or_byte]
+
+
+def token_type_to_string(token):
+ names = {
+ TOKEN_LITERAL: "Literal",
+ TOKEN_SLASH: "Slash",
+ TOKEN_AMPERSAND: "Ampersand",
+ TOKEN_AT: "At",
+ TOKEN_ASTERISK: "Asterisk",
+ TOKEN_START_OF_STREAM: "Start-of-stream",
+ TOKEN_END_OF_STREAM: "End-of-stream",
+ TOKEN_RANGE: "Range",
+ TOKEN_SEMICOLON: "Semicolon",
+ TOKEN_EQUALS: "Equals",
+ TOKEN_LEFT_BRACE: "Left-brace",
+ TOKEN_RIGHT_BRACE: "Right-brace",
+ TOKEN_HASH: "Hash",
+ TOKEN_LEFT_BRACKET: "Left-bracket",
+ TOKEN_RIGHT_BRACKET: "Right-bracket",
+ TOKEN_LEFT_ANGLE: "Left-angle",
+ TOKEN_RIGHT_ANGLE: "Right-angle",
+ TOKEN_LEFT_PARENTHESIS: "Left-parenthesis",
+ TOKEN_RIGHT_PARENTHESIS: "Right-parenthesis",
+ TOKEN_COLON: "Colon",
+ TOKEN_COMMA: "Comma",
+ TOKEN_STRING: "String",
+ TOKEN_INT64: "Int64",
+ TOKEN_UINT64: "Uint64",
+ TOKEN_DOUBLE: "Double",
+ TOKEN_BOOLEAN: "Boolean",
+ TOKEN_SPECIAL: "Special",
+ }
+ if token is None:
+ return "Unknown"
+ return names[token]
+
+
+def decode_token_value(value):
+ if not PY3 or not isinstance(value, bytes):
+ return value
+
+ chars = []
+ for byte in value:
+ char = chr(byte)
+ if char in string.printable: # whitespaces cannot present in token
+ chars.append(char)
+ else:
+ chars.append("\\x" + hex(byte)[2:])
+
+ return "".join(chars)
+
+
+class YsonToken(object):
+ def __init__(self, value="", type=TOKEN_END_OF_STREAM):
+ self._value = value
+ self._type = type
+
+ def get_value(self):
+ return self._value
+
+ def get_type(self):
+ return self._type
+
+ def _raise_error(self, message_end_of_stream, message_unexpected_token, token_type, value, expected_type):
+ if token_type == TOKEN_END_OF_STREAM:
+ raise YsonError(message_end_of_stream.format(expected_type))
+ else:
+ raise YsonError(message_unexpected_token.format(value, token_type_to_string(token_type), expected_type))
+
+ def expect_type(self, type_or_types):
+ token_type = self.get_type()
+ expected_types = flatten(type_or_types)
+ if token_type is None:
+ raise YsonError('Unexpected "{0}" while parsing node'.format(decode_token_value(self.get_value())))
+
+ if token_type not in expected_types:
+ if token_type == TOKEN_END_OF_STREAM:
+ raise YsonError("Unexpected end of stream; expected types are {0}".format(expected_types))
+ else:
+ raise YsonError('Unexpected token "{0}" of type {1}; '
+ 'expected types are {2}'.format(decode_token_value(self.get_value()),
+ token_type_to_string(token_type),
+ list(imap(token_type_to_string, expected_types))))
+
+ def __str__(self):
+ return str(self._value)
diff --git a/yt/python/yt/yson/yson_types.py b/yt/python/yt/yson/yson_types.py
new file mode 100644
index 00000000000..00dbcc55477
--- /dev/null
+++ b/yt/python/yt/yson/yson_types.py
@@ -0,0 +1,331 @@
+try:
+ from yt.packages.six import PY3, integer_types, binary_type, text_type
+except ImportError:
+ from six import PY3, integer_types, binary_type, text_type
+
+from yt.common import YtError
+
+
+class YsonType(object):
+ def __getattr__(self, attribute):
+ if attribute == "attributes":
+ self.__dict__[attribute] = {}
+ return self.__dict__[attribute]
+ raise AttributeError('Attribute "{0}" not found'.format(attribute))
+
+ def has_attributes(self):
+ try:
+ return "attributes" in self.__dict__ and self.attributes is not None and self.attributes != {}
+ except: # noqa
+ return False
+
+ def __eq__(self, other):
+ try:
+ has_attributes = other.has_attributes()
+ except AttributeError:
+ has_attributes = False
+ if has_attributes:
+ return self.attributes == other.attributes
+ return not self.has_attributes()
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def to_str(self, base_type, str_func):
+ if self.has_attributes():
+ return str_func({"value": base_type(self), "attributes": self.attributes})
+ return str_func(base_type(self))
+
+ def base_hash(self, type_):
+ if self.has_attributes():
+ raise TypeError("unhashable type: YSON has non-trivial attributes")
+ return hash(type_(self))
+
+
+class YsonString(binary_type, YsonType):
+ def __eq__(self, other):
+ # COMPAT: With implicit promotion of str to unicode it can make sense
+ # to compare binary YsonString to unicode string.
+ if not isinstance(other, (binary_type, text_type)):
+ return NotImplemented
+ return binary_type(self) == binary_type(other) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return self.base_hash(binary_type)
+
+ def __repr__(self):
+ return self.to_str(binary_type, repr)
+
+
+class YsonUnicode(text_type, YsonType):
+ def __eq__(self, other):
+ if not isinstance(other, text_type):
+ return NotImplemented
+ return text_type(self) == text_type(other) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return self.base_hash(text_type)
+
+ def __repr__(self):
+ return self.to_str(text_type, repr)
+
+
+class NotUnicodeError(YtError, TypeError):
+ pass
+
+
+def _truncate(s, length=50):
+ assert isinstance(s, bytes)
+ if len(s) < length:
+ return s
+ return s[:length] + b"..."
+
+
+def _make_raise_not_unicode_error(name):
+ def fun(self, *args, **kwargs):
+ raise NotUnicodeError('Method "{}" is not allowed: YSON string "{}" '
+ "could not be decoded to Unicode, "
+ "see https://ytsaurus.tech/docs/en/api/python/userdoc#python3_strings"
+ .format(name, _truncate(self._bytes)))
+ return fun
+
+
+def proxy(cls):
+ ALLOWED_METHODS = [
+ "get_bytes",
+ "is_unicode",
+ "__hash__",
+ "__eq__",
+ "__ne__",
+ "__repr__",
+ "__format__",
+ "__dict__",
+ "__qualname__",
+ "__class__",
+ "__mro__",
+ "__new__",
+ "__init__",
+ "__getattr__",
+ "__setattr__",
+ "__getattribute__",
+ "__copy__",
+ "__deepcopy__",
+ ]
+
+ ADDITIONAL_METHODS = [
+ "__radd__",
+ ]
+
+ for name in dir(text_type):
+ attr = getattr(text_type, name)
+ if callable(attr) and name not in ALLOWED_METHODS:
+ setattr(cls, name, _make_raise_not_unicode_error(name))
+ for name in ADDITIONAL_METHODS:
+ setattr(cls, name, _make_raise_not_unicode_error(name))
+ return cls
+
+
+# NB: This class is never returned by library in Python2.
+# NB: Don't create this class by hand, it should only be returned
+# from the library.
+@proxy
+class YsonStringProxy(YsonType):
+ def __repr__(self):
+ value = "<YsonStringProxy>{!r}".format(self._bytes)
+ if self.has_attributes():
+ return repr({"attributes": self.attributes, "value": value})
+ return value
+
+ def __format__(self, format_spec):
+ return repr(self)
+
+ def __copy__(self):
+ return self
+
+ def __deepcopy__(self, memo):
+ return self
+
+ def __hash__(self):
+ return hash(self._bytes)
+
+ def __eq__(self, other):
+ if isinstance(other, bytes):
+ return self._bytes == bytes(other) and YsonType.__eq__(self, other)
+ elif isinstance(other, YsonStringProxy):
+ return self._bytes == other._bytes and YsonType.__eq__(self, other)
+ else:
+ return NotImplemented
+
+ def __ne__(self, other):
+ return not (self == other)
+
+
+def is_unicode(x):
+ return isinstance(x, text_type)
+
+
+def get_bytes(x, encoding="utf8"):
+ if isinstance(x, text_type):
+ return x.encode(encoding)
+ elif isinstance(x, YsonStringProxy):
+ return x._bytes
+ elif isinstance(x, binary_type):
+ return x
+ else:
+ raise TypeError("get_bytes() expected str, bytes or YsonStringProxy, got <{}>{!r}"
+ .format(type(x), x))
+
+
+def make_byte_key(s):
+ proxy = YsonStringProxy()
+ proxy._bytes = s
+ return proxy
+
+
+if PY3:
+ _YsonIntegerBase = int
+else:
+ _YsonIntegerBase = long # noqa
+
+
+class YsonIntegerBase(_YsonIntegerBase, YsonType):
+ def __eq__(self, other):
+ if not isinstance(other, integer_types):
+ return NotImplemented
+ return _YsonIntegerBase(self) == _YsonIntegerBase(other) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return self.base_hash(_YsonIntegerBase)
+
+ def __repr__(self):
+ return self.to_str(_YsonIntegerBase, repr)
+
+ def __str__(self):
+ return self.to_str(_YsonIntegerBase, str)
+
+
+class YsonInt64(YsonIntegerBase):
+ pass
+
+
+class YsonUint64(YsonIntegerBase):
+ pass
+
+
+class YsonDouble(float, YsonType):
+ def __eq__(self, other):
+ if not isinstance(other, float):
+ return NotImplemented
+ return float(self) == float(other) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return self.base_hash(float)
+
+ def __repr__(self):
+ return self.to_str(float, repr)
+
+ def __str__(self):
+ return self.to_str(float, str)
+
+
+class YsonBoolean(int, YsonType):
+ def __eq__(self, other):
+ if not isinstance(other, int):
+ return NotImplemented
+ return (int(self) == 0) == (int(other) == 0) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return self.base_hash(bool)
+
+ # NB: do not change this representation, because
+ # this type required to be JSON serializable.
+ # JSON encoder thinks that it is integer and calls str.
+ def __repr__(self):
+ return "true" if self else "false"
+
+ def __str__(self):
+ return self.__repr__()
+
+
+class YsonList(list, YsonType):
+ def __eq__(self, other):
+ if not isinstance(other, list):
+ return NotImplemented
+ return list(self) == list(other) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ raise TypeError('unhashable type "YsonList"')
+
+ def __repr__(self):
+ return self.to_str(list, repr)
+
+ def __str__(self):
+ return self.to_str(list, str)
+
+
+class YsonMap(dict, YsonType):
+ def __eq__(self, other):
+ if not isinstance(other, dict):
+ return NotImplemented
+ return dict(self) == dict(other) and YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ raise TypeError('unhashable type "YsonMap"')
+
+ def __repr__(self):
+ return self.to_str(dict, repr)
+
+ def __str__(self):
+ return self.to_str(dict, str)
+
+
+class YsonEntity(YsonType):
+ def __init__(self, value=None):
+ if value is not None:
+ assert isinstance(value, YsonEntity)
+ self.attributes = value.attributes
+
+ def __eq__(self, other):
+ if other is None and not self.attributes:
+ return True
+ if not isinstance(other, YsonEntity):
+ return NotImplemented
+ return YsonType.__eq__(self, other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __bool__(self):
+ return False
+
+ def __repr__(self):
+ if self.attributes:
+ return repr({"value": "YsonEntity", "attributes": self.attributes})
+ else:
+ return "YsonEntity"
+
+ def __str__(self):
+ return self.__repr__()
+
+ __nonzero__ = __bool__