diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /library/python/runtime_py3/importer.pxi | |
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz |
add ydb deps
Diffstat (limited to 'library/python/runtime_py3/importer.pxi')
-rw-r--r-- | library/python/runtime_py3/importer.pxi | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/library/python/runtime_py3/importer.pxi b/library/python/runtime_py3/importer.pxi new file mode 100644 index 0000000000..6b3c5889a5 --- /dev/null +++ b/library/python/runtime_py3/importer.pxi @@ -0,0 +1,609 @@ +import marshal +import sys +from _codecs import utf_8_decode, utf_8_encode +from _frozen_importlib import _call_with_frames_removed, spec_from_loader, BuiltinImporter +from _frozen_importlib_external import _os, _path_isfile, _path_isabs, path_sep, _path_join, _path_split +from _io import FileIO + +import __res as __resource + +_b = lambda x: x if isinstance(x, bytes) else utf_8_encode(x)[0] +_s = lambda x: x if isinstance(x, str) else utf_8_decode(x)[0] +env_entry_point = b'Y_PYTHON_ENTRY_POINT' +env_source_root = b'Y_PYTHON_SOURCE_ROOT' +cfg_source_root = b'arcadia-source-root' +env_extended_source_search = b'Y_PYTHON_EXTENDED_SOURCE_SEARCH' +res_ya_ide_venv = b'YA_IDE_VENV' +executable = sys.executable or 'Y_PYTHON' +sys.modules['run_import_hook'] = __resource + +# This is the prefix in contrib/tools/python3/src/Lib/ya.make. +py_prefix = b'py/' +py_prefix_len = len(py_prefix) + +YA_IDE_VENV = __resource.find(res_ya_ide_venv) +Y_PYTHON_EXTENDED_SOURCE_SEARCH = _os.environ.get(env_extended_source_search) or YA_IDE_VENV + + +def _init_venv(): + if not _path_isabs(executable): + raise RuntimeError('path in sys.executable is not absolute: {}'.format(executable)) + + # Creative copy-paste from site.py + exe_dir, _ = _path_split(executable) + site_prefix, _ = _path_split(exe_dir) + libpath = _path_join(site_prefix, 'lib', + 'python%d.%d' % sys.version_info[:2], + 'site-packages') + sys.path.insert(0, libpath) + + # emulate site.venv() + sys.prefix = site_prefix + sys.exec_prefix = site_prefix + + conf_basename = 'pyvenv.cfg' + candidate_confs = [ + conffile for conffile in ( + _path_join(exe_dir, conf_basename), + _path_join(site_prefix, conf_basename) + ) + if _path_isfile(conffile) + ] + if not candidate_confs: + raise RuntimeError('{} not found'.format(conf_basename)) + virtual_conf = candidate_confs[0] + with FileIO(virtual_conf, 'r') as f: + for line in f: + if b'=' in line: + key, _, value = line.partition(b'=') + key = key.strip().lower() + value = value.strip() + if key == cfg_source_root: + return value + raise RuntimeError('{} key not found in {}'.format(cfg_source_root, virtual_conf)) + + +def _get_source_root(): + env_value = _os.environ.get(env_source_root) + if env_value or not YA_IDE_VENV: + return env_value + + return _init_venv() + + +Y_PYTHON_SOURCE_ROOT = _get_source_root() + + +def _print(*xs): + """ + This is helpful for debugging, since automatic bytes to str conversion is + not available yet. It is also possible to debug with GDB by breaking on + __Pyx_AddTraceback (with Python GDB pretty printers enabled). + """ + parts = [] + for s in xs: + if not isinstance(s, (bytes, str)): + s = str(s) + parts.append(_s(s)) + sys.stderr.write(' '.join(parts) + '\n') + + +def file_bytes(path): + # 'open' is not avaiable yet. + with FileIO(path, 'r') as f: + return f.read() + + +def iter_keys(prefix): + l = len(prefix) + for idx in range(__resource.count()): + key = __resource.key_by_index(idx) + if key.startswith(prefix): + yield key, key[l:] + + +def iter_py_modules(with_keys=False): + for key, path in iter_keys(b'resfs/file/' + py_prefix): + if path.endswith(b'.py'): # It may also end with '.pyc'. + mod = _s(path[:-3].replace(b'/', b'.')) + if with_keys: + yield key, mod + else: + yield mod + + +def py_src_key(filename): + return py_prefix + _b(filename) + + +def iter_prefixes(s): + i = s.find('.') + while i >= 0: + yield s[:i] + i = s.find('.', i + 1) + + +def resfs_resolve(path): + """ + Return the absolute path of a root-relative path if it exists. + """ + path = _b(path) + if Y_PYTHON_SOURCE_ROOT: + if not path.startswith(Y_PYTHON_SOURCE_ROOT): + path = _b(path_sep).join((Y_PYTHON_SOURCE_ROOT, path)) + if _path_isfile(path): + return path + + +def resfs_src(key, resfs_file=False): + """ + Return the root-relative file path of a resource key. + """ + if resfs_file: + key = b'resfs/file/' + _b(key) + return __resource.find(b'resfs/src/' + _b(key)) + + +def resfs_read(path, builtin=None): + """ + Return the bytes of the resource file at path, or None. + If builtin is True, do not look for it on the filesystem. + If builtin is False, do not look in the builtin resources. + """ + if builtin is not True: + arcpath = resfs_src(path, resfs_file=True) + if arcpath: + fspath = resfs_resolve(arcpath) + if fspath: + return file_bytes(fspath) + + if builtin is not False: + return __resource.find(b'resfs/file/' + _b(path)) + + +def resfs_files(prefix=b''): + """ + List builtin resource file paths. + """ + return [key[11:] for key, _ in iter_keys(b'resfs/file/' + _b(prefix))] + + +def mod_path(mod): + """ + Return the resfs path to the source code of the module with the given name. + """ + return py_prefix + _b(mod).replace(b'.', b'/') + b'.py' + + +class ResourceImporter(object): + + """ A meta_path importer that loads code from built-in resources. + """ + + def __init__(self): + self.memory = set(iter_py_modules()) # Set of importable module names. + self.source_map = {} # Map from file names to module names. + self._source_name = {} # Map from original to altered module names. + self._package_prefix = '' + if Y_PYTHON_SOURCE_ROOT and Y_PYTHON_EXTENDED_SOURCE_SEARCH: + self.arcadia_source_finder = ArcadiaSourceFinder(_s(Y_PYTHON_SOURCE_ROOT)) + else: + self.arcadia_source_finder = None + + for p in list(self.memory) + list(sys.builtin_module_names): + for pp in iter_prefixes(p): + k = pp + '.__init__' + if k not in self.memory: + self.memory.add(k) + + def for_package(self, name): + import copy + importer = copy.copy(self) + importer._package_prefix = name + '.' + return importer + + def _find_mod_path(self, fullname): + """Find arcadia relative path by module name""" + relpath = resfs_src(mod_path(fullname), resfs_file=True) + if relpath or not self.arcadia_source_finder: + return relpath + return self.arcadia_source_finder.get_module_path(fullname) + + def find_spec(self, fullname, path=None, target=None): + try: + is_package = self.is_package(fullname) + except ImportError: + return None + return spec_from_loader(fullname, self, is_package=is_package) + + def find_module(self, fullname, path=None): + """For backward compatibility.""" + spec = self.find_spec(fullname, path) + return spec.loader if spec is not None else None + + def create_module(self, spec): + """Use default semantics for module creation.""" + + def exec_module(self, module): + code = self.get_code(module.__name__) + module.__file__ = code.co_filename + if self.is_package(module.__name__): + module.__path__= [executable + path_sep + module.__name__.replace('.', path_sep)] + # exec(code, module.__dict__) + _call_with_frames_removed(exec, code, module.__dict__) + + # PEP-302 extension 1 of 3: data loader. + def get_data(self, path): + path = _b(path) + abspath = resfs_resolve(path) + if abspath: + return file_bytes(abspath) + path = path.replace(_b('\\'), _b('/')) + data = resfs_read(path, builtin=True) + if data is None: + raise IOError(path) # Y_PYTHON_ENTRY_POINT=:resource_files + return data + + # PEP-302 extension 2 of 3: get __file__ without importing. + def get_filename(self, fullname): + modname = fullname + if self.is_package(fullname): + fullname += '.__init__' + relpath = self._find_mod_path(fullname) + if isinstance(relpath, bytes): + relpath = _s(relpath) + return relpath or modname + + # PEP-302 extension 3 of 3: packaging introspection. + # Used by `linecache` (while printing tracebacks) unless module filename + # exists on the filesystem. + def get_source(self, fullname): + fullname = self._source_name.get(fullname) or fullname + if self.is_package(fullname): + fullname += '.__init__' + + relpath = self.get_filename(fullname) + if relpath: + abspath = resfs_resolve(relpath) + if abspath: + return _s(file_bytes(abspath)) + data = resfs_read(mod_path(fullname)) + return _s(data) if data else '' + + def get_code(self, fullname): + modname = fullname + if self.is_package(fullname): + fullname += '.__init__' + + path = mod_path(fullname) + relpath = self._find_mod_path(fullname) + if relpath: + abspath = resfs_resolve(relpath) + if abspath: + data = file_bytes(abspath) + return compile(data, _s(abspath), 'exec', dont_inherit=True) + + yapyc_path = path + b'.yapyc3' + yapyc_data = resfs_read(yapyc_path, builtin=True) + if yapyc_data: + return marshal.loads(yapyc_data) + else: + py_data = resfs_read(path, builtin=True) + if py_data: + return compile(py_data, _s(relpath), 'exec', dont_inherit=True) + else: + # This covers packages with no __init__.py in resources. + return compile('', modname, 'exec', dont_inherit=True) + + def is_package(self, fullname): + if fullname in self.memory: + return False + + if fullname + '.__init__' in self.memory: + return True + + if self.arcadia_source_finder: + return self.arcadia_source_finder.is_package(fullname) + + raise ImportError(fullname) + + # Extension for contrib/python/coverage. + def file_source(self, filename): + """ + Return the key of the module source by its resource path. + """ + if not self.source_map: + for key, mod in iter_py_modules(with_keys=True): + path = self.get_filename(mod) + self.source_map[path] = key + + if filename in self.source_map: + return self.source_map[filename] + + if resfs_read(filename, builtin=True) is not None: + return b'resfs/file/' + _b(filename) + + return b'' + + # Extension for pkgutil.iter_modules. + def iter_modules(self, prefix=''): + import re + rx = re.compile(re.escape(self._package_prefix) + r'([^.]+)(\.__init__)?$') + for p in self.memory: + m = rx.match(p) + if m: + yield prefix + m.group(1), m.group(2) is not None + if self.arcadia_source_finder: + for m in self.arcadia_source_finder.iter_modules(self._package_prefix, prefix): + yield m + + def get_resource_reader(self, fullname): + try: + if not self.is_package(fullname): + return None + except ImportError: + return None + return _ResfsResourceReader(self, fullname) + + +class _ResfsResourceReader: + + def __init__(self, importer, fullname): + self.importer = importer + self.fullname = fullname + + import os + self.prefix = "{}/".format(os.path.dirname(self.importer.get_filename(self.fullname))) + + def open_resource(self, resource): + path = f'{self.prefix}{resource}' + from io import BytesIO + try: + return BytesIO(self.importer.get_data(path)) + except OSError: + raise FileNotFoundError(path) + + def resource_path(self, resource): + # All resources are in the binary file, so there is no path to the file. + # Raising FileNotFoundError tells the higher level API to extract the + # binary data and create a temporary file. + raise FileNotFoundError + + def is_resource(self, name): + path = f'{self.prefix}{name}' + try: + self.importer.get_data(path) + except OSError: + return False + return True + + def contents(self): + subdirs_seen = set() + for key in resfs_files(self.prefix): + relative = key[len(self.prefix):] + res_or_subdir, *other = relative.split(b'/') + if not other: + yield _s(res_or_subdir) + elif res_or_subdir not in subdirs_seen: + subdirs_seen.add(res_or_subdir) + yield _s(res_or_subdir) + + +class BuiltinSubmoduleImporter(BuiltinImporter): + @classmethod + def find_spec(cls, fullname, path=None, target=None): + if path is not None: + return super().find_spec(fullname, None, target) + else: + return None + + +class ArcadiaSourceFinder: + """ + Search modules and packages in arcadia source tree. + See https://wiki.yandex-team.ru/devtools/extended-python-source-search/ for details + """ + NAMESPACE_PREFIX = b'py/namespace/' + PY_EXT = '.py' + YA_MAKE = 'ya.make' + S_IFDIR = 0o040000 + + def __init__(self, source_root): + self.source_root = source_root + self.module_path_cache = {'': set()} + for key, dirty_path in iter_keys(self.NAMESPACE_PREFIX): + # dirty_path contains unique prefix to prevent repeatable keys in the resource storage + path = dirty_path.split(b'/', 1)[1] + namespaces = __resource.find(key).split(b':') + for n in namespaces: + package_name = _s(n.rstrip(b'.')) + self.module_path_cache.setdefault(package_name, set()).add(_s(path)) + # Fill parents with default empty path set if parent doesn't exist in the cache yet + while package_name: + package_name = package_name.rpartition('.')[0] + if package_name in self.module_path_cache: + break + self.module_path_cache.setdefault(package_name, set()) + for package_name in self.module_path_cache.keys(): + self._add_parent_dirs(package_name, visited=set()) + + def get_module_path(self, fullname): + """ + Find file path for module 'fullname'. + For packages caller pass fullname as 'package.__init__'. + Return None if nothing is found. + """ + try: + if not self.is_package(fullname): + return _b(self._cache_module_path(fullname)) + except ImportError: + pass + + def is_package(self, fullname): + """Check if fullname is a package. Raise ImportError if fullname is not found""" + path = self._cache_module_path(fullname) + if isinstance(path, set): + return True + if isinstance(path, str): + return False + raise ImportError(fullname) + + def iter_modules(self, package_prefix, prefix): + paths = self._cache_module_path(package_prefix.rstrip('.')) + if paths is not None: + # Note: it's ok to yield duplicates because pkgutil discards them + + # Yield from cache + import re + rx = re.compile(re.escape(package_prefix) + r'([^.]+)$') + # Save result to temporary list to prevent 'RuntimeError: dictionary changed size during iteration' + found = [] + for mod, path in self.module_path_cache.items(): + if path is not None: + m = rx.match(mod) + if m: + found.append((prefix + m.group(1), self.is_package(mod))) + for cm in found: + yield cm + + # Yield from file system + for path in paths: + abs_path = _path_join(self.source_root, path) + for dir_item in _os.listdir(abs_path): + if self._path_is_simple_dir(_path_join(abs_path, dir_item)): + yield prefix + dir_item, True + elif dir_item.endswith(self.PY_EXT) and _path_isfile(_path_join(abs_path, dir_item)): + yield prefix + dir_item[:-len(self.PY_EXT)], False + + def _isdir(self, path): + """ Unlike _path_isdir() this function don't follow symlink """ + try: + stat_info = _os.lstat(path) + except OSError: + return False + return (stat_info.st_mode & 0o170000) == self.S_IFDIR + + def _path_is_simple_dir(self, abs_path): + """ + Check if path is a directory but doesn't contain ya.make file. + We don't want to steal directory from nested project and treat it as a package + """ + return self._isdir(abs_path) and not _path_isfile(_path_join(abs_path, self.YA_MAKE)) + + def _find_module_in_paths(self, find_package_only, paths, module): + """Auxiliary method. See _cache_module_path() for details""" + if paths: + package_paths = set() + for path in paths: + rel_path = _path_join(path, module) + if not find_package_only: + # Check if file_path is a module + module_path = rel_path + self.PY_EXT + if _path_isfile(_path_join(self.source_root, module_path)): + return module_path + # Check if file_path is a package + if self._path_is_simple_dir(_path_join(self.source_root, rel_path)): + package_paths.add(rel_path) + if package_paths: + return package_paths + + def _cache_module_path(self, fullname, find_package_only=False): + """ + Find module path or package directory paths and save result in the cache + + find_package_only=True - don't try to find module + + Returns: + List of relative package paths - for a package + Relative module path - for a module + None - module or package is not found + """ + if fullname not in self.module_path_cache: + parent, _, tail = fullname.rpartition('.') + parent_paths = self._cache_module_path(parent, find_package_only=True) + self.module_path_cache[fullname] = self._find_module_in_paths(find_package_only, parent_paths, tail) + return self.module_path_cache[fullname] + + def _add_parent_dirs(self, package_name, visited): + if not package_name or package_name in visited: + return + visited.add(package_name) + + parent, _, tail = package_name.rpartition('.') + self._add_parent_dirs(parent, visited) + + paths = self.module_path_cache[package_name] + for parent_path in self.module_path_cache[parent]: + rel_path = _path_join(parent_path, tail) + if self._path_is_simple_dir(_path_join(self.source_root, rel_path)): + paths.add(rel_path) + + +def excepthook(*args, **kws): + # traceback module cannot be imported at module level, because interpreter + # is not fully initialized yet + + import traceback + + return traceback.print_exception(*args, **kws) + + +importer = ResourceImporter() + + +def executable_path_hook(path): + if path == executable: + return importer + + if path.startswith(executable + path_sep): + return importer.for_package(path[len(executable + path_sep):].replace(path_sep, '.')) + + raise ImportError(path) + + +def get_path0(): + """ + An incomplete and simplified version of _PyPathConfig_ComputeSysPath0. + We need this to somewhat properly emulate the behaviour of a normal python interpreter + when using ya ide venv. + + """ + if not sys.argv: + return + argv0 = sys.argv[0] + + have_module_arg = argv0 == '-m' + + if have_module_arg: + return _os.getcwd() + + +if YA_IDE_VENV: + sys.meta_path.append(importer) + sys.meta_path.append(BuiltinSubmoduleImporter) + if executable not in sys.path: + sys.path.append(executable) + path0 = get_path0() + if path0 is not None: + sys.path.insert(0, path0) + + sys.path_hooks.append(executable_path_hook) +else: + sys.meta_path.insert(0, BuiltinSubmoduleImporter) + sys.meta_path.insert(0, importer) + if executable not in sys.path: + sys.path.insert(0, executable) + sys.path_hooks.insert(0, executable_path_hook) + +sys.path_importer_cache[executable] = importer + +# Indicator that modules and resources are built-in rather than on the file system. +sys.is_standalone_binary = True +sys.frozen = True + +# Set of names of importable modules. +sys.extra_modules = importer.memory + +# Use custom implementation of traceback printer. +# Built-in printer (PyTraceBack_Print) does not support custom module loaders +sys.excepthook = excepthook |