diff options
author | prettyboy <prettyboy@yandex-team.com> | 2025-01-18 00:52:04 +0300 |
---|---|---|
committer | prettyboy <prettyboy@yandex-team.com> | 2025-01-18 01:13:15 +0300 |
commit | 29c54233d8257efabcec7fd41d359d826a1bf610 (patch) | |
tree | d942e12c2f14efb7e8537322be6ceea61f685b0b /library/python | |
parent | fad0cdd2a82a23ebe041e1643bd2175285f76b37 (diff) | |
download | ydb-29c54233d8257efabcec7fd41d359d826a1bf610.tar.gz |
[library/python/runtime_py3] Added bytecode support for external-py-files mode
Добавляется поддержка стандартной работы с байткодом для бинарей собранных с --ext-py (HIDDEN_URL
На примере импорт теста для `devtools/ya/bin`
`time Y_PYTHON_ENTRY_POINT=library.python.testing.import_test.import_test:main YA_TEST_CONTEXT_FILE=/tmp/test.context ./ya-bin >/dev/null`
#|
||
Без патча ya m -r
|
real 0m2,934s
user 0m2,772s
sys 0m0,157s
||
||
Без патча
ya m -r --ext-py
|
real 0m10,916s
user 0m9,927s
sys 0m0,368s
||
||
C патчем
ya m -r
|
real 0m2,931s
user 0m2,774s
sys 0m0,156s
||
||
C патчем
ya m -r --ext-py
первый запуск с записью байткода
|
real 0m7,482s
user 0m5,174s
sys 0m0,620s
||
||
C патчем
ya m -r --ext-py
последующие запуски с использованием байткода с фс
|
real 0m3,550s
user 0m2,801s
sys 0m0,262s
||
|#
commit_hash:84f7bb273b09d51b88eb5c5b6dfcd3bb7c108307
Diffstat (limited to 'library/python')
-rw-r--r-- | library/python/runtime_py3/__res.pyx | 8 | ||||
-rw-r--r-- | library/python/runtime_py3/importer.pxi | 73 |
2 files changed, 72 insertions, 9 deletions
diff --git a/library/python/runtime_py3/__res.pyx b/library/python/runtime_py3/__res.pyx index 97190d9f29..2c1d0c3ab4 100644 --- a/library/python/runtime_py3/__res.pyx +++ b/library/python/runtime_py3/__res.pyx @@ -6,6 +6,7 @@ from util.generic.string cimport TString, TStringBuf cdef extern from "library/cpp/resource/resource.h" namespace "NResource": + cdef bool Has(const TStringBuf key) except + cdef size_t Count() except + cdef TStringBuf KeyByIndex(size_t idx) except + cdef bool FindExact(const TStringBuf key, TString* result) nogil except + @@ -33,4 +34,11 @@ def find(s): return None +def has(s): + if isinstance(s, str): + s = utf_8_encode(s)[0] + + return Has(s) + + include "importer.pxi" diff --git a/library/python/runtime_py3/importer.pxi b/library/python/runtime_py3/importer.pxi index 228a00baee..f4f3252306 100644 --- a/library/python/runtime_py3/importer.pxi +++ b/library/python/runtime_py3/importer.pxi @@ -2,7 +2,18 @@ import marshal import sys from _codecs import utf_8_decode, utf_8_encode from _frozen_importlib import _call_with_frames_removed, spec_from_loader, BuiltinImporter -from _frozen_importlib_external import _os, _path_isfile, _path_isabs, path_sep, _path_join, _path_split +from _frozen_importlib_external import ( + _os, + _path_isfile, + _path_isabs, + path_sep, + _path_join, + _path_split, + _path_stat, + SourceFileLoader, + cache_from_source, +) + from _io import FileIO import __res as __resource @@ -121,6 +132,17 @@ def _get_source_root(): Y_PYTHON_SOURCE_ROOT = _get_source_root() +if EXTERNAL_PY_FILES_MODE: + import _frozen_importlib_external + + # Turn relative paths into absolute ones so that the python machinery stores the bytecode files next to the module. + # For more info see data flow in SourceLoader.get_data in contrib/tools/python3/Lib/importlib/_bootstrap_external.py + def patched_cache_from_source(filename): + filename = resfs_resolve(filename, check_existence=False) + return cache_from_source(_s(filename)) + + setattr(_frozen_importlib_external, 'cache_from_source', patched_cache_from_source) + def _print(*xs): """ @@ -165,7 +187,7 @@ def iter_prefixes(s): i = s.find('.', i + 1) -def resfs_resolve(path): +def resfs_resolve(path, check_existence=True): """ Return the absolute path of a root-relative path if it exists. """ @@ -173,6 +195,8 @@ def resfs_resolve(path): if Y_PYTHON_SOURCE_ROOT: if not path.startswith(Y_PYTHON_SOURCE_ROOT): path = _b(path_sep).join((Y_PYTHON_SOURCE_ROOT, path)) + if not check_existence: + return path if _path_isfile(path): return path @@ -186,6 +210,13 @@ def resfs_src(key, resfs_file=False): return __resource.find(b'resfs/src/' + _b(key)) +def resfs_has(path): + """ + Return true if the requested file is embedded in the program + """ + return __resource.has(b'resfs/file/' + _b(path)) + + def resfs_read(path, builtin=None): """ Return the bytes of the resource file at path, or None. @@ -217,12 +248,13 @@ def mod_path(mod): return py_prefix + _b(mod).replace(b'.', b'/') + b'.py' -class ResourceImporter: +class ResourceImporter(SourceFileLoader): """ A meta_path importer that loads code from built-in resources. """ - def __init__(self): + def __init__(self, fullname, path): + super().__init__(fullname, path) self.memory = set(iter_py_modules()) # Set of importable module names. self.source_map = {} # Map from file names to module names. self._source_name = {} # Map from original to altered module names. @@ -329,6 +361,7 @@ class ResourceImporter: def get_data(self, path): path = _b(path) abspath = resfs_resolve(path) + if abspath: return file_bytes(abspath) path = path.replace(_b('\\'), _b('/')) @@ -373,21 +406,43 @@ class ResourceImporter: if relpath: abspath = resfs_resolve(relpath) if abspath: - data = file_bytes(abspath) - return compile(data, _s(abspath), 'exec', dont_inherit=True) + if EXTERNAL_PY_FILES_MODE: + if not resfs_has(path): + # 1. This is the case when the requested module is registered in the metadata, + # but the content itself is not embedded in the binary. + # And the application itself is compiled in the external py files mode. + # Thus, we have an abspath to the python file and we need to get its bytecode. + # We transfer control to the standard python machinery, + # which will process the bytecode generation and cache it nearby, + # according to the general rules. + return super().get_code(modname) + else: + # 2. This is the case when the binary is launched in the mode of + # reading python sources from the file system (Y_PYTHON_SOURCE_ROOT), + # and not from the built-in storage. + data = file_bytes(abspath) + return compile(data, _s(abspath), 'exec', dont_inherit=True) yapyc_path = path + b'.yapyc3' yapyc_data = resfs_read(yapyc_path, builtin=True) if yapyc_data: + # 3. This is the basic case - we read the compiled bytecode from the built-in storage. return marshal.loads(yapyc_data) else: py_data = resfs_read(path, builtin=True) if py_data: + # 4. This is the case when the bytecode for the module is not embedded in the binary (PYBUILD_NO_PYC). + # Read the python file and compile on the fly. return compile(py_data, _s(relpath), 'exec', dont_inherit=True) else: - # This covers packages with no __init__.py in resources. + # 5. This covers packages with no __init__.py in resources. return compile('', modname, 'exec', dont_inherit=True) + def path_stats(self, path): + path = resfs_resolve(path, check_existence=False) + st = _path_stat(path) + return {'mtime': st.st_mtime, 'size': st.st_size} + def is_package(self, fullname): if fullname in self.memory: return False @@ -413,7 +468,7 @@ class ResourceImporter: if filename in self.source_map: return self.source_map[filename] - if resfs_read(filename, builtin=True) is not None: + if resfs_has(filename): return b'resfs/file/' + _b(filename) return b'' @@ -652,7 +707,7 @@ def excepthook(*args, **kws): return traceback.print_exception(*args, **kws) -importer = ResourceImporter() +importer = ResourceImporter(fullname='<resfs>', path='<resfs>') def executable_path_hook(path): |