about | summary | refs | log | tree | commit | diff | stats
path: root/library/python
diff options
context:
space:
mode:
author: prettyboy <prettyboy@yandex-team.com> 2025-01-18 00:52:04 +0300
committer: prettyboy <prettyboy@yandex-team.com> 2025-01-18 01:13:15 +0300
commit: 29c54233d8257efabcec7fd41d359d826a1bf610 (patch)
tree: d942e12c2f14efb7e8537322be6ceea61f685b0b /library/python
parent: fad0cdd2a82a23ebe041e1643bd2175285f76b37 (diff)
download: ydb-29c54233d8257efabcec7fd41d359d826a1bf610.tar.gz
[library/python/runtime_py3] Added bytecode support for external-py-files mode
Добавляется поддержка стандартной работы с байткодом для бинарей, собранных с --ext-py (HIDDEN_URL).
На примере импорт-теста для `devtools/ya/bin`:
`time Y_PYTHON_ENTRY_POINT=library.python.testing.import_test.import_test:main YA_TEST_CONTEXT_FILE=/tmp/test.context ./ya-bin >/dev/null`
#|
|| Без патча ya m -r | real 0m2,934s user 0m2,772s sys 0m0,157s ||
|| Без патча ya m -r --ext-py | real 0m10,916s user 0m9,927s sys 0m0,368s ||
|| С патчем ya m -r | real 0m2,931s user 0m2,774s sys 0m0,156s ||
|| С патчем ya m -r --ext-py первый запуск с записью байткода | real 0m7,482s user 0m5,174s sys 0m0,620s ||
|| С патчем ya m -r --ext-py последующие запуски с использованием байткода с фс | real 0m3,550s user 0m2,801s sys 0m0,262s ||
|#
commit_hash:84f7bb273b09d51b88eb5c5b6dfcd3bb7c108307
Diffstat (limited to 'library/python')
-rw-r--r-- library/python/runtime_py3/__res.pyx 8
-rw-r--r-- library/python/runtime_py3/importer.pxi 73
2 files changed, 72 insertions, 9 deletions
diff --git a/library/python/runtime_py3/__res.pyx b/library/python/runtime_py3/__res.pyx
index 97190d9f29..2c1d0c3ab4 100644
--- a/library/python/runtime_py3/__res.pyx
+++ b/library/python/runtime_py3/__res.pyx
@@ -6,6 +6,7 @@ from util.generic.string cimport TString, TStringBuf
cdef extern from "library/cpp/resource/resource.h" namespace "NResource":
+ cdef bool Has(const TStringBuf key) except +
cdef size_t Count() except +
cdef TStringBuf KeyByIndex(size_t idx) except +
cdef bool FindExact(const TStringBuf key, TString* result) nogil except +
@@ -33,4 +34,11 @@ def find(s):
return None
+def has(s):
+ if isinstance(s, str):
+ s = utf_8_encode(s)[0]
+
+ return Has(s)
+
+
include "importer.pxi"
diff --git a/library/python/runtime_py3/importer.pxi b/library/python/runtime_py3/importer.pxi
index 228a00baee..f4f3252306 100644
--- a/library/python/runtime_py3/importer.pxi
+++ b/library/python/runtime_py3/importer.pxi
@@ -2,7 +2,18 @@ import marshal
import sys
from _codecs import utf_8_decode, utf_8_encode
from _frozen_importlib import _call_with_frames_removed, spec_from_loader, BuiltinImporter
-from _frozen_importlib_external import _os, _path_isfile, _path_isabs, path_sep, _path_join, _path_split
+from _frozen_importlib_external import (
+ _os,
+ _path_isfile,
+ _path_isabs,
+ path_sep,
+ _path_join,
+ _path_split,
+ _path_stat,
+ SourceFileLoader,
+ cache_from_source,
+)
+
from _io import FileIO
import __res as __resource
@@ -121,6 +132,17 @@ def _get_source_root():
Y_PYTHON_SOURCE_ROOT = _get_source_root()
+if EXTERNAL_PY_FILES_MODE:
+ import _frozen_importlib_external
+
+ # Turn relative paths into absolute ones so that the python machinery stores the bytecode files next to the module.
+ # For more info see data flow in SourceLoader.get_data in contrib/tools/python3/Lib/importlib/_bootstrap_external.py
+ def patched_cache_from_source(filename):
+ filename = resfs_resolve(filename, check_existence=False)
+ return cache_from_source(_s(filename))
+
+ setattr(_frozen_importlib_external, 'cache_from_source', patched_cache_from_source)
+
def _print(*xs):
"""
@@ -165,7 +187,7 @@ def iter_prefixes(s):
i = s.find('.', i + 1)
-def resfs_resolve(path):
+def resfs_resolve(path, check_existence=True):
"""
Return the absolute path of a root-relative path if it exists.
"""
@@ -173,6 +195,8 @@ def resfs_resolve(path):
if Y_PYTHON_SOURCE_ROOT:
if not path.startswith(Y_PYTHON_SOURCE_ROOT):
path = _b(path_sep).join((Y_PYTHON_SOURCE_ROOT, path))
+ if not check_existence:
+ return path
if _path_isfile(path):
return path
@@ -186,6 +210,13 @@ def resfs_src(key, resfs_file=False):
return __resource.find(b'resfs/src/' + _b(key))
+def resfs_has(path):
+ """
+ Return true if the requested file is embedded in the program
+ """
+ return __resource.has(b'resfs/file/' + _b(path))
+
+
def resfs_read(path, builtin=None):
"""
Return the bytes of the resource file at path, or None.
@@ -217,12 +248,13 @@ def mod_path(mod):
return py_prefix + _b(mod).replace(b'.', b'/') + b'.py'
-class ResourceImporter:
+class ResourceImporter(SourceFileLoader):
""" A meta_path importer that loads code from built-in resources.
"""
- def __init__(self):
+ def __init__(self, fullname, path):
+ super().__init__(fullname, path)
self.memory = set(iter_py_modules()) # Set of importable module names.
self.source_map = {} # Map from file names to module names.
self._source_name = {} # Map from original to altered module names.
@@ -329,6 +361,7 @@ class ResourceImporter:
def get_data(self, path):
path = _b(path)
abspath = resfs_resolve(path)
+
if abspath:
return file_bytes(abspath)
path = path.replace(_b('\\'), _b('/'))
@@ -373,21 +406,43 @@ class ResourceImporter:
if relpath:
abspath = resfs_resolve(relpath)
if abspath:
- data = file_bytes(abspath)
- return compile(data, _s(abspath), 'exec', dont_inherit=True)
+ if EXTERNAL_PY_FILES_MODE:
+ if not resfs_has(path):
+ # 1. This is the case when the requested module is registered in the metadata,
+ # but the content itself is not embedded in the binary.
+ # And the application itself is compiled in the external py files mode.
+ # Thus, we have an abspath to the python file and we need to get its bytecode.
+ # We transfer control to the standard python machinery,
+ # which will process the bytecode generation and cache it nearby,
+ # according to the general rules.
+ return super().get_code(modname)
+ else:
+ # 2. This is the case when the binary is launched in the mode of
+ # reading python sources from the file system (Y_PYTHON_SOURCE_ROOT),
+ # and not from the built-in storage.
+ data = file_bytes(abspath)
+ return compile(data, _s(abspath), 'exec', dont_inherit=True)
yapyc_path = path + b'.yapyc3'
yapyc_data = resfs_read(yapyc_path, builtin=True)
if yapyc_data:
+ # 3. This is the basic case - we read the compiled bytecode from the built-in storage.
return marshal.loads(yapyc_data)
else:
py_data = resfs_read(path, builtin=True)
if py_data:
+ # 4. This is the case when the bytecode for the module is not embedded in the binary (PYBUILD_NO_PYC).
+ # Read the python file and compile on the fly.
return compile(py_data, _s(relpath), 'exec', dont_inherit=True)
else:
- # This covers packages with no __init__.py in resources.
+ # 5. This covers packages with no __init__.py in resources.
return compile('', modname, 'exec', dont_inherit=True)
+ def path_stats(self, path):
+ path = resfs_resolve(path, check_existence=False)
+ st = _path_stat(path)
+ return {'mtime': st.st_mtime, 'size': st.st_size}
+
def is_package(self, fullname):
if fullname in self.memory:
return False
@@ -413,7 +468,7 @@ class ResourceImporter:
if filename in self.source_map:
return self.source_map[filename]
- if resfs_read(filename, builtin=True) is not None:
+ if resfs_has(filename):
return b'resfs/file/' + _b(filename)
return b''
@@ -652,7 +707,7 @@ def excepthook(*args, **kws):
return traceback.print_exception(*args, **kws)
-importer = ResourceImporter()
+importer = ResourceImporter(fullname='<resfs>', path='<resfs>')
def executable_path_hook(path):