summaryrefslogtreecommitdiffstats
path: root/library/python/archive
diff options
context:
space:
mode:
authorthegeorg <[email protected]>2023-10-03 11:19:48 +0300
committerthegeorg <[email protected]>2023-10-03 11:43:28 +0300
commitcda0c13f23f6b169fb0a49dc504b40a0aaecea09 (patch)
tree26476e92e5af2c856e017afb1df8f8dff42495bf /library/python/archive
parent4854116da9c5e3c95bb8440f2ea997c54b6e1a61 (diff)
Move contrib/tools/jdk to build/platform/java/jdk/testing
Diffstat (limited to 'library/python/archive')
-rw-r--r--library/python/archive/__init__.py266
-rw-r--r--library/python/archive/ya.make19
2 files changed, 285 insertions, 0 deletions
diff --git a/library/python/archive/__init__.py b/library/python/archive/__init__.py
new file mode 100644
index 00000000000..a6e032ff4c5
--- /dev/null
+++ b/library/python/archive/__init__.py
@@ -0,0 +1,266 @@
+import errno
+import logging
+import os
+import random
+import shutil
+import stat
+import string
+import sys
+
+import six
+
+import libarchive
+import libarchive._libarchive as _libarchive
+
+from pathlib2 import PurePath
+
+# Module-level logger; _writepath() uses it to report entries it skips.
+logger = logging.getLogger(__name__)
+
+# Compression filter names for which get_compression_level() has preset tables.
+GZIP = "gzip"
+ZSTD = "zstd"
+
+# Encoding extract_tar() uses to turn text paths into byte paths.
+ENCODING = "utf-8"
+
+
+class ConfigureError(Exception):
+    """Exception type exported by this module; nothing in this file raises it."""
+
+
+class Level(object):
+    """Marker object for an abstract compression preset.
+
+    Instances are used as dictionary keys by get_compression_level(), which
+    translates them into a concrete level for a particular filter.
+    """
+
+    def __init__(self, level):
+        # Stored as given; this module never reads it back.
+        self.level = level
+
+
+class Compression(object):
+    """Filter-independent compression presets.
+
+    Pass one of these as ``compression_level``; get_compression_level() maps
+    it to the concrete numeric level of the chosen filter.
+    """
+
+    Fast = Level(1)
+    Default = Level(2)
+    Best = Level(3)
+
+
+def get_compression_level(filter_name, level):
+    """Resolve ``level`` to the value handed to the compression filter.
+
+    Returns None when ``level`` is None or ``filter_name`` is empty.
+    A ``Level`` preset (see ``Compression``) is looked up in the table of the
+    given filter; any other value is returned unchanged.
+
+    Raises KeyError when a ``Level`` is given for a filter without a preset
+    table, or when the ``Level`` is not one of the ``Compression`` presets.
+    """
+    if level is None or not filter_name:
+        return None
+
+    if not isinstance(level, Level):
+        # Caller supplied a concrete level already - pass it through.
+        return level
+
+    gzip_presets = {
+        Compression.Fast: 1,
+        Compression.Default: 6,
+        Compression.Best: 9,
+    }
+    zstd_presets = {
+        Compression.Fast: 1,
+        Compression.Default: 3,
+        Compression.Best: 22,
+    }
+    presets_by_filter = {GZIP: gzip_presets, ZSTD: zstd_presets}
+    return presets_by_filter[filter_name][level]
+
+
+def encode(value, encoding):
+    """Return ``value`` as a byte string in the given ``encoding``.
+
+    Text is encoded with ``encoding``. A value that is already a byte string
+    is returned unchanged: on Python 3 ``bytes`` has no ``encode`` method, and
+    on Python 2 calling ``encode`` on a non-ASCII byte string fails because of
+    the implicit ASCII decode that precedes it.
+    """
+    if isinstance(value, bytes):
+        return value
+    return value.encode(encoding)
+
+
+def extract_tar(tar_file_path, output_dir, strip_components=None, fail_on_duplicates=True):
+    """Unpack the archive at ``tar_file_path`` into ``output_dir``.
+
+    ``output_dir`` is created if missing. When ``strip_components`` is set,
+    that many leading path components are removed from every entry path (and
+    from hardlink/symlink targets); entries whose path becomes empty are
+    skipped. With ``fail_on_duplicates`` an Exception is raised if stripping
+    makes an entry land on a path that already exists.
+
+    NOTE(review): entry paths are joined to ``output_dir`` without checking
+    for absolute paths or ".." components - confirm callers only pass trusted
+    archives.
+    """
+    output_dir = encode(output_dir, ENCODING)
+    _make_dirs(output_dir)
+    check_duplicates = strip_components and fail_on_duplicates
+
+    with libarchive.Archive(tar_file_path, mode="rb") as archive:
+        for entry in archive:
+            rel_path = _strip_prefix(entry.pathname, strip_components)
+            if not rel_path:
+                # The whole path was stripped away - nothing to create.
+                continue
+            target = os.path.join(output_dir, encode(rel_path, ENCODING))
+
+            # A trailing slash marks a directory entry.
+            if entry.pathname.endswith("/"):
+                _make_dirs(target)
+                continue
+
+            if check_duplicates and os.path.exists(target):
+                raise Exception(
+                    "The file {} is duplicated because of strip_components={}".format(target, strip_components)
+                )
+
+            _make_dirs(os.path.dirname(target))
+
+            if entry.ishardlink():
+                link_source = os.path.join(output_dir, _strip_prefix(entry.hardlink, strip_components))
+                _hardlink(link_source, target)
+            elif entry.issym():
+                _symlink(_strip_prefix(entry.linkname, strip_components), target)
+            else:
+                with open(target, "wb") as out:
+                    fd = out.fileno()
+                    # Apply permission bits up front where the platform supports it.
+                    if hasattr(os, "fchmod"):
+                        os.fchmod(fd, entry.mode & 0o7777)
+                    # Let libarchive write the entry payload directly into the descriptor.
+                    libarchive.call_and_check(
+                        _libarchive.archive_read_data_into_fd,
+                        archive._a,
+                        archive._a,
+                        fd,
+                    )
+
+
+def _strip_prefix(path, strip_components):
+    """Drop the first ``strip_components`` components of ``path``.
+
+    ``path`` is returned untouched when ``strip_components`` is falsy.
+    An empty string is returned when nothing is left after stripping.
+    """
+    if not strip_components:
+        return path
+
+    pure = PurePath(path)
+    leading = pure.parts[:strip_components]
+    remainder = str(pure.relative_to(*leading))
+    # PurePath renders an empty relative path as '.'.
+    if remainder == '.':
+        return ''
+    return remainder
+
+
+def tar(
+    paths,
+    output,
+    compression_filter=None,
+    compression_level=None,
+    fixed_mtime=None,
+    onerror=None,
+    postprocess=None,
+    dereference=False,
+):
+    """Pack files and directories into a GNU-format tar archive.
+
+    paths: a single path, or an iterable whose items are either a path
+        (archived under its basename) or a ``(path, arcname)`` tuple.
+        For a directory its content is archived recursively.
+    output: destination file name or an already opened stream. For a file
+        name the archive is first written to a temporary sibling file and
+        renamed into place on success; the temporary file is removed on error.
+    compression_filter: filter name (e.g. GZIP or ZSTD) or None for no filter.
+    compression_level: concrete level or a ``Compression`` preset,
+        see get_compression_level().
+    fixed_mtime: passed through to ``libarchive.Archive``; when it is not
+        None, entries are also written in sorted order so that the archive
+        layout does not depend on filesystem listing order.
+    onerror: optional ``onerror(src, dst, exc_info)`` callback, see _writepath().
+    postprocess: optional ``postprocess(src, dst, mode)`` callback invoked
+        after an entry has been written.
+    dereference: follow symlinks instead of archiving them as links.
+
+    Raises OSError if a non-directory path does not exist.
+    """
+    if isinstance(paths, six.string_types):
+        paths = [paths]
+
+    if isinstance(output, six.string_types):
+        temp_tar_path, stream = (
+            output + "." + "".join(random.sample(string.ascii_lowercase, 8)),
+            None,
+        )
+    else:
+        temp_tar_path, stream = None, output
+
+    compression_level = get_compression_level(compression_filter, compression_level)
+
+    try:
+        if compression_filter:
+            filter_name = compression_filter
+            if compression_level is not None:
+                filter_opts = {"compression-level": str(compression_level)}
+            else:
+                filter_opts = {}
+            # Keep gzip from storing a timestamp in its header, otherwise the output
+            # is not reproducible (http://www.gzip.org/zlib/rfc-gzip.html#file-format)
+            if fixed_mtime is not None and compression_filter == GZIP:
+                filter_opts["timestamp"] = ""
+        else:
+            filter_name = filter_opts = None
+
+        with libarchive.Archive(
+            stream or temp_tar_path,
+            mode="wb",
+            format="gnu",
+            filter=filter_name,
+            filter_opts=filter_opts,
+            fixed_mtime=fixed_mtime,
+        ) as tarfile:
+            # determine order if fixed_mtime is specified to produce stable archive
+            paths = paths if fixed_mtime is None else sorted(paths)
+
+            for p in paths:
+                if isinstance(p, tuple):
+                    path, arcname = p
+                else:
+                    path, arcname = p, os.path.basename(p)
+
+                if os.path.isdir(path):
+                    for root, dirs, files in os.walk(path, followlinks=dereference):
+                        if fixed_mtime is not None:
+                            # os.walk() descends into sub-directories in the order of
+                            # `dirs`, so it has to be sorted in place - sorting a copy
+                            # would leave the traversal order filesystem-dependent.
+                            dirs.sort()
+                            files.sort()
+                        entries = dirs + files
+
+                        reldir = os.path.relpath(root, path)
+                        for f in entries:
+                            _writepath(
+                                tarfile,
+                                os.path.join(root, f),
+                                os.path.normpath(os.path.join(arcname, reldir, f)),
+                                onerror,
+                                postprocess,
+                                dereference,
+                            )
+                else:
+                    if not os.path.exists(path):
+                        raise OSError("Specified path doesn't exist: {}".format(path))
+                    _writepath(tarfile, path, arcname, onerror, postprocess, dereference)
+
+        if temp_tar_path:
+            os.rename(temp_tar_path, output)
+    except Exception:
+        # Do not leave a half-written temporary archive behind.
+        if temp_tar_path and os.path.exists(temp_tar_path):
+            os.remove(temp_tar_path)
+        raise
+
+
+def _writepath(tarfile, src, dst, onerror, postprocess, dereference):
+    """Write the filesystem object ``src`` into ``tarfile`` under the name ``dst``.
+
+    Only regular files, directories and symlinks are archived; anything else
+    is skipped with a debug message. With ``dereference`` a symlink is
+    replaced by its resolved target. ``postprocess(src, dst, mode)`` is called
+    after a successful write.
+
+    A file that disappeared (ENOENT) is skipped silently. For any other error
+    ``onerror(src, dst, exc_info)`` is consulted when given: a truthy result
+    triggers one retry, a falsy one suppresses the error. Without ``onerror``
+    the error propagates.
+    """
+    archivable_checks = (stat.S_ISREG, stat.S_ISDIR, stat.S_ISLNK)
+
+    def write_entry(source, arcname):
+        info = os.lstat(source)
+        mode = info.st_mode
+        if not any(check(mode) for check in archivable_checks):
+            logger.debug("Skipping non-regular file '%s' (stat: %s)", source, info)
+            return
+
+        if dereference and stat.S_ISLNK(mode):
+            source = os.path.realpath(source)
+
+        tarfile.writepath(source, arcname)
+
+        if postprocess:
+            postprocess(source, arcname, mode)
+
+    try:
+        return write_entry(src, dst)
+    except Exception as exc:
+        vanished = isinstance(exc, OSError) and exc.errno == errno.ENOENT
+        if vanished:
+            logger.debug(
+                "Skipping missing file '%s' - looks like directory content has changed during archiving",
+                src,
+            )
+            return
+
+        if not onerror:
+            raise
+        if onerror(src, dst, sys.exc_info()):
+            return write_entry(src, dst)
+
+
+def check_tar(tar_file_path):
+    """Tell whether ``tar_file_path`` is a file (or symlink) that libarchive accepts as an archive."""
+    is_file_like = os.path.isfile(tar_file_path) or os.path.islink(tar_file_path)
+    if not is_file_like:
+        return False
+    return libarchive.is_archive(tar_file_path)
+
+
+def _make_dirs(path):
+    """Create ``path`` with all missing parents; an existing directory is not an error.
+
+    Raises OSError for any other failure, including ``path`` existing as a
+    non-directory.
+    """
+    try:
+        os.makedirs(path)
+    except OSError as err:
+        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
+        if not already_a_directory:
+            raise
+
+
+def _hardlink(src, dst):
+    """Make ``dst`` a hard link to ``src``, or a plain copy where ``os.link`` is unavailable."""
+    make_link = getattr(os, "link", None)
+    if make_link is None:
+        shutil.copyfile(src, dst)
+    else:
+        make_link(src, dst)
+
+
+def _symlink(src, dst):
+    """Create ``dst`` as a symlink pointing to ``src``.
+
+    Where ``os.symlink`` is unavailable the link is emulated by copying the
+    target: a directory is copied recursively, a regular file is copied as a
+    file. A target that does not exist is skipped.
+    """
+    if hasattr(os, "symlink"):
+        os.symlink(src, dst)
+    # Windows specific case - we cannot always copy the target right now,
+    # because it may not exist yet (and would be met later in the archive) or the symlink is broken.
+    # Act like tar and tarfile - skip such symlinks
+    elif os.path.isdir(src):
+        shutil.copytree(src, dst)
+    elif os.path.exists(src):
+        # copytree() only accepts directories, so a file target needs copyfile().
+        shutil.copyfile(src, dst)
+
+
+def get_archive_filter_name(filename):
+    """Return the name of the single filter applied to the archive, or None if there is none.
+
+    Raises Exception when the archive went through more than one filter.
+    """
+    names = libarchive.get_archive_filter_names(filename)
+    # The filter list is expected to end with the "none" pseudo-filter:
+    # https://a.yandex-team.ru/arc/trunk/arcadia/contrib/libs/libarchive/libarchive/archive_read.c?rev=5800047#L522
+    assert names[-1] == "none", names
+
+    count = len(names)
+    if count > 2:
+        raise Exception("Archive has chain of filter: {}".format(names))
+    return names[0] if count == 2 else None
diff --git a/library/python/archive/ya.make b/library/python/archive/ya.make
new file mode 100644
index 00000000000..5b86a45a422
--- /dev/null
+++ b/library/python/archive/ya.make
@@ -0,0 +1,19 @@
+# Build description of the library/python/archive package.
+PY23_LIBRARY()
+
+STYLE_PYTHON()
+
+# The package consists of a single module.
+PY_SRCS(
+ __init__.py
+)
+
+# Third-party packages imported by __init__.py (pathlib2, libarchive).
+# NOTE(review): __init__.py also imports six, which is not listed here -
+# presumably it is provided transitively; confirm.
+PEERDIR(
+ contrib/python/pathlib2
+ contrib/python/python-libarchive
+)
+
+END()
+
+# NOTE(review): the benchmark and test directories are not part of this diff -
+# confirm they exist next to this file.
+RECURSE_FOR_TESTS(
+ benchmark
+ test
+)