author     vitalyisaev <vitalyisaev@ydb.tech>  2023-11-30 13:26:22 +0300
committer  vitalyisaev <vitalyisaev@ydb.tech>  2023-11-30 15:44:45 +0300
commit     0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch)
tree       291d72dbd7e9865399f668c84d11ed86fb190bbf /library/python
parent     cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff)
download   ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz
YQ Connector: Use docker-compose in integration tests
Diffstat (limited to 'library/python')
-rw-r--r--  library/python/archive/__init__.py                  266
-rw-r--r--  library/python/archive/ya.make                        19
-rw-r--r--  library/python/cityhash/cityhash.pyx                  75
-rw-r--r--  library/python/cityhash/hash.cpp                      32
-rw-r--r--  library/python/cityhash/hash.h                         6
-rw-r--r--  library/python/cityhash/ya.make                       16
-rw-r--r--  library/python/codecs/__codecs.pyx                    61
-rw-r--r--  library/python/codecs/__init__.py                      1
-rw-r--r--  library/python/codecs/ya.make                         16
-rw-r--r--  library/python/color/README.md                         9
-rw-r--r--  library/python/color/__init__.py                      92
-rw-r--r--  library/python/color/ya.make                          13
-rw-r--r--  library/python/compress/__init__.py                  147
-rw-r--r--  library/python/compress/ya.make                       16
-rw-r--r--  library/python/coredump_filter/README.md              19
-rw-r--r--  library/python/coredump_filter/__init__.py          1500
-rw-r--r--  library/python/coredump_filter/core_proc.js           21
-rw-r--r--  library/python/coredump_filter/epilog.html             2
-rw-r--r--  library/python/coredump_filter/prolog.html            24
-rw-r--r--  library/python/coredump_filter/styles.css            116
-rw-r--r--  library/python/coredump_filter/ya.make                23
-rw-r--r--  library/python/json/__init__.py                       44
-rw-r--r--  library/python/json/loads.cpp                        246
-rw-r--r--  library/python/json/loads.h                            5
-rw-r--r--  library/python/json/loads.pyx                         14
-rw-r--r--  library/python/json/ya.make                           17
-rw-r--r--  library/python/mlockall/__init__.py                   10
-rw-r--r--  library/python/mlockall/mlockall.pyx                  19
-rw-r--r--  library/python/mlockall/ya.make                       14
-rw-r--r--  library/python/nstools/__init__.py                     6
-rw-r--r--  library/python/nstools/nstools.pyx                    28
-rw-r--r--  library/python/nstools/ya.make                        14
-rw-r--r--  library/python/par_apply/__init__.py                 114
-rw-r--r--  library/python/par_apply/ya.make                      11
-rw-r--r--  library/python/symbols/libmagic/syms.cpp              19
-rw-r--r--  library/python/symbols/libmagic/ya.make               12
-rw-r--r--  library/python/testing/coverage_utils/__init__.py     14
-rw-r--r--  library/python/testing/coverage_utils/ya.make          5
-rw-r--r--  library/python/testing/system_info/__init__.py       204
-rw-r--r--  library/python/testing/system_info/ya.make            15
40 files changed, 3285 insertions, 0 deletions
diff --git a/library/python/archive/__init__.py b/library/python/archive/__init__.py
new file mode 100644
index 0000000000..a6e032ff4c
--- /dev/null
+++ b/library/python/archive/__init__.py
@@ -0,0 +1,266 @@
+import errno
+import logging
+import os
+import random
+import shutil
+import stat
+import string
+import sys
+
+import six
+
+import libarchive
+import libarchive._libarchive as _libarchive
+
+from pathlib2 import PurePath
+
+logger = logging.getLogger(__name__)
+
+GZIP = "gzip"
+ZSTD = "zstd"
+
+ENCODING = "utf-8"
+
+
+class ConfigureError(Exception):
+ pass
+
+
+class Level(object):
+ def __init__(self, level):
+ self.level = level
+
+
+class Compression(object):
+ Fast = Level(1)
+ Default = Level(2)
+ Best = Level(3)
+
+
+def get_compression_level(filter_name, level):
+ if level is None or not filter_name:
+ return None
+ elif isinstance(level, Level):
+ level = {
+ GZIP: {
+ Compression.Fast: 1,
+ Compression.Default: 6,
+ Compression.Best: 9,
+ },
+ ZSTD: {
+ Compression.Fast: 1,
+ Compression.Default: 3,
+ Compression.Best: 22,
+ },
+ }[filter_name][level]
+ return level
+
+
+def encode(value, encoding):
+ return value.encode(encoding)
+
+
+def extract_tar(tar_file_path, output_dir, strip_components=None, fail_on_duplicates=True):
+ output_dir = encode(output_dir, ENCODING)
+ _make_dirs(output_dir)
+ with libarchive.Archive(tar_file_path, mode="rb") as tarfile:
+ for e in tarfile:
+ p = _strip_prefix(e.pathname, strip_components)
+ if not p:
+ continue
+ dest = os.path.join(output_dir, encode(p, ENCODING))
+ if e.pathname.endswith("/"):
+ _make_dirs(dest)
+ continue
+
+ if strip_components and fail_on_duplicates:
+ if os.path.exists(dest):
+ raise Exception(
+ "The file {} is duplicated because of strip_components={}".format(dest, strip_components)
+ )
+
+ _make_dirs(os.path.dirname(dest))
+
+ if e.ishardlink():
+ src = os.path.join(output_dir, _strip_prefix(e.hardlink, strip_components))
+ _hardlink(src, dest)
+ continue
+ if e.issym():
+ src = _strip_prefix(e.linkname, strip_components)
+ _symlink(src, dest)
+ continue
+
+ with open(dest, "wb") as f:
+ if hasattr(os, "fchmod"):
+ os.fchmod(f.fileno(), e.mode & 0o7777)
+ libarchive.call_and_check(
+ _libarchive.archive_read_data_into_fd,
+ tarfile._a,
+ tarfile._a,
+ f.fileno(),
+ )
+
+
+def _strip_prefix(path, strip_components):
+ if not strip_components:
+ return path
+ p = PurePath(path)
+ stripped = str(p.relative_to(*p.parts[:strip_components]))
+ return '' if stripped == '.' else stripped
+
+
+def tar(
+ paths,
+ output,
+ compression_filter=None,
+ compression_level=None,
+ fixed_mtime=None,
+ onerror=None,
+ postprocess=None,
+ dereference=False,
+):
+ if isinstance(paths, six.string_types):
+ paths = [paths]
+
+ if isinstance(output, six.string_types):
+ temp_tar_path, stream = (
+ output + "." + "".join(random.sample(string.ascii_lowercase, 8)),
+ None,
+ )
+ else:
+ temp_tar_path, stream = None, output
+
+ compression_level = get_compression_level(compression_filter, compression_level)
+
+ try:
+ if compression_filter:
+ filter_name = compression_filter
+ if compression_level is not None:
+ filter_opts = {"compression-level": str(compression_level)}
+ else:
+ filter_opts = {}
+            # force gzip not to store the mtime of the original file being compressed (http://www.gzip.org/zlib/rfc-gzip.html#file-format)
+ if fixed_mtime is not None and compression_filter == GZIP:
+ filter_opts["timestamp"] = ""
+ else:
+ filter_name = filter_opts = None
+
+ with libarchive.Archive(
+ stream or temp_tar_path,
+ mode="wb",
+ format="gnu",
+ filter=filter_name,
+ filter_opts=filter_opts,
+ fixed_mtime=fixed_mtime,
+ ) as tarfile:
+ # determine order if fixed_mtime is specified to produce stable archive
+ paths = paths if fixed_mtime is None else sorted(paths)
+
+ for p in paths:
+ if type(p) == tuple:
+ path, arcname = p
+ else:
+ path, arcname = p, os.path.basename(p)
+
+ if os.path.isdir(path):
+ for root, dirs, files in os.walk(path, followlinks=dereference):
+ if fixed_mtime is None:
+ entries = dirs + files
+ else:
+ entries = sorted(dirs) + sorted(files)
+
+ reldir = os.path.relpath(root, path)
+ for f in entries:
+ _writepath(
+ tarfile,
+ os.path.join(root, f),
+ os.path.normpath(os.path.join(arcname, reldir, f)),
+ onerror,
+ postprocess,
+ dereference,
+ )
+ else:
+ if not os.path.exists(path):
+ raise OSError("Specified path doesn't exist: {}".format(path))
+ _writepath(tarfile, path, arcname, onerror, postprocess, dereference)
+
+ if temp_tar_path:
+ os.rename(temp_tar_path, output)
+ except Exception:
+ if temp_tar_path and os.path.exists(temp_tar_path):
+ os.remove(temp_tar_path)
+ raise
+
+
+def _writepath(tarfile, src, dst, onerror, postprocess, dereference):
+ def tar_writepath(src, dst):
+ st = os.lstat(src)
+ if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode) or stat.S_ISLNK(st.st_mode):
+ if dereference and stat.S_ISLNK(st.st_mode):
+ src = os.path.realpath(src)
+
+ tarfile.writepath(src, dst)
+
+ if postprocess:
+ postprocess(src, dst, st.st_mode)
+ else:
+ logger.debug("Skipping non-regular file '%s' (stat: %s)", src, st)
+
+ try:
+ return tar_writepath(src, dst)
+ except Exception as e:
+ if isinstance(e, OSError) and e.errno == errno.ENOENT:
+ logger.debug(
+ "Skipping missing file '%s' - looks like directory content has changed during archiving",
+ src,
+ )
+ return
+
+ if onerror:
+ if onerror(src, dst, sys.exc_info()):
+ return tar_writepath(src, dst)
+ else:
+ raise
+
+
+def check_tar(tar_file_path):
+ if os.path.isfile(tar_file_path) or os.path.islink(tar_file_path):
+ return libarchive.is_archive(tar_file_path)
+ return False
+
+
+def _make_dirs(path):
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno != errno.EEXIST or not os.path.isdir(path):
+ raise
+
+
+def _hardlink(src, dst):
+ if hasattr(os, "link"):
+ os.link(src, dst)
+ else:
+ shutil.copyfile(src, dst)
+
+
+def _symlink(src, dst):
+ if hasattr(os, "symlink"):
+ os.symlink(src, dst)
+ else:
+        # Windows-specific case: we cannot copy the file right now, because it
+        # doesn't exist yet (it will be encountered later in the archive) or the symlink is broken.
+        # Act like tar and tarfile: skip such symlinks.
+ if os.path.exists(src):
+ shutil.copytree(src, dst)
+
+
+def get_archive_filter_name(filename):
+ filters = libarchive.get_archive_filter_names(filename)
+ # https://a.yandex-team.ru/arc/trunk/arcadia/contrib/libs/libarchive/libarchive/archive_read.c?rev=5800047#L522
+ assert filters[-1] == "none", filters
+ if len(filters) == 1:
+ return None
+ if len(filters) == 2:
+ return filters[0]
+ raise Exception("Archive has chain of filter: {}".format(filters))
diff --git a/library/python/archive/ya.make b/library/python/archive/ya.make
new file mode 100644
index 0000000000..5b86a45a42
--- /dev/null
+++ b/library/python/archive/ya.make
@@ -0,0 +1,19 @@
+PY23_LIBRARY()
+
+STYLE_PYTHON()
+
+PY_SRCS(
+ __init__.py
+)
+
+PEERDIR(
+ contrib/python/pathlib2
+ contrib/python/python-libarchive
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ benchmark
+ test
+)
diff --git a/library/python/cityhash/cityhash.pyx b/library/python/cityhash/cityhash.pyx
new file mode 100644
index 0000000000..6f0046f0d7
--- /dev/null
+++ b/library/python/cityhash/cityhash.pyx
@@ -0,0 +1,75 @@
+from libcpp.pair cimport pair
+
+cdef extern from "util/system/types.h":
+ ctypedef unsigned long ui64
+
+
+cdef extern from "util/digest/city.h":
+ ui64 CityHash64(const char* buf, size_t len) nogil
+ pair[ui64, ui64] CityHash128(const char* buf, size_t len) nogil
+ ui64 CityHash64WithSeed(const char* buf, size_t len, ui64 seed) nogil
+
+
+cdef extern from "library/python/cityhash/hash.h":
+ ui64 FileCityHash128WithSeedHigh64(const char* fpath) nogil except+
+ ui64 FileCityHash64(const char* fpath) nogil except+
+
+
+def hash64(content):
+ cdef const char* s = content
+ cdef size_t size = len(content)
+ cdef ui64 res = 0
+
+ if size > 128:
+ with nogil:
+ res = CityHash64(s, size)
+ else:
+ res = CityHash64(s, size)
+
+ return res
+
+def hash128(content):
+ cdef const char* s = content
+ cdef size_t size = len(content)
+ cdef pair[ui64, ui64] res = pair[ui64, ui64](0, 0)
+
+ if size > 128:
+ with nogil:
+ res = CityHash128(s, size)
+ else:
+ res = CityHash128(s, size)
+ return res
+
+
+def hash64seed(content, seed):
+ cdef const char* s = content
+ cdef size_t size = len(content)
+    cdef ui64 _seed = seed, res = 0
+
+ if size > 128:
+ with nogil:
+ res = CityHash64WithSeed(s, size, _seed)
+ else:
+ res = CityHash64WithSeed(s, size, _seed)
+
+ return res
+
+
+def filehash64(path):
+ cdef const char* p = path
+ cdef ui64 res = 0
+
+ with nogil:
+ res = FileCityHash64(p)
+
+ return res
+
+
+def filehash128high64(path):
+ cdef const char* p = path
+ cdef ui64 res = 0
+
+ with nogil:
+ res = FileCityHash128WithSeedHigh64(p)
+
+ return res
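A short usage sketch for these bindings (the ya.make below builds the module TOP_LEVEL, so it imports as plain `cityhash`; inputs must be bytes, since the functions take `const char*`):

```python
# Sketch: cityhash bindings take bytes, not str.
from cityhash import hash64, hash128, hash64seed, filehash64, filehash128high64

data = b"some payload"
print(hash64(data))          # ui64
print(hash128(data))         # pair of ui64, converted to a Python tuple
print(hash64seed(data, 42))  # seeded 64-bit variant

# File helpers hash file contents with the GIL released; paths are bytes too.
print(filehash64(b"/etc/hostname"))
print(filehash128high64(b"/etc/hostname"))
```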
diff --git a/library/python/cityhash/hash.cpp b/library/python/cityhash/hash.cpp
new file mode 100644
index 0000000000..17bd3a75f3
--- /dev/null
+++ b/library/python/cityhash/hash.cpp
@@ -0,0 +1,32 @@
+#include "hash.h"
+
+#include <util/digest/city.h>
+#include <util/generic/string.h>
+#include <util/memory/blob.h>
+#include <util/system/file.h>
+#include <util/system/fstat.h>
+
+void ReadFile(const char* fpath, TBlob& blob) {
+ TFile f(TString{fpath}, RdOnly | Seq);
+ const TFileStat fs(f);
+ auto size = fs.Size;
+
+ if (size < (64 << 10)) {
+ blob = TBlob::FromFileContent(f, 0, size);
+ } else {
+ blob = TBlob::FromFile(f);
+ }
+}
+
+ui64 FileCityHash128WithSeedHigh64(const char* fpath) {
+ TBlob blob;
+ ReadFile(fpath, blob);
+ const uint128 hash = CityHash128WithSeed((const char*)blob.Data(), blob.Size(), uint128(0, blob.Size()));
+ return Uint128High64(hash);
+}
+
+ui64 FileCityHash64(const char* fpath) {
+ TBlob blob;
+ ReadFile(fpath, blob);
+ return CityHash64(static_cast<const char*>(blob.Data()), blob.Size());
+}
diff --git a/library/python/cityhash/hash.h b/library/python/cityhash/hash.h
new file mode 100644
index 0000000000..64b22ba74b
--- /dev/null
+++ b/library/python/cityhash/hash.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+ui64 FileCityHash128WithSeedHigh64(const char* fpath);
+ui64 FileCityHash64(const char* fpath);
diff --git a/library/python/cityhash/ya.make b/library/python/cityhash/ya.make
new file mode 100644
index 0000000000..7948e19389
--- /dev/null
+++ b/library/python/cityhash/ya.make
@@ -0,0 +1,16 @@
+PY23_LIBRARY()
+
+SRCS(
+ hash.cpp
+)
+
+PY_SRCS(
+ TOP_LEVEL
+ cityhash.pyx
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/library/python/codecs/__codecs.pyx b/library/python/codecs/__codecs.pyx
new file mode 100644
index 0000000000..42ec37fe88
--- /dev/null
+++ b/library/python/codecs/__codecs.pyx
@@ -0,0 +1,61 @@
+import six
+
+from libcpp cimport bool
+
+from util.generic.string cimport TString, TStringBuf
+
+
+def to_bytes(s):
+ try:
+ return s.encode('utf-8')
+ except AttributeError:
+ pass
+
+ return s
+
+
+def from_bytes(s):
+ if six.PY3:
+ return s.decode('utf-8')
+
+ return s
+
+
+cdef extern from "library/cpp/blockcodecs/codecs.h" namespace "NBlockCodecs":
+ cdef cppclass ICodec:
+ void Encode(TStringBuf data, TString& res) nogil
+ void Decode(TStringBuf data, TString& res) nogil
+
+ cdef const ICodec* Codec(const TStringBuf& name) except +
+ cdef TString ListAllCodecsAsString() except +
+
+
+def dumps(name, data):
+ name = to_bytes(name)
+
+ cdef const ICodec* codec = Codec(TStringBuf(name, len(name)))
+ cdef TString res
+ cdef TStringBuf cdata = TStringBuf(data, len(data))
+
+ with nogil:
+ codec.Encode(cdata, res)
+
+ return res.c_str()[:res.length()]
+
+
+def loads(name, data):
+ name = to_bytes(name)
+
+ cdef const ICodec* codec = Codec(TStringBuf(name, len(name)))
+ cdef TString res
+ cdef TStringBuf cdata = TStringBuf(data, len(data))
+
+ with nogil:
+ codec.Decode(cdata, res)
+
+ return res.c_str()[:res.length()]
+
+def list_all_codecs():
+ cdef TString res = ListAllCodecsAsString()
+
+ return from_bytes(res.c_str()[:res.length()]).split(',')
diff --git a/library/python/codecs/__init__.py b/library/python/codecs/__init__.py
new file mode 100644
index 0000000000..b9fb00deb0
--- /dev/null
+++ b/library/python/codecs/__init__.py
@@ -0,0 +1 @@
+from __codecs import loads, dumps, list_all_codecs # noqa
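A usage sketch for the wrapper (the set of codec names comes from blockcodecs and depends on the build; `"snappy"` below is an assumption):

```python
# Sketch: round-trip through a blockcodecs codec by name.
import library.python.codecs as codecs

print(codecs.list_all_codecs())  # all codec names known to blockcodecs

packed = codecs.dumps("snappy", b"hello hello hello")  # bytes in, bytes out
assert codecs.loads("snappy", packed) == b"hello hello hello"
```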
diff --git a/library/python/codecs/ya.make b/library/python/codecs/ya.make
new file mode 100644
index 0000000000..f42d115d5d
--- /dev/null
+++ b/library/python/codecs/ya.make
@@ -0,0 +1,16 @@
+PY23_LIBRARY()
+
+PEERDIR(
+ library/cpp/blockcodecs
+ contrib/python/six
+)
+
+PY_SRCS(
+ __init__.py
+)
+
+BUILDWITH_CYTHON_CPP(__codecs.pyx)
+
+PY_REGISTER(__codecs)
+
+END()
diff --git a/library/python/color/README.md b/library/python/color/README.md
new file mode 100644
index 0000000000..9deae40092
--- /dev/null
+++ b/library/python/color/README.md
@@ -0,0 +1,9 @@
+A fork of ((termcolor https://github.com/termcolor/termcolor/)) for PY23 with additional functionality.
+
+It can be used to convert text color specifications (e.g. from markup) into escape sequences for correct rendering in a terminal.
+
+Usage example:
+```python
+from library.python.color import tcolor
+tcolor("some text", "green-bold-on_red") -> '\x1b[32m\x1b[41m\x1b[1msome text\x1b[0m'
+```
diff --git a/library/python/color/__init__.py b/library/python/color/__init__.py
new file mode 100644
index 0000000000..a70234945e
--- /dev/null
+++ b/library/python/color/__init__.py
@@ -0,0 +1,92 @@
+from __future__ import print_function
+
+import copy
+import os
+
+from termcolor import ATTRIBUTES, COLORS, HIGHLIGHTS, RESET
+
+__all__ = [
+ "ATTRIBUTES",
+ "COLORS",
+ "HIGHLIGHTS",
+ "RESET",
+ "colored",
+ "cprint",
+ "tcolor",
+ "get_color_by_spec"
+]
+
+ATTRIBUTES = copy.deepcopy(ATTRIBUTES)
+ATTRIBUTES["light"] = ATTRIBUTES['bold']
+
+COLORS = copy.deepcopy(COLORS)
+COLORS['gray'] = COLORS['grey']
+COLORS['purple'] = COLORS['magenta']
+COLORS["reset"] = 0
+
+
+def get_code(code):
+ if os.getenv("ANSI_COLORS_DISABLED") is None:
+ return "\033[{}m".format(code)
+ return ""
+
+
+def get_color_by_spec(color_spec):
+ color, on_color, attrs = get_spec(color_spec)
+ return get_color(color, on_color, attrs)
+
+
+def get_color(color, on_color, attrs):
+ res = ""
+
+ if color is not None:
+ res += get_code(COLORS[color])
+
+ if on_color is not None:
+ res += get_code(HIGHLIGHTS[on_color])
+
+ if attrs is not None:
+ for attr in attrs:
+ res += get_code(ATTRIBUTES[attr])
+
+ return res
+
+
+def get_spec(color_spec):
+ """Parses string text color formatting specification.
+
+ Arguments:
+ color_spec -- string spec for text color formatting, csv string with
+ `color` / `bg_color` / `attr` spec items having "-" as a delimiter.
+
+ Returns a tuple: (color, bg-color, attributes list)
+
+ Example:
+ get_spec("green-bold-on_red") -> (32, 41, [1])
+ """
+ parts = color_spec.split("-")
+ color = None
+ on_color = None
+ attrs = []
+ for part in parts:
+ part = part.lower()
+ if part in COLORS:
+ color = part
+ if part in HIGHLIGHTS:
+ on_color = part
+ if part in ATTRIBUTES:
+ attrs.append(part)
+ return color, on_color, attrs
+
+
+def tcolor(text, color_spec):
+ color, on_color, attrs = get_spec(color_spec)
+ return colored(text, color=color, on_color=on_color, attrs=attrs)
+
+
+def colored(text, color=None, on_color=None, attrs=None):
+ return get_color(color, on_color, attrs) + text + get_code(COLORS["reset"])
+
+
+def cprint(text, color=None, on_color=None, attrs=None, **kwargs):
+ print((colored(text, color, on_color, attrs)), **kwargs)
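Beyond the `tcolor` example in the README, a short sketch of the remaining entry points (behavior as defined above; actual output depends on the terminal):

```python
# Sketch: the other public helpers of library.python.color.
from library.python.color import colored, cprint, get_color_by_spec, get_spec

print(get_spec("gray-light-on_red"))       # ("gray", "on_red", ["light"]) -- fork-specific aliases resolve
prefix = get_color_by_spec("purple-bold")  # raw escape prefix, no trailing reset
print(prefix + "loud text" + "\033[0m")

cprint("warning!", color="yellow", attrs=["bold"])  # colored() + print(), resets at the end

# Setting ANSI_COLORS_DISABLED in the environment makes get_code() return "",
# so all of the above degrade to plain text.
```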
diff --git a/library/python/color/ya.make b/library/python/color/ya.make
new file mode 100644
index 0000000000..ff6740b1d4
--- /dev/null
+++ b/library/python/color/ya.make
@@ -0,0 +1,13 @@
+PY23_LIBRARY()
+
+LICENSE(MIT)
+
+PY_SRCS(
+ __init__.py
+)
+
+PEERDIR(
+ contrib/python/termcolor
+)
+
+END()
diff --git a/library/python/compress/__init__.py b/library/python/compress/__init__.py
new file mode 100644
index 0000000000..380ec47dca
--- /dev/null
+++ b/library/python/compress/__init__.py
@@ -0,0 +1,147 @@
+from io import open
+
+import struct
+import json
+import os
+import logging
+
+import library.python.par_apply as lpp
+import library.python.codecs as lpc
+
+
+logger = logging.getLogger('compress')
+
+
+def list_all_codecs():
+ return sorted(frozenset(lpc.list_all_codecs()))
+
+
+def find_codec(ext):
+ def ext_compress(x):
+ return lpc.dumps(ext, x)
+
+ def ext_decompress(x):
+ return lpc.loads(ext, x)
+
+ ext_decompress(ext_compress(b''))
+
+ return {'c': ext_compress, 'd': ext_decompress, 'n': ext}
+
+
+def codec_for(path):
+ for ext in reversed(path.split('.')):
+ try:
+ return find_codec(ext)
+ except Exception as e:
+ logger.debug('in codec_for(): %s', e)
+
+ raise Exception('unsupported file %s' % path)
+
+
+def compress(fr, to, codec=None, fopen=open, threads=1):
+ if codec:
+ codec = find_codec(codec)
+ else:
+ codec = codec_for(to)
+
+ func = codec['c']
+
+ def iter_blocks():
+ with fopen(fr, 'rb') as f:
+ while True:
+ chunk = f.read(16 * 1024 * 1024)
+
+ if chunk:
+ yield chunk
+ else:
+ yield b''
+
+ return
+
+ def iter_results():
+ info = {
+ 'codec': codec['n'],
+ }
+
+ if fr:
+ info['size'] = os.path.getsize(fr)
+
+ yield json.dumps(info, sort_keys=True) + '\n'
+
+ for c in lpp.par_apply(iter_blocks(), func, threads):
+ yield c
+
+ with fopen(to, 'wb') as f:
+ for c in iter_results():
+ logger.debug('complete %s', len(c))
+ f.write(struct.pack('<I', len(c)))
+
+ try:
+ f.write(c)
+ except TypeError:
+ f.write(c.encode('utf-8'))
+
+
+def decompress(fr, to, codec=None, fopen=open, threads=1):
+ def iter_chunks():
+ with fopen(fr, 'rb') as f:
+ cnt = 0
+
+ while True:
+ ll = f.read(4)
+
+ if ll:
+ ll = struct.unpack('<I', ll)[0]
+
+ if ll:
+ if ll > 100000000:
+ raise Exception('broken stream')
+
+ yield f.read(ll)
+
+ cnt += ll
+ else:
+ if not cnt:
+ raise Exception('empty stream')
+
+ return
+
+ it = iter_chunks()
+ extra = []
+
+ for chunk in it:
+ hdr = {}
+
+ try:
+ hdr = json.loads(chunk)
+ except Exception as e:
+            logger.info('cannot parse header, suspect old format: %s', e)
+ extra.append(chunk)
+
+ break
+
+ def resolve_codec():
+ if 'codec' in hdr:
+ return find_codec(hdr['codec'])
+
+ if codec:
+ return find_codec(codec)
+
+ return codec_for(fr)
+
+ dc = resolve_codec()['d']
+
+ def iter_all_chunks():
+ for x in extra:
+ yield x
+
+ for x in it:
+ yield x
+
+ with fopen(to, 'wb') as f:
+ for c in lpp.par_apply(iter_all_chunks(), dc, threads):
+ if c:
+ logger.debug('complete %s', len(c))
+ f.write(c)
+ else:
+ break
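A usage sketch of the on-disk format implemented above: a length-prefixed JSON header chunk followed by length-prefixed compressed blocks, with an empty decompressed block marking the end (the file names and the `zstd08_1` codec name are assumptions):

```python
# Sketch: codec is inferred from the target extension, or passed explicitly.
import library.python.compress as compress

compress.compress("big.log", "big.log.zstd08_1", threads=4)  # codec name taken from extension
compress.decompress("big.log.zstd08_1", "big.log.restored")

# An explicit codec overrides the extension:
compress.compress("big.log", "big.log.packed", codec="zstd08_5")
compress.decompress("big.log.packed", "big.log.restored2")  # codec read from the stream header
```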
diff --git a/library/python/compress/ya.make b/library/python/compress/ya.make
new file mode 100644
index 0000000000..bbf2a784e2
--- /dev/null
+++ b/library/python/compress/ya.make
@@ -0,0 +1,16 @@
+PY23_LIBRARY()
+
+PEERDIR(
+ library/python/codecs
+ library/python/par_apply
+)
+
+PY_SRCS(
+ __init__.py
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ tests
+)
diff --git a/library/python/coredump_filter/README.md b/library/python/coredump_filter/README.md
new file mode 100644
index 0000000000..87b02e7985
--- /dev/null
+++ b/library/python/coredump_filter/README.md
@@ -0,0 +1,19 @@
+# Coredump Filter
+
+- ABC: https://abc.yandex-team.ru/services/cores/
+
+A library for parsing gdb/lldb debugger traces, Python tracebacks and minidumps.
+
+It takes the trace text as input and produces the parsed text, plus the ability
+to convert it into HTML format for easier reading.
+
+
+## Main clients
+- [Stack trace aggregator](https://wiki.yandex-team.ru/cores-aggregation)
+
+
+## Development rules
+
+The library is written so that the `__init__.py` file can work
+without external dependencies. This allows the library to be used even in Ter1 environments.
+At the moment this invariant is not covered by tests (and this should be fixed).
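A minimal usage sketch of the Python API this commit adds (see `__init__.py` in this diff; `trace.txt` is a hypothetical gdb/lldb trace file):

```python
# Sketch: render an HTML report, or get parsed stacks back.
import sys
import library.python.coredump_filter as coredump_filter

with open("trace.txt") as f:
    core_text = f.read()

# With an output stream: writes the HTML report (gdb/lldb/sdc_assert mode is auto-detected).
coredump_filter.filter_stack_dump(core_text=core_text, output_stream=sys.stdout)

# Without a stream: returns (stack groups, raw stack texts, signal).
stacks, raw_stacks, signal = coredump_filter.filter_stack_dump(core_text=core_text)
```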
diff --git a/library/python/coredump_filter/__init__.py b/library/python/coredump_filter/__init__.py
new file mode 100644
index 0000000000..de0830cd43
--- /dev/null
+++ b/library/python/coredump_filter/__init__.py
@@ -0,0 +1,1500 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import print_function
+
+import six
+import enum
+import datetime
+import os
+import re
+import pkgutil
+import sys
+import hashlib
+import json
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class CoredumpMode(enum.Enum):
+ GDB = "gdb"
+ LLDB = "lldb"
+ SDC_ASSERT = "sdc_assert"
+
+
+ARCADIA_ROOT_LINK = "https://a.yandex-team.ru/arc/trunk/arcadia/"
+
+ARCADIA_ROOT_DIRS = [
+ # hottest paths
+ "/util/",
+ "/contrib/",
+ "/library/",
+ "/kernel/",
+ "/build/",
+ "/search/",
+
+ # "/gcc-4.8.2/",
+
+ # system paths
+ # "/lib/x86_64-linux-gnu/",
+
+ # all other stuff
+ "/aapi/",
+ "/addappter/",
+ "/adfox/",
+ "/admins/",
+ "/ads/",
+ "/adv/",
+ "/advq/",
+ "/afisha/",
+ "/afro/",
+ "/alet/",
+ "/alice/",
+ "/analytics/",
+ "/antiadblock/",
+ "/antirobot/",
+ "/apphost/",
+ "/april/",
+ "/arc/",
+ "/arcanum/",
+ "/augur/",
+ "/aurora/",
+ "/autocheck/",
+ "/balancer/",
+ "/bass/",
+ "/billing/",
+ "/bindings/",
+ "/browser/",
+ "/build/",
+ "/bunker/",
+ "/caas/",
+ "/canvas/",
+ "/captcha/",
+ "/catboost/",
+ "/certs/",
+ "/ci/",
+ "/clickhouse/",
+ "/client_analytics/",
+ "/cloud/",
+ "/cmicot/",
+ "/cmnt/",
+ "/comdep_analytics/",
+ "/commerce/",
+ "/contrib/",
+ "/crm/",
+ "/crowdsourcing/",
+ "/crypta/",
+ "/cv/",
+ "/datacloud/",
+ "/datalens/",
+ "/data-ui/",
+ "/devtools/",
+ "/dict/",
+ "/direct/",
+ "/disk/",
+ "/distribution/",
+ "/distribution_interface/",
+ "/district/",
+ "/dj/",
+ "/docs/",
+ "/douber/",
+ "/drive/",
+ "/edadeal/",
+ "/education/",
+ "/entity/",
+ "/ether/",
+ "/extdata/",
+ "/extsearch/",
+ "/FactExtract/",
+ "/fintech/",
+ "/frontend/",
+ "/fuzzing/",
+ "/games/",
+ "/gencfg/",
+ "/geobase/",
+ "/geoproduct/",
+ "/geosuggest/",
+ "/geotargeting/",
+ "/glycine/",
+ "/groups/",
+ "/haas/",
+ "/health/",
+ "/helpdesk/",
+ "/hitman/",
+ "/home/",
+ "/htf/",
+ "/hw_watcher/",
+ "/hypercube/",
+ "/iaas/",
+ "/iceberg/",
+ "/infra/",
+ "/intranet/",
+ "/inventori/",
+ "/ipreg/",
+ "/irt/",
+ "/it-office/",
+ "/jdk/",
+ "/juggler/",
+ "/junk/",
+ "/jupytercloud/",
+ "/kernel/",
+ "/keyboard/",
+ "/kikimr/",
+ "/kinopoisk/",
+ "/kinopoisk-ott/",
+ "/laas/",
+ "/lbs/",
+ "/library/",
+ "/load/",
+ "/locdoc/",
+ "/logbroker/",
+ "/logfeller/",
+ "/mail/",
+ "/mapreduce/",
+ "/maps/",
+ "/maps_adv/",
+ "/market/",
+ "/mb/",
+ "/mds/",
+ "/media/",
+ "/media-billing/",
+ "/media-crm/",
+ "/mediapers/",
+ "/mediaplanner/",
+ "/mediastat/",
+ "/media-stories/",
+ "/metrika/",
+ "/milab/",
+ "/ml/",
+ "/mlp/",
+ "/mlportal/",
+ "/mobile/",
+ "/modadvert/",
+ "/ms/",
+ "/mssngr/",
+ "/music/",
+ "/musickit/",
+ "/netsys/",
+ "/nginx/",
+ "/nirvana/",
+ "/noc/",
+ "/ofd/",
+ "/offline_data/",
+ "/opensource/",
+ "/orgvisits/",
+ "/ott/",
+ "/packages/",
+ "/partner/",
+ "/passport/",
+ "/payplatform/",
+ "/paysys/",
+ "/plus/",
+ "/portal/",
+ "/portalytics/",
+ "/pythia/",
+ "/quality/",
+ "/quasar/",
+ "/razladki/",
+ "/regulargeo/",
+ "/release_machine/",
+ "/rem/",
+ "/repo/",
+ "/rnd_toolbox/",
+ "/robot/",
+ "/rtc/",
+ "/rtline/",
+ "/rtmapreduce/",
+ "/rt-research/",
+ "/saas/",
+ "/samogon/",
+ "/samsara/",
+ "/sandbox/",
+ "/scarab/",
+ "/sdc/",
+ "/search/",
+ "/security/",
+ "/semantic-web/",
+ "/serp/",
+ "/sitesearch/",
+ "/skynet/",
+ "/smart_devices/",
+ "/smarttv/",
+ "/smm/",
+ "/solomon/",
+ "/specsearches/",
+ "/speechkit/",
+ "/sport/",
+ "/sprav/",
+ "/statbox/",
+ "/strm/",
+ "/suburban-trains/",
+ "/sup/",
+ "/switch/",
+ "/talents/",
+ "/tasklet/",
+ "/taxi/",
+ "/taxi_efficiency/",
+ "/testenv/",
+ "/testpalm/",
+ "/testpers/",
+ "/toloka/",
+ "/toolbox/",
+ "/tools/",
+ "/tracker/",
+ "/traffic/",
+ "/transfer_manager/",
+ "/travel/",
+ "/trust/",
+ "/urfu/",
+ "/vcs/",
+ "/velocity/",
+ "/vendor/",
+ "/vh/",
+ "/voicetech/",
+ "/weather/",
+ "/web/",
+ "/wmconsole/",
+ "/xmlsearch/",
+ "/yabs/",
+ "/yadoc/",
+ "/yandex_io/",
+ "/yaphone/",
+ "/ydf/",
+ "/ydo/",
+ "/yp/",
+ "/yql/",
+ "/ysite/",
+ "/yt/",
+ "/yweb/",
+ "/zen/",
+ "/zapravki/",
+ "/zen/",
+ "/zootopia/",
+ "/zora/",
+]
+
+MY_PATH = os.path.dirname(os.path.abspath(__file__))
+
+# 0.2.x uses stable hashing
+CORE_PROC_VERSION = "0.2.1"
+
+ARCADIA_ROOT_SIGN = "$S/"
+SIGNAL_NOT_FOUND = "signal not found"
+
+
+class SourceRoot(object):
+ def __init__(self):
+ self.root = None
+
+ def detect(self, source):
+ if not source:
+ # For example, regexp_4
+ return
+
+ if source.startswith("/-S/"):
+ return source[4:]
+
+ if source.startswith("../"):
+ return source
+
+ """
+ if self.root is not None:
+ return self.root
+ """
+
+ min_pos = 100000
+ for root_dir in ARCADIA_ROOT_DIRS:
+ pos = source.find(root_dir)
+ if pos < 0:
+ continue
+
+ if pos < min_pos:
+ min_pos = pos
+
+ if min_pos < len(source):
+ self.root = source[:min_pos + 1]
+
+ def crop(self, source):
+ if not source:
+ return ""
+
+ # detection attempt
+ self.detect(source)
+
+ if self.root is not None:
+ return source.replace(self.root, ARCADIA_ROOT_SIGN, 1)
+
+ # when traceback contains only ??, source root cannot be detected
+ return source
+
+
+def highlight_func(s):
+ return (
+ s
+ .replace("=", '<span class="symbol">=</span>')
+ .replace("(", '<span class="symbol">(</span>')
+ .replace(")", '<span class="symbol">)</span>')
+ )
+
+
+class FrameBase(object):
+ def __init__(
+ self,
+ frame_no=None,
+ addr="",
+ func="",
+ source="",
+ source_no="",
+ func_name="",
+ ):
+ self.frame_no = frame_no
+ self.addr = addr
+ self.func = func
+ self.source = source
+ self.source_no = source_no
+ self.func_name = func_name
+
+ def __str__(self):
+ return "{}\t{}\t{}".format(
+ self.frame_no,
+ self.func,
+ self.source,
+ )
+
+ def to_json(self):
+ return {
+ "frame_no": self.frame_no,
+ "addr": self.addr,
+ "func": self.func,
+ "func_name": self.func_name,
+ "source": self.source,
+ "source_no": self.source_no,
+ }
+
+ def fingerprint(self):
+ return self.func_name
+
+ def cropped_source(self):
+ return self.source
+
+ def raw(self):
+ return "{frame} {func} {source}".format(
+ frame=self.frame_no,
+ func=self.func,
+ source=self.source,
+ )
+
+ def html(self):
+ source, source_fmt = self.find_source()
+ return (
+ '<span class="frame">{frame}</span>'
+ '<span class="func">{func}</span> '
+ '<span class="source">{source}</span>{source_fmt}\n'.format(
+ frame=self.frame_no,
+ func=highlight_func(self.func.replace("&", "&amp;").replace("<", "&lt;")),
+ source=source,
+ source_fmt=source_fmt,
+ )
+ )
+
+
+class LLDBFrame(FrameBase):
+ SOURCE_NO_RE = re.compile(r"(.*?[^\d]):(\d+)")
+ FUNC_RE = re.compile(r"(\w+\s)?(\w+[\w,:,_,<,>,\s,*]+).*$")
+
+ def __init__(
+ self,
+ frame_no=None,
+ addr="",
+ func="",
+ source="",
+ source_no="",
+ func_name="",
+ ):
+ super(LLDBFrame, self).__init__(
+ frame_no=frame_no,
+ addr=addr,
+ func=func,
+ source=source,
+ source_no=source_no,
+ func_name=func_name,
+ )
+ # .source calculation
+
+ func = func.replace("(anonymous namespace)::", "")
+ m = self.FUNC_RE.match(func)
+ if m:
+ self.func_name = m.group(2) # overwrite func_name if name is in func
+
+ if source_no:
+ self.source_no = source_no
+ self.source = source
+ else:
+ m = self.SOURCE_NO_RE.match(source)
+ if m:
+ self.source = m.group(1)
+ self.source_no = m.group(2)
+
+ def find_source(self):
+ """
+ :return: pair (source, source_fmt)
+ """
+ source_fmt = ""
+
+ if self.source_no:
+ source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no)
+
+ return self.source, source_fmt
+
+
+class GDBFrame(FrameBase):
+ SOURCE_NO_RE = re.compile(r"(.*):(\d+)")
+ # #7 0x00007f105f3a221d in NAppHost::NTransport::TCoroutineExecutor::Poll (this=0x7f08416a5d00,
+ # tasks=empty TVector (capacity=32)) at /-S/apphost/lib/executors/executors.cpp:373
+ # We match with non-greedy regex a function name that cannot contain equal sign
+ FUNC_RE = re.compile(r"(.*?) \(([a-zA-Z0-9_]+=.*|)\)$") # function with kwarg-params or zero params
+
+ def __init__(
+ self,
+ frame_no=None,
+ addr="",
+ func="",
+ source="",
+ source_no="",
+ func_name="",
+ ):
+ super(GDBFrame, self).__init__(
+ frame_no=frame_no,
+ addr=addr,
+ func=func,
+ source=source,
+ source_no=source_no,
+ func_name=func_name,
+ )
+ if not source_no:
+ m = self.SOURCE_NO_RE.match(source)
+ if m:
+ self.source = m.group(1)
+ self.source_no = m.group(2)
+ if not func_name:
+ m = self.FUNC_RE.match(self.func)
+ if m:
+ self.func_name = m.group(1)
+
+ def find_source(self):
+ """
+ Returns link to arcadia if source is path in arcadia, else just string with path
+ :return: pair (source, source_fmt)
+ """
+ source_fmt = ""
+ source = ""
+ link = ""
+ dirs = self.source.split("/")
+ if len(dirs) > 1 and "/{dir}/".format(dir=dirs[1]) in ARCADIA_ROOT_DIRS:
+ link = self.source.replace(ARCADIA_ROOT_SIGN, ARCADIA_ROOT_LINK)
+ else:
+ source = self.source
+ if self.source_no:
+ source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no)
+ if link:
+ link += "?#L{line}".format(line=self.source_no)
+
+ if link:
+ source = '<a href="{link}">{source}</a>'.format(
+ link=link,
+ source=self.source,
+ )
+ return source, source_fmt
+
+
+class SDCAssertFrame(LLDBFrame):
+
+ def __init__(
+ self,
+ frame_no=None,
+ addr="",
+ func="",
+ source="",
+ source_no="",
+ func_name="",
+ ):
+ super(SDCAssertFrame, self).__init__(
+ frame_no=frame_no,
+ addr=addr,
+ func=func,
+ source=source,
+ source_no=source_no,
+ func_name=func_name,
+ )
+ # .source calculation
+
+ self.source = source or ""
+ if isinstance(source_no, str) and len(source_no) > 0:
+ source_no = int(source_no, 16)
+ self.source_no = source_no or ""
+
+ m = self.FUNC_RE.match(func)
+ if m:
+ self.func_name = m.group(2)
+
+
+class Stack(object):
+ # priority classes
+ LOW_IMPORTANT = 25
+ DEFAULT_IMPORTANT = 50
+ SUSPICIOUS_IMPORTANT = 75
+ MAX_IMPORTANT = 100
+
+ # default coredump's type
+ mode = CoredumpMode.GDB
+
+ max_depth = None
+
+ fingerprint_blacklist = [
+ # bottom frames
+ "raise",
+ "abort",
+ "__gnu_cxx::__verbose_terminate_handler",
+ "_cxxabiv1::__terminate",
+ "std::terminate",
+ "__cxxabiv1::__cxa_throw",
+ # top frames
+ "start_thread",
+ "clone",
+ "??",
+ "__clone",
+ "__libc_start_main",
+ "_start",
+ "__nanosleep",
+ ]
+
+ fingerprint_blacklist_prefix = ()
+
+ suspicious_functions = [
+ "CheckedDelete",
+ "NPrivate::Panic",
+ "abort",
+ "close_all_fds",
+ "__cxa_throw",
+ ]
+
+ low_important_functions_eq = [
+ "poll ()",
+ "recvfrom ()",
+ "pthread_join ()",
+ ]
+
+ low_important_functions_match = [
+ "TCommonSockOps::SendV",
+ "WaitD (",
+ "SleepT (",
+ "Join (",
+ "epoll_wait",
+ "nanosleep",
+ "pthread_cond_wait",
+ "pthread_cond_timedwait",
+ "gsignal",
+ "std::detail::_",
+ "std::type_info",
+ "ros::NodeHandle",
+ ]
+
+ def __init__(
+ self,
+ lines=None,
+ source_root=None,
+ thread_ptr=0,
+ thread_id=None,
+ frames=None,
+ important=None,
+ stack_fp=None,
+ fingerprint_hash=None,
+ stream=None,
+ mode=None, # type: CoredumpMode
+ ignore_bad_frames=True,
+ ):
+ self.lines = lines
+ self.source_root = source_root
+ self.thread_ptr = thread_ptr
+ self.thread_id = thread_id
+ if mode is not None:
+ self.mode = mode
+
+ self.frames = frames or []
+ if self.frames and isinstance(frames[0], dict):
+ self.frames = [self.frame_factory(f) for f in self.frames]
+ self.important = important or self.DEFAULT_IMPORTANT
+ if thread_id == 1:
+ self.important = self.MAX_IMPORTANT
+ self.fingerprint_hash = fingerprint_hash
+ self.stack_fp = stack_fp
+ self.stream = stream
+ self.ignore_bad_frames = ignore_bad_frames
+
+ def to_json(self):
+ """Should be symmetric with `from_json`."""
+ return {
+ "mode": self.mode.value,
+ "frames": [frame.to_json() for frame in self.frames],
+ "important": self.important,
+ }
+
+ @staticmethod
+ def from_json(stack):
+ """Should be symmetric with `to_json`."""
+ mode = CoredumpMode(stack.get("mode", CoredumpMode.GDB.value))
+ # old serialization format support, should be dropped
+ lldb_mode = stack.get("lldb_mode", False)
+ if lldb_mode:
+ mode = CoredumpMode.LLDB
+
+ unpacked_stack = {
+ "mode": mode,
+ "frames": stack["frames"],
+ "important": stack.get("important", Stack.DEFAULT_IMPORTANT),
+ }
+ return mode, unpacked_stack
+
+ def frame_factory(self, args):
+ frames = {
+ CoredumpMode.GDB: GDBFrame,
+ CoredumpMode.LLDB: LLDBFrame,
+ CoredumpMode.SDC_ASSERT: SDCAssertFrame,
+ }
+
+ class_object = frames.get(self.mode)
+ if not class_object:
+ raise Exception("Invalid mode: {}".format(self.mode.value))
+
+ return class_object(**args)
+
+ def low_important(self):
+ return self.important <= self.LOW_IMPORTANT
+
+ def check_importance(self, frame):
+ # raised priority cannot be lowered
+ if self.important > self.DEFAULT_IMPORTANT:
+ return
+
+ # detect suspicious stacks
+ for name in self.suspicious_functions:
+ if name in frame.func:
+ self.important = self.SUSPICIOUS_IMPORTANT
+ return
+
+ for name in self.low_important_functions_eq:
+ if name == frame.func:
+ self.important = self.LOW_IMPORTANT
+
+ for name in self.low_important_functions_match:
+ if name in frame.func:
+ self.important = self.LOW_IMPORTANT
+
+ def push_frame(self, frame):
+ self.check_importance(frame)
+ # ignore duplicated frames
+ if len(self.frames) and self.frames[-1].frame_no == frame.frame_no:
+ return
+ self.frames.append(frame)
+
+ def parse(self):
+ """
+ Parse one stack
+ """
+ assert self.lines is not None
+ assert self.source_root is not None
+
+ for line in self.lines:
+ match_found = False
+ for regexp in self.REGEXPS:
+ m = regexp.match(line)
+ if m:
+ frame_args = m.groupdict()
+ if "source" in frame_args:
+ frame_args["source"] = self.source_root.crop(frame_args["source"])
+
+ self.push_frame(self.frame_factory(frame_args))
+ match_found = True
+ break
+
+ if not match_found:
+ self.bad_frame(line)
+
+ def bad_frame(self, line):
+ if self.ignore_bad_frames:
+ logger.warning("Bad frame: %s", line)
+ return
+
+ raise Exception("Bad frame: `{}`, frame `{}`".format(
+ line,
+ self.debug(return_result=True),
+ ))
+
+ def debug(self, return_result=False):
+ if self.low_important():
+ return ""
+
+ res = "\n".join([str(f) for f in self.frames])
+ res += "----------------------------- DEBUG END\n"
+ if return_result:
+ return res
+
+ self.stream.write(res)
+
+ def raw(self):
+ return "\n".join([frame.raw() for frame in self.frames])
+
+ def html(self, same_hash=False, same_count=1, return_result=False):
+ ans = ""
+ pre_class = "important-" + str(self.important)
+ if same_hash:
+ pre_class += " same-hash"
+
+ ans += '<pre class="{0}">'.format(pre_class)
+ if not same_hash:
+ ans += '<a name="stack{0}"></a>'.format(self.hash())
+
+ ans += '<span class="hash"><a href="#stack{0}">#{0}</a>, {1} stack(s) with same hash</span>\n'.format(
+ self.hash(), same_count,
+ )
+
+ for f in self.frames:
+ ans += f.html()
+ ans += "</pre>\n"
+
+ if return_result:
+ return ans
+
+ self.stream.write(ans)
+
+ def fingerprint(self, max_num=None):
+ """
+ Stack fingerprint: concatenation of non-common stack frames
+ FIXME: wipe away `max_num`
+ """
+ stack_fp = list()
+ len_frames = min((max_num or len(self.frames)), len(self.frames))
+
+ for f in self.frames[:len_frames]:
+ fp = f.fingerprint()
+ if not fp:
+ continue
+
+ if fp in self.fingerprint_blacklist:
+ continue
+
+ if fp.startswith(self.fingerprint_blacklist_prefix):
+ continue
+
+ if fp in stack_fp:
+ # FIXME: optimize duplicate remover: check only previous frame
+ # see also `push_frame`
+ continue
+
+ stack_fp.append(fp.strip())
+
+ if self.max_depth is not None and len(stack_fp) >= self.max_depth:
+ break
+
+ return "\n".join(stack_fp)
+
+ def simple_html(self, num_frames=None):
+ if not num_frames:
+ num_frames = len(self.frames)
+ pre_class = "important-0"
+ ans = '<pre class="{0}">'.format(pre_class)
+ for i in range(min(len(self.frames), num_frames)):
+ ans += self.frames[i].html()
+ ans += "</pre>\n"
+ return ans
+
+ def __str__(self):
+ return "\n".join(map(str, self.frames))
+
+ def hash(self, max_num=None):
+ """
+ Entire stack hash for merging same stacks
+ """
+ if self.fingerprint_hash is None:
+ self.fingerprint_hash = int(hashlib.md5(self.fingerprint(max_num).encode("utf-8")).hexdigest()[0:15], 16)
+
+ return self.fingerprint_hash
+
+
+class GDBStack(Stack):
+
+ mode = CoredumpMode.GDB
+
+ REGEXPS = [
+ # #6 0x0000000001d9203e in NAsio::TIOService::TImpl::Run (this=0x137b1ec00) at /place/
+ # sandbox-data/srcdir/arcadia_cache/library/neh/asio/io_service_impl.cpp:77
+
+ re.compile(
+ r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) at (?P<source>.*)"
+ ),
+
+ # #5 TCondVar::WaitD (this=this@entry=0x10196b2b8, mutex=..., deadLine=..., deadLine@entry=...)
+ # at /place/sandbox-data/srcdir/arcadia_cache/util/system/condvar.cpp:150
+ re.compile(
+ r"#(?P<frame_no>\d+)[ \t]+(?P<func>.*) at (?P<source>/.*)"
+ ),
+
+ # #0 0x00007faf8eb31d84 in pthread_cond_wait@@GLIBC_2.3.2 ()
+ # from /lib/x86_64-linux-gnu/libpthread.so.0
+ re.compile(
+ r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) from (?P<source>.*)"
+ ),
+
+ # #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
+ re.compile(
+ r"#(?P<frame_no>\d+)[ \t]+ (?P<func>.*) at (?P<source>.*)"
+ ),
+
+ # #10 0x0000000000000000 in ?? ()
+ re.compile(
+ r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*)"
+ ),
+ ]
+
+
+class LLDBStack(Stack):
+
+ mode = CoredumpMode.LLDB
+
+ REGEXPS = [
+ # 0x00007fd7b300a886 libthird_Uparty_Sros_Sros_Ucomm_Sclients_Sroscpp_Sliblibroscpp.so`
+ # std::thread::_State_impl<std::thread::_Invoker<std::tuple<ros::PollManager::PollManager()::$_1> > >::_M_run()
+ # [inlined] ros::PollManager::threadFunc(this=0x00007fd7b30dab20) at poll_manager.cpp:75:16 # noqa
+ re.compile(
+ r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+inlined]\s(?P<func>.+)\sat\s(?P<source>.+)"
+ ),
+
+ re.compile(
+ r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+?`(?P<func>.+)\sat\s(?P<source>.+)"
+ ),
+
+ # * frame #0: 0x00007fd7aee51f47 libc.so.6`gsignal + 199
+ re.compile(
+ r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+)\s(?P<source>.+)`(?P<func>.+)\s\+\s(?P<source_no>\d+)"
+ ),
+ ]
+
+ # Take not more than `max_depth` non-filtered frames into fingerprint
+ # See CORES-180
+ max_depth = 10
+
+ fingerprint_blacklist = Stack.fingerprint_blacklist + [
+ "ros::ros_wallsleep",
+ ]
+
+ fingerprint_blacklist_prefix = Stack.fingerprint_blacklist_prefix + (
+ "___lldb_unnamed_symbol",
+ "__gnu_cxx",
+ "__gthread",
+ "__pthread",
+ "decltype",
+ "myriapoda::BuildersRunner",
+ "non",
+ "std::_Function_handler",
+ "std::_Sp_counted_ptr_inplace",
+ "std::__invoke_impl",
+ "std::__invoke_result",
+ "std::__shared_ptr",
+ "std::conditional",
+ "std::shared_ptr",
+ "std::thread::_Invoker",
+ "std::thread::_State_impl",
+ "yandex::sdc::assert_details_",
+ )
+
+ suspicious_functions = Stack.suspicious_functions + [
+ "Xml",
+ "boost",
+ "ros",
+ "supernode",
+ "tensorflow",
+ "yandex::sdc",
+ ]
+
+
+class PythonStack(Stack):
+
+ REGEXPS = [
+ re.compile(
+ r'File "(?P<source>.*)", line (?P<source_no>\d+), in (?P<func_name>.*)'
+ ),
+ ]
+
+
+class SDCAssertStack(LLDBStack):
+
+ mode = CoredumpMode.SDC_ASSERT
+
+ REGEXPS = [
+ # 0: ./modules/_shared/libcore_Stools_Slibassert.so(yandex::sdc::assert_details_::PanicV(char const*,
+ # long, char const*, char const*, bool, char const*, __va_list_tag*)
+ # +0x2aa)[0x7fb83268feaa]
+ re.compile(
+ r"(?P<frame_no>\d+):\s(?P<source>.+.so)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)"
+ ),
+
+ re.compile(
+ r"(?P<frame_no>\d+):\s(?P<source>\w+)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)"
+ )
+ ]
+
+
+def parse_python_traceback(trace):
+ trace = trace.replace("/home/zomb-sandbox/client/", "/")
+ trace = trace.replace("/home/zomb-sandbox/tasks/", "/sandbox/")
+ trace = trace.split("\n")
+ exception = trace[-1] # noqa: F841
+ trace = trace[1: -1]
+ pairs = zip(trace[::2], trace[1::2])
+ stack = Stack(lines=[])
+ for frame_no, (path, row) in enumerate(pairs):
+ # FIXME: wrap into generic tracer
+ m = PythonStack.REGEXPS[0].match(path.strip())
+ if m:
+ frame_args = m.groupdict()
+ if not frame_args["source"].startswith("/"):
+ frame_args["source"] = "/" + frame_args["source"]
+ frame_args["frame_no"] = str(frame_no)
+ frame_args["func"] = row.strip()
+ stack.push_frame(GDBFrame(**frame_args))
+ return [[stack]], [[stack.raw()]], 6
+
+
+def stack_factory(stack):
+ mode, unpacked_stack = Stack.from_json(stack)
+
+ if mode == CoredumpMode.GDB:
+ return GDBStack(**unpacked_stack)
+ elif mode == CoredumpMode.LLDB:
+ return LLDBStack(**unpacked_stack)
+ elif mode == CoredumpMode.SDC_ASSERT:
+ return SDCAssertStack(**unpacked_stack)
+
+ raise Exception("Invalid stack mode: {}. ".format(mode))
+
+
+def _read_file(file_name):
+ with open(file_name) as f:
+ return f.read()
+
+
+def _file_contents(file_name):
+ """Return file (or resource) contents as unicode string."""
+ if getattr(sys, "is_standalone_binary", False):
+ try:
+ contents = pkgutil.get_data(__package__, file_name)
+ except Exception:
+ raise IOError("Failed to find resource: " + file_name)
+ else:
+ if not os.path.exists(file_name):
+ file_name = os.path.join(MY_PATH, file_name)
+ contents = _read_file(file_name)
+ # py23 compatibility
+ if not isinstance(contents, six.text_type):
+ contents = contents.decode("utf-8")
+ return contents
+
+
+def html_prolog(stream, timestamp):
+ prolog = _file_contents("prolog.html")
+ assert isinstance(prolog, six.string_types)
+ stream.write(prolog.format(
+ style=_file_contents("styles.css"),
+ coredump_js=_file_contents("core_proc.js"),
+ version=CORE_PROC_VERSION,
+ timestamp=timestamp,
+ ))
+
+
+def html_epilog(stream):
+ stream.write(_file_contents("epilog.html"))
+
+
+def detect_coredump_mode(core_text):
+ if len(core_text) == 0:
+ raise Exception("Text stacktrace is blank")
+
+ if "Panic at unixtime" in core_text:
+ return CoredumpMode.SDC_ASSERT
+
+ if "(lldb)" in core_text:
+ return CoredumpMode.LLDB
+
+ return CoredumpMode.GDB
+
+
+def filter_stack_dump(
+ core_text=None,
+ stack_file_name=None,
+ use_fingerprint=False,
+ sandbox_failed_task_id=None,
+ output_stream=None,
+ timestamp=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ ignore_bad_frames=True,
+):
+ """New interface for stacktrace filtering. Preferred to use."""
+ if not core_text and not stack_file_name:
+ raise ValueError("Either `core_text` or `stack_file_name` should be passed to `filter_stack_dump`. ")
+
+ if core_text is not None and stack_file_name:
+ raise ValueError("Only one of `core_text` and `stack_file_name` cannot be specified for `filter_stack_dump`. ")
+
+ if stack_file_name:
+ core_text = _read_file(stack_file_name)
+ # further processing uses `core_text` only
+
+ mode = detect_coredump_mode(core_text)
+ core_lines = core_text.split("\n")
+
+ return filter_stackdump(
+ file_lines=core_lines,
+ ignore_bad_frames=ignore_bad_frames,
+ mode=mode,
+ sandbox_failed_task_id=sandbox_failed_task_id,
+ stream=output_stream,
+ timestamp=timestamp,
+ use_fingerprint=use_fingerprint,
+ use_stream=output_stream is not None,
+ )
+
+
+class StackDumperBase(object):
+
+ SANDBOX_TASK_RE = re.compile(r".*/[0-9a-f]/[0-9a-f]/([0-9]+)/.*")
+ MAX_SAME_STACKS = 30
+
+ def __init__(
+ self,
+ use_fingerprint,
+ sandbox_failed_task_id,
+ stream,
+ use_stream,
+ file_lines,
+ timestamp,
+ mode,
+ file_name=None,
+ ignore_bad_frames=True,
+ ):
+ self.source_root = SourceRoot()
+ self.use_fingerprint = use_fingerprint
+ self.sandbox_task_id = None
+ self.sandbox_failed_task_id = sandbox_failed_task_id
+ self.stream = stream or sys.stdout
+ self.use_stream = use_stream
+ self.file_name = file_name
+ self.file_lines = file_lines
+ self.timestamp = timestamp
+ self.ignore_bad_frames = ignore_bad_frames
+ self.stack_class = self.get_stack_class(mode)
+
+ self.signal = SIGNAL_NOT_FOUND
+ self.stacks = []
+ self._main_info = []
+
+ @staticmethod
+ def is_ignored_line(line):
+ raise NotImplementedError("Not implemented static method `is_ignored_line`. ")
+
+ @staticmethod
+ def get_stack_class(mode):
+ exist_modes = {}
+ for cls in [GDBStack, LLDBStack, SDCAssertStack]:
+ current_mode = cls.mode
+ if current_mode in exist_modes:
+ raise Exception("Duplicate modes are disallowed. Repeated mode: `{}`".format(current_mode.value))
+ exist_modes[current_mode] = cls
+
+ if mode not in exist_modes:
+ raise Exception("Unexpected coredump processing mode: `{}`".format(mode.value))
+
+ return exist_modes[mode]
+
+ def check_signal(self, line):
+ raise NotImplementedError("Not implemented `check_signal`.")
+
+ def set_sandbox_task_id(self, task_id):
+ self.sandbox_task_id = task_id
+
+ def add_main_line(self, line):
+ self._main_info.append(line)
+
+ def add_stack(self, stack_lines, thread_id):
+ if not stack_lines:
+ return
+
+ stack = self.stack_class(
+ lines=stack_lines,
+ source_root=self.source_root,
+ thread_id=thread_id,
+ stream=self.stream,
+ ignore_bad_frames=self.ignore_bad_frames,
+ )
+ self.stacks.append(stack)
+
+ def dump(self):
+ if self.file_lines is None:
+ # FIXME(mvel): LLDB is not handled here
+ self.file_lines = get_parsable_gdb_text(_read_file(self.file_name))
+
+ self._collect_stacks()
+
+ for stack in self.stacks:
+ stack.parse()
+ # stack.debug()
+
+ if self.use_stream:
+ if self.use_fingerprint:
+ for stack in self.stacks:
+ self.stream.write(stack.fingerprint() + "\n")
+ self.stream.write("--------------------------------------\n")
+ return
+ else:
+ html_prolog(self.stream, self.timestamp)
+
+ if self.sandbox_task_id is not None:
+ self.stream.write(
+ '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">'
+ 'Coredumped binary build task: '
+ '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format(
+ self.sandbox_task_id
+ )
+ )
+
+ if self.sandbox_failed_task_id is not None:
+ self.stream.write(
+ '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">'
+ 'Sandbox failed task: '
+ '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format(
+ self.sandbox_failed_task_id
+ )
+ )
+
+ pre_class = ""
+ self.stream.write('<pre class="{0}">\n'.format(pre_class))
+ for line in self._main_info:
+ self.stream.write(line.replace("&", "&amp;").replace("<", "&lt;") + "\n")
+ self.stream.write("</pre>\n")
+
+ sorted_stacks = sorted(self.stacks, key=lambda x: (x.important, x.fingerprint()), reverse=True)
+
+ prev_hash = None
+ all_hash_stacks = []
+ cur_hash_stacks = []
+ for stack in sorted_stacks:
+ if stack.hash() == 0:
+ continue
+
+ if stack.hash() == prev_hash:
+ if len(cur_hash_stacks) < self.MAX_SAME_STACKS:
+ # do not collect too much
+ cur_hash_stacks.append(stack)
+ continue
+
+ # hash changed
+ if cur_hash_stacks:
+ all_hash_stacks.append(cur_hash_stacks)
+
+ prev_hash = stack.hash()
+ cur_hash_stacks = [stack, ]
+
+ # push last
+ if cur_hash_stacks:
+ all_hash_stacks.append(cur_hash_stacks)
+
+ if self.use_stream:
+ for cur_hash_stacks in all_hash_stacks:
+ same_hash = False
+ for stack in cur_hash_stacks:
+ stack.html(same_hash=same_hash, same_count=len(cur_hash_stacks))
+ same_hash = True
+
+ html_epilog(self.stream)
+ else:
+ raw_hash_stacks = [
+ [stack.raw() for stack in common_hash_stacks]
+ for common_hash_stacks in all_hash_stacks
+ ]
+ return all_hash_stacks, raw_hash_stacks, self.signal
+
+ def _collect_stacks(self):
+ stack_lines = []
+ stack_detected = False
+ thread_id = None
+
+ for line in self.file_lines:
+ line = line.strip()
+ if self.is_ignored_line(line):
+ continue
+
+ if "Core was generated" in line:
+ m = self.SANDBOX_TASK_RE.match(line)
+ if m:
+ self.set_sandbox_task_id(int(m.group(1)))
+
+ self.check_signal(line)
+
+ # [Switching to thread 55 (Thread 0x7f100a94c700 (LWP 21034))]
+ # Thread 584 (Thread 0x7ff363c03700 (LWP 2124)):
+
+ # see test2 and test3
+ tm = self.THREAD_RE.match(line)
+ if tm:
+ stack_detected = True
+ self.add_stack(
+ stack_lines=stack_lines,
+ thread_id=thread_id,
+ )
+ stack_lines = []
+ thread_id = int(tm.group(1))
+ continue
+
+ if stack_detected:
+ stack_lines.append(line)
+ else:
+ self.add_main_line(line)
+
+ # parse last stack
+ self.add_stack(
+ stack_lines=stack_lines,
+ thread_id=thread_id,
+ )
+
+
+class StackDumperGDB(StackDumperBase):
+
+ SIGNAL_FLAG = "Program terminated with signal"
+ THREAD_RE = re.compile(r".*[Tt]hread (\d+) .*")
+ LINE_IN = re.compile(r"\d+\tin ")
+
+ def is_ignored_line(self, line):
+ if not line:
+ return True
+
+ if line.startswith("[New "):
+ # LWP, Thread, process
+ return True
+
+ if line.startswith("[Thread "):
+ return True
+
+ if line.startswith("Using "):
+ return True
+
+ if line.startswith("warning:"):
+ return True
+
+ if line.startswith("Python Exception"):
+ # TODO: handle this more carefully
+ return True
+
+ if line[0] != "#" and "No such file or directory" in line:
+ return True
+
+ if self.LINE_IN.match(line):
+ # see test1.txt for example
+ # 641 in /place/sandbox-data/srcdir/arcadia/library/coroutine/engine/impl.h
+ return True
+
+ return False
+
+ def check_signal(self, line):
+ if self.SIGNAL_FLAG in line:
+ self.signal = line[line.find(self.SIGNAL_FLAG) + len(self.SIGNAL_FLAG):].split(",")[0]
+
+
+class StackDumperLLDB(StackDumperBase):
+
+ SIGNAL_FLAG = "stop reason = signal"
+
+ THREAD_RE = re.compile(r".*thread #(\d+), .*")
+
+ SKIP_LINES = {
+ "(lldb) bt all",
+ "(lldb) script import sys",
+ "(lldb) target create",
+ "Core file",
+
+ # Drop signal interceptor call
+ # * frame #0: 0x00007efd49042fb7 libc.so.6`__GI___libc_sigaction at sigaction.c:54
+ # TODO(epsilond1): Set MAX_IMPORTANT for some thread
+ "__GI___libc_sigaction",
+
+ # Drop unnamed symbols at lines like
+ # frame #4: 0x00007fd8054156df libstdc++.so.6`___lldb_unnamed_symbol440$$libstdc++.so.6 + 15
+ "$$",
+ }
+
+ @staticmethod
+ def is_ignored_line(line):
+ if not line:
+ return True
+
+ for skip_line in StackDumperLLDB.SKIP_LINES:
+ if skip_line in line:
+ return True
+ return False
+
+ def check_signal(self, line):
+ if self.SIGNAL_FLAG in line and self.signal == SIGNAL_NOT_FOUND:
+ self.signal = line.split()[-1]
+
+
+class StackDumperSDCAssert(StackDumperBase):
+
+ THREAD_RE = re.compile(
+ r"(\d+)(:\s)"
+ )
+
+ def is_ignored_line(self, line):
+ if not line:
+ return True
+
+ return not re.match(self.THREAD_RE, line)
+
+ def check_signal(self, line):
+ self.signal = SIGNAL_NOT_FOUND
+
+ def _collect_stacks(self):
+ stack_lines = []
+ for line in self.file_lines:
+ line = line.strip()
+ if self.is_ignored_line(line):
+ continue
+ stack_lines.append(line)
+ self.check_signal(line)
+
+ self.add_stack(
+ stack_lines=stack_lines,
+ thread_id=0,
+ )
+
+
+def filter_stackdump(
+ file_name=None,
+ use_fingerprint=False,
+ sandbox_failed_task_id=None,
+ stream=None,
+ file_lines=None,
+ use_stream=True,
+ timestamp=None,
+ ignore_bad_frames=True,
+ mode=None,
+):
+ if mode is None and file_name is not None:
+ mode = detect_coredump_mode(_read_file(file_name))
+ if mode == CoredumpMode.GDB:
+ stack_dumper_cls = StackDumperGDB
+ elif mode == CoredumpMode.LLDB:
+ stack_dumper_cls = StackDumperLLDB
+ elif mode == CoredumpMode.SDC_ASSERT:
+ stack_dumper_cls = StackDumperSDCAssert
+ else:
+ raise Exception("Invalid mode: {}".format(mode.value))
+
+ dumper = stack_dumper_cls(
+ file_name=file_name,
+ use_fingerprint=use_fingerprint,
+ sandbox_failed_task_id=sandbox_failed_task_id,
+ stream=stream,
+ use_stream=use_stream,
+ file_lines=file_lines,
+ timestamp=timestamp,
+ ignore_bad_frames=ignore_bad_frames,
+ mode=mode,
+ )
+
+ return dumper.dump()
+
+
+def get_parsable_gdb_text(core_text):
+ # FIXME(mvel): Check encoding?
+ # core_text = core_text.encode("ascii", "ignore").decode("ascii")
+ core_text = (
+ core_text
+ # .replace("#", "\n#") # bug here
+ .replace("No core", "\nNo core")
+ .replace("[New", "\n[New")
+ .replace("\n\n", "\n")
+ )
+
+ return core_text.split("\n")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ sys.stderr.write(
+ """Traceback filter "Tri Korochki"
+https://wiki.yandex-team.ru/cores-aggregation/
+Usage:
+ core_proc.py <traceback.txt> [-f|--fingerprint]
+ core_proc.py -v|--version
+"""
+ )
+ sys.exit(1)
+
+ if sys.argv[1] == "--version" or sys.argv[1] == "-v":
+ if os.system("svn info 2>/dev/null | grep '^Revision'") != 0:
+ print(CORE_PROC_VERSION)
+ sys.exit(0)
+
+ sandbox_failed_task_id = None
+
+ use_fingerprint = False
+ if len(sys.argv) >= 3:
+ if sys.argv[2] == "-f" or sys.argv[2] == "--fingerprint":
+ use_fingerprint = True
+ sandbox_failed_task_id = sys.argv[2]
+
+ filter_stack_dump(
+ core_text=_read_file(sys.argv[1]),
+ use_fingerprint=use_fingerprint,
+ sandbox_failed_task_id=sandbox_failed_task_id,
+ output_stream=sys.stdout,
+ )
+
+
+"""
+Stack group is a `Stack` objects list with the same hash (fingerprint).
+"""
+
+
+class StackEncoder(json.JSONEncoder):
+ """Stack JSON serializer."""
+
+ def default(self, obj):
+ if isinstance(obj, Stack):
+ return obj.to_json()
+
+        return json.JSONEncoder.default(self, obj)
+
+
+def serialize_stacks(stack_groups):
+ """
+ Serialize list of stack groups to string (using JSON format).
+
+ :param stack_groups: list of stack groups.
+ :return: JSON serialized to string
+ """
+ return json.dumps(stack_groups, cls=StackEncoder)
+
+
+def deserialize_stacks(stack_groups_str):
+ """
+ Restore JSON-serialized stack data into stack groups.
+
+ :param stack_groups_str: JSON-serialized data.
+ :return: list of stack groups
+ """
+ stack_groups_json = json.loads(stack_groups_str)
+ # please do not use `map` hell here, it's impossible to debug
+ all_stacks = [
+ [stack_factory(stack) for stack in stacks]
+ for stacks in stack_groups_json
+ ]
+ return all_stacks
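The helpers at the end of the module round-trip stack groups through JSON; a sketch, continuing from the `stacks` value in the README example:

```python
# Sketch: (de)serialization of stack groups.
import library.python.coredump_filter as coredump_filter

stacks, _, _ = coredump_filter.filter_stack_dump(core_text=core_text)  # core_text as above

blob = coredump_filter.serialize_stacks(stacks)      # JSON string via StackEncoder / Stack.to_json
restored = coredump_filter.deserialize_stacks(blob)  # rebuilt via stack_factory / Stack.from_json

assert restored[0][0].fingerprint() == stacks[0][0].fingerprint()
```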
diff --git a/library/python/coredump_filter/core_proc.js b/library/python/coredump_filter/core_proc.js
new file mode 100644
index 0000000000..15413adeae
--- /dev/null
+++ b/library/python/coredump_filter/core_proc.js
@@ -0,0 +1,21 @@
+
+
+$(document).ready(function() {
+ $('#show-same-stacks').click(function() {
+ var stacks = $('.same-hash');
+ for (var i = 0; i < stacks.length; ++i)
+ $(stacks[i]).show();
+ $('#show-same-stacks').hide();
+ $('#hide-same-stacks').show();
+ return false;
+ });
+
+ $('#hide-same-stacks').click(function() {
+ var stacks = $('.same-hash');
+ for (var i = 0; i < stacks.length; ++i)
+ $(stacks[i]).hide();
+ $('#hide-same-stacks').hide();
+ $('#show-same-stacks').show();
+ return false;
+ });
+});
diff --git a/library/python/coredump_filter/epilog.html b/library/python/coredump_filter/epilog.html
new file mode 100644
index 0000000000..b317cc2a91
--- /dev/null
+++ b/library/python/coredump_filter/epilog.html
@@ -0,0 +1,2 @@
+ </body>
+</html>
\ No newline at end of file
diff --git a/library/python/coredump_filter/prolog.html b/library/python/coredump_filter/prolog.html
new file mode 100644
index 0000000000..f102a7210d
--- /dev/null
+++ b/library/python/coredump_filter/prolog.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <style>{style}</style>
+ <script src="https://yastatic.net/jquery/1.7.1/jquery.min.js"></script>
+ <script>{coredump_js}</script>
+ </head>
+ <body>
+ <h1>Coredump report generated on {timestamp} by Coredump/traceback filter
+ <i><a href="http://mvel.at.yandex-team.ru/2373">Tri Korochki</a></i>
+ ({version})
+ </h1>
+ <h3>Author: <a href="https://staff.yandex-team.ru/mvel">mvel@</a> aka Mikhail Veltishchev</h3>
+ <h3>&copy; Yandex LLC. All rights reversed</h3>
+ <div class="legend">
+ <ul style="line-height: 22px">
+ <li><span class="important-100">Problem</span> stacks</li>
+ <li><span class="important-75">Suspicious</span> stacks</li>
+ <li><span class="important-50">Active</span> stacks</li>
+ <li><span class="important-25">Non-active</span> stacks</li>
+ </ul>
+ <a class="show-same-hash" id="show-same-stacks" href="#">Show same stacks</a>
+ <a class="show-same-hash" id="hide-same-stacks" href="#" style="display: none">Hide same stacks</a>
+ </div>
diff --git a/library/python/coredump_filter/styles.css b/library/python/coredump_filter/styles.css
new file mode 100644
index 0000000000..fdd09ce09e
--- /dev/null
+++ b/library/python/coredump_filter/styles.css
@@ -0,0 +1,116 @@
+body {
+ font-family: sans-serif;
+ font-size: 12px;
+}
+
+a {
+ text-decoration: none;
+ color: #486DEC;
+}
+
+.frame {
+ color: #7f7f7f;
+ display: inline-block;
+ width: 30px;
+}
+
+.addr {
+ color: #999999;
+ padding-right: 10px;
+}
+
+.func {
+ color: #7f0000;
+ word-wrap: normal;
+}
+
+.source {
+ color: #007f00;
+}
+
+.symbol {
+ color: #0000ff;
+}
+
+h1 {
+ font-size: 1.5em;
+ margin-top: .1em;
+ margin-bottom: .2em;
+}
+
+h3 {
+ font-size: 1em;
+ margin-top: .1em;
+ margin-bottom: .2em;
+}
+
+pre {
+ overflow-x: auto;
+ margin: 6px 0px 6px 0px;
+ padding: 0px 12px 6px 12px;
+ position: relative;
+}
+
+pre.important-25 {
+ background-color: #eeeeee;
+}
+
+span.important-25 {
+ background-color: #eeeeee;
+ padding: 3px;
+}
+
+pre.important-50 {
+ background-color: #e7ffe7;
+}
+
+span.important-50 {
+ background-color: #e7ffe7;
+ padding: 3px;
+}
+
+pre.important-75 {
+ background-color: #ffffcc;
+}
+
+span.important-75 {
+ background-color: #ffffcc;
+ padding: 3px;
+}
+
+pre.important-100 {
+ background-color: #ffdddd;
+}
+
+span.important-100 {
+ background-color: #ffdddd;
+ padding: 3px;
+}
+
+a.show-same-hash {
+ padding: 0px 20px 0px 20px;
+}
+
+/* hidden by default */
+.same-hash {
+ display: none;
+}
+
+span.hash {
+ position: absolute;
+ top: 3px; right: 3px;
+}
+
+div.legend {
+ position: absolute;
+ z-index: 1;
+ top: 5px;
+ right: 8px;
+ border: 1px solid #7f7f7f;
+ border-radius: 3px;
+ padding: 0px 20px 3px 0px;
+}
+
+div.legend ul {
+ margin: 3px 0px 3px 0px;
+}
diff --git a/library/python/coredump_filter/ya.make b/library/python/coredump_filter/ya.make
new file mode 100644
index 0000000000..fc8ec1a45f
--- /dev/null
+++ b/library/python/coredump_filter/ya.make
@@ -0,0 +1,23 @@
+PY23_LIBRARY()
+
+PY_SRCS(
+ __init__.py
+)
+
+RESOURCE_FILES(
+ PREFIX library/python/coredump_filter/
+ core_proc.js
+ epilog.html
+ prolog.html
+ styles.css
+)
+
+IF(PYTHON2)
+ PEERDIR(contrib/deprecated/python/enum34)
+ENDIF()
+
+END()
+
+RECURSE(
+ tests
+)
diff --git a/library/python/json/__init__.py b/library/python/json/__init__.py
new file mode 100644
index 0000000000..c6420d5e6d
--- /dev/null
+++ b/library/python/json/__init__.py
@@ -0,0 +1,44 @@
+from library.python.json.loads import loads as _loads
+from simplejson import loads as _sj_loads
+
+
+def loads(*args, **kwargs):
+ try:
+ return _loads(*args, **kwargs)
+ except Exception as e:
+ if 'invalid syntax at token' in str(e):
+ kwargs.pop('intern_keys', None)
+ kwargs.pop('intern_vals', None)
+ kwargs.pop('may_unicode', None)
+ return _sj_loads(*args, **kwargs)
+
+ raise
+
+
+from simplejson import load, dump, dumps # noqa
+
+
+def read_file(file_name, **kwargs):
+ """
+ Read file and return its parsed json contents.
+
+ All kwargs will be proxied to `json.load` method as is.
+
+ :param file_name: file with json contents
+ :return: parsed json contents
+ """
+ with open(file_name) as f:
+ return load(f, **kwargs)
+
+
+def write_file(file_name, contents, **kwargs):
+ """
+ Dump json data to file.
+
+ All kwargs will be proxied to `json.dump` method as is.
+
+ :param file_name: file to dump to
+ :param contents: JSON-serializable object
+ """
+ with open(file_name, "w") as f:
+ dump(contents, f, **kwargs)
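A short usage sketch of this module; the file name and payload are illustrative:

    import library.python.json as lpj

    data = lpj.loads('{"a": [1, 2, 3]}', intern_keys=True)
    # On an "invalid syntax at token" error from the fast parser, loads()
    # retries via simplejson, first dropping the intern_keys/intern_vals/
    # may_unicode options that simplejson does not understand.

    lpj.write_file("/tmp/example.json", data, indent=2)
    assert lpj.read_file("/tmp/example.json") == data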
diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp
new file mode 100644
index 0000000000..19cdb096ae
--- /dev/null
+++ b/library/python/json/loads.cpp
@@ -0,0 +1,246 @@
+#include "loads.h"
+
+#include <Python.h>
+
+#include <library/cpp/json/fast_sax/parser.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/stack.h>
+#include <util/generic/vector.h>
+#include <util/generic/ylimits.h>
+#include <util/string/ascii.h>
+
+using namespace NJson;
+
+namespace {
+ enum EKind {
+ Undefined,
+ Array,
+ Dict,
+ Value,
+ Key,
+ };
+
+ static inline TStringBuf ToStr(EKind kind) noexcept {
+ switch (kind) {
+ case Undefined:
+ return TStringBuf("Undefined");
+
+ case Array:
+ return TStringBuf("Array");
+
+ case Dict:
+ return TStringBuf("Dict");
+
+ case Value:
+ return TStringBuf("Value");
+
+ case Key:
+ return TStringBuf("Key");
+ }
+
+ Y_UNREACHABLE();
+ }
+
+ struct TUnref {
+ static inline void Destroy(PyObject* o) noexcept {
+ Py_XDECREF(o);
+ }
+ };
+
+ using TObjectPtr = TAutoPtr<PyObject, TUnref>;
+
+ static inline TObjectPtr BuildBool(bool val) noexcept {
+ if (val) {
+ Py_RETURN_TRUE;
+ }
+
+ Py_RETURN_FALSE;
+ }
+
+    // Translate Python exceptions from object-creating functions into C++ exceptions.
+    // Such errors are reported by returning nullptr.
+    // When a Python error is set and the resulting C++ exception is caught by the
+    // Cython wrapper, the Python exception is propagated and the C++ one is discarded.
+ PyObject* CheckNewObject(PyObject* obj) {
+ Y_ENSURE(obj != nullptr, "got python exception");
+ return obj;
+ }
+
+ void CheckRetcode(int retcode) {
+ Y_ENSURE(retcode == 0, "got python exception");
+ }
+
+ static inline TObjectPtr BuildSmall(long val) {
+#if PY_VERSION_HEX >= 0x03000000
+ return CheckNewObject(PyLong_FromLong(val));
+#else
+ return CheckNewObject(PyInt_FromLong(val));
+#endif
+ }
+
+ PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) {
+#if PY_VERSION_HEX >= 0x03000000
+ Y_UNUSED(mayUnicode);
+ PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size());
+ if (intern) {
+ PyUnicode_InternInPlace(&pyStr);
+ }
+#else
+ const bool needUnicode = mayUnicode && !AllOf(str, IsAscii);
+ PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size())
+ : PyString_FromStringAndSize(str.data(), str.size());
+ if (intern && !needUnicode) {
+ PyString_InternInPlace(&pyStr);
+ }
+#endif
+ return pyStr;
+ }
+
+ struct TVal {
+ EKind Kind = Undefined;
+ TObjectPtr Val;
+
+ inline TVal() noexcept
+ : Kind(Undefined)
+ {
+ }
+
+ inline TVal(EKind kind, TObjectPtr val) noexcept
+ : Kind(kind)
+ , Val(val)
+ {
+ }
+ };
+
+ static inline TObjectPtr NoneRef() noexcept {
+ Py_RETURN_NONE;
+ }
+
+ struct TContext: public TJsonCallbacks {
+ const bool InternKeys;
+ const bool InternVals;
+ const bool MayUnicode;
+ TStack<TVal, TVector<TVal>> S;
+
+ inline TContext(bool internKeys, bool internVals, bool mayUnicode)
+ : TJsonCallbacks(true)
+ , InternKeys(internKeys)
+ , InternVals(internVals)
+ , MayUnicode(mayUnicode)
+ {
+ S.emplace();
+ }
+
+ inline bool Consume(TObjectPtr o) {
+ auto& t = S.top();
+
+ if (t.Kind == Array) {
+ CheckRetcode(PyList_Append(t.Val.Get(), o.Get()));
+ } else if (t.Kind == Key) {
+ auto key = S.top().Val;
+
+ S.pop();
+
+ CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get()));
+ } else {
+ t = TVal(Value, o);
+ }
+
+ return true;
+ }
+
+ inline TObjectPtr Pop(EKind expect) {
+ auto res = S.top();
+
+ S.pop();
+
+ if (res.Kind != expect) {
+ ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")";
+ }
+
+ return res.Val;
+ }
+
+ inline void Push(EKind kind, TObjectPtr object) {
+ S.push(TVal(kind, object));
+ }
+
+ virtual bool OnNull() {
+ return Consume(NoneRef());
+ }
+
+ virtual bool OnBoolean(bool v) {
+ return Consume(BuildBool(v));
+ }
+
+ virtual bool OnInteger(long long v) {
+ if (v >= (long long)Min<long>()) {
+ return Consume(BuildSmall((long)v));
+ }
+
+ return Consume(CheckNewObject(PyLong_FromLongLong(v)));
+ }
+
+ virtual bool OnUInteger(unsigned long long v) {
+ if (v <= (unsigned long long)Max<long>()) {
+ return Consume(BuildSmall((long)v));
+ }
+
+ return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v)));
+ }
+
+ virtual bool OnDouble(double v) {
+ return Consume(CheckNewObject(PyFloat_FromDouble(v)));
+ }
+
+ virtual bool OnString(const TStringBuf& v) {
+ return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode)));
+ }
+
+ virtual bool OnOpenMap() {
+ Push(Dict, CheckNewObject(PyDict_New()));
+
+ return true;
+ }
+
+ virtual bool OnCloseMap() {
+ return Consume(Pop(Dict));
+ }
+
+ virtual bool OnMapKey(const TStringBuf& k) {
+ Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode)));
+ return true;
+ }
+
+ virtual bool OnOpenArray() {
+ Push(Array, CheckNewObject(PyList_New(0)));
+
+ return true;
+ }
+
+ virtual bool OnCloseArray() {
+ return Consume(Pop(Array));
+ }
+ };
+}
+
+PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) {
+ TContext ctx(internKeys, internVals, mayUnicode);
+
+ if (!len) {
+ ythrow yexception() << "parse error: zero length input string";
+ }
+
+ if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) {
+ ythrow yexception() << "parse error";
+ }
+
+ auto& s = ctx.S;
+
+ if (!s || s.top().Kind != Value) {
+ ythrow yexception() << "shit happen";
+ }
+
+ return s.top().Val.Release();
+}
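For readers who do not want to trace the C++, here is a rough pure-Python analogue of the TContext stack machine above (simplified: no interning and no error handling, and the callback names are invented for this sketch):

    class Builder(object):
        # Mirrors TContext: a stack of (kind, value) frames; a "key" frame
        # is popped by the next consumed value, exactly as in Consume().
        def __init__(self):
            self.stack = [("value", None)]

        def consume(self, obj):
            kind, val = self.stack[-1]
            if kind == "array":
                val.append(obj)
            elif kind == "key":
                self.stack.pop()              # drop the key frame...
                self.stack[-1][1][val] = obj  # ...and store into the dict below it
            else:
                self.stack[-1] = ("value", obj)

        def on_open_array(self):
            self.stack.append(("array", []))

        def on_close_array(self):
            self.consume(self.stack.pop()[1])

        def on_open_map(self):
            self.stack.append(("dict", {}))

        def on_close_map(self):
            self.consume(self.stack.pop()[1])

        def on_map_key(self, key):
            self.stack.append(("key", key))

Driving these callbacks in document order for {"a": [1, 2]} leaves [("value", {"a": [1, 2]})] on the stack, which matches the final Kind == Value invariant checked in LoadJsonFromString.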
diff --git a/library/python/json/loads.h b/library/python/json/loads.h
new file mode 100644
index 0000000000..62dcdf6f21
--- /dev/null
+++ b/library/python/json/loads.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include <Python.h>
+
+PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys = false, bool internVals = false, bool mayUnicode = false);
diff --git a/library/python/json/loads.pyx b/library/python/json/loads.pyx
new file mode 100644
index 0000000000..82e5c6dce7
--- /dev/null
+++ b/library/python/json/loads.pyx
@@ -0,0 +1,14 @@
+from libcpp cimport bool
+
+cdef extern from "library/python/json/loads.h":
+ object LoadJsonFromString(const char*, size_t, bool internKeys, bool internVals, bool mayUnicode) except +
+
+
+def loads(s, intern_keys = False, intern_vals = False, may_unicode = False):
+ if isinstance(s, unicode):
+ s = s.encode('utf-8')
+
+ try:
+ return LoadJsonFromString(s, len(s), intern_keys, intern_vals, may_unicode)
+ except Exception as e:
+ raise ValueError(str(e))
diff --git a/library/python/json/ya.make b/library/python/json/ya.make
new file mode 100644
index 0000000000..74a82de9d8
--- /dev/null
+++ b/library/python/json/ya.make
@@ -0,0 +1,17 @@
+PY23_LIBRARY()
+
+PEERDIR(
+ contrib/python/simplejson
+ library/cpp/json/fast_sax
+)
+
+PY_SRCS(
+ __init__.py
+ loads.pyx
+)
+
+SRCS(
+ loads.cpp
+)
+
+END()
diff --git a/library/python/mlockall/__init__.py b/library/python/mlockall/__init__.py
new file mode 100644
index 0000000000..4d867d692e
--- /dev/null
+++ b/library/python/mlockall/__init__.py
@@ -0,0 +1,10 @@
+import sys
+
+
+def mlockall_current():
+ if not sys.platform.startswith('linux'):
+ return -1
+
+ import library.python.mlockall.mlockall as ml
+
+ return ml.mlockall_current()
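Intended use, with the return-code convention spelled out: 0 on success, -1 on non-Linux platforms (no-op by design), otherwise the LastSystemError() value from the failed lock, i.e. an errno:

    import errno

    from library.python.mlockall import mlockall_current

    rc = mlockall_current()
    if rc == 0:
        pass  # all currently mapped pages are pinned in RAM
    elif rc == -1:
        pass  # not Linux: the call is intentionally a no-op
    elif rc == errno.ENOMEM:
        pass  # typical failure: RLIMIT_MEMLOCK too low for the process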
diff --git a/library/python/mlockall/mlockall.pyx b/library/python/mlockall/mlockall.pyx
new file mode 100644
index 0000000000..b35d661a42
--- /dev/null
+++ b/library/python/mlockall/mlockall.pyx
@@ -0,0 +1,19 @@
+cdef extern from "<util/system/error.h>":
+ int LastSystemError()
+
+cdef extern from "<util/system/mlock.h>":
+ cdef enum ELockAllMemoryFlag:
+ LockCurrentMemory
+ LockFutureMemory
+ cppclass ELockAllMemoryFlags:
+ operator=(ELockAllMemoryFlag)
+ void LockAllMemory(ELockAllMemoryFlags flags) except+
+
+def mlockall_current():
+ cdef ELockAllMemoryFlags flags
+ try:
+ flags = LockCurrentMemory
+ LockAllMemory(flags)
+ return 0
+ except Exception:
+ return LastSystemError()
diff --git a/library/python/mlockall/ya.make b/library/python/mlockall/ya.make
new file mode 100644
index 0000000000..96eba051ab
--- /dev/null
+++ b/library/python/mlockall/ya.make
@@ -0,0 +1,14 @@
+PY23_LIBRARY()
+
+PY_SRCS(
+ __init__.py
+)
+
+IF (OS_LINUX)
+ PY_SRCS(
+ mlockall.pyx
+ )
+ENDIF()
+
+END()
+
diff --git a/library/python/nstools/__init__.py b/library/python/nstools/__init__.py
new file mode 100644
index 0000000000..34cc0f9574
--- /dev/null
+++ b/library/python/nstools/__init__.py
@@ -0,0 +1,6 @@
+from .nstools import unshare_ns, move_to_ns
+
+__all__ = [
+ 'unshare_ns',
+ 'move_to_ns'
+]
diff --git a/library/python/nstools/nstools.pyx b/library/python/nstools/nstools.pyx
new file mode 100644
index 0000000000..5ef30373ff
--- /dev/null
+++ b/library/python/nstools/nstools.pyx
@@ -0,0 +1,28 @@
+from cpython.exc cimport PyErr_SetFromErrno
+
+cdef extern from "<sched.h>" nogil:
+ int setns(int fd, int mode)
+ int unshare(int flags)
+
+ cpdef enum:
+ Fs "CLONE_FS"
+ # Cgroup "CLONE_NEWCGROUP"
+ Ipc "CLONE_NEWIPC"
+ Network "CLONE_NEWNET"
+ Mount "CLONE_NEWNS"
+ Pid "CLONE_NEWPID"
+ User "CLONE_NEWUSER"
+ Uts "CLONE_NEWUTS"
+
+def unshare_ns(int flags):
+ cdef int ret = unshare(flags)
+ if ret != 0:
+ PyErr_SetFromErrno(OSError)
+
+def move_to_ns(object fileobject, int mode):
+ if not isinstance(fileobject, int):
+ fileobject = fileobject.fileno()
+
+ cdef int ret = setns(fileobject, mode)
+ if ret != 0:
+ PyErr_SetFromErrno(OSError)
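A hedged sketch of the intended use (Linux only; most namespace switches additionally require CAP_SYS_ADMIN, and the PID below is illustrative):

    from library.python.nstools import unshare_ns, move_to_ns
    from library.python.nstools.nstools import Mount, Network

    # Detach the current process into fresh mount and network namespaces.
    unshare_ns(Mount | Network)

    # Join the network namespace of another process via its /proc entry;
    # move_to_ns accepts either a file object or a raw descriptor.
    with open("/proc/12345/ns/net") as ns_fd:
        move_to_ns(ns_fd, Network)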
diff --git a/library/python/nstools/ya.make b/library/python/nstools/ya.make
new file mode 100644
index 0000000000..49eddeb919
--- /dev/null
+++ b/library/python/nstools/ya.make
@@ -0,0 +1,14 @@
+PY23_LIBRARY()
+
+IF(OS_LINUX)
+    PY_SRCS(
+        __init__.py
+        nstools.pyx
+    )
+ELSE()
+    PY_SRCS(
+        nstools.py
+    )
+ENDIF()
+
+END()
diff --git a/library/python/par_apply/__init__.py b/library/python/par_apply/__init__.py
new file mode 100644
index 0000000000..19b89ae843
--- /dev/null
+++ b/library/python/par_apply/__init__.py
@@ -0,0 +1,114 @@
+import sys
+import threading
+import six
+
+from six.moves import queue
+
+
+def par_apply(seq, func, thr_num, join_polling=None):
+ if thr_num < 2:
+ for x in seq:
+ yield func(x)
+
+ return
+
+ in_q = queue.Queue()
+ out_q = queue.Queue()
+
+ def enumerate_blocks():
+ n = 0
+
+ for b in seq:
+ yield n, [b]
+ n += 1
+
+ yield n, None
+
+ def iter_out():
+ n = 0
+ d = {}
+
+ while True:
+ if n in d:
+ r = d[n]
+ del d[n]
+ n += 1
+
+ yield r
+ else:
+ res = out_q.get()
+
+ d[res[0]] = res
+
+ out_iter = iter_out()
+
+ def wait_block():
+ for x in out_iter:
+ return x
+
+ def iter_compressed():
+ p = 0
+
+ for n, b in enumerate_blocks():
+ in_q.put((n, b))
+
+ while n > p + (thr_num * 2):
+ p, b, c = wait_block()
+
+ if not b:
+ return
+
+ yield p, c
+
+ while True:
+ p, b, c = wait_block()
+
+ if not b:
+ return
+
+ yield p, c
+
+ def proc():
+ while True:
+ data = in_q.get()
+
+ if data is None:
+ return
+
+ n, b = data
+
+ if b:
+ try:
+ res = (func(b[0]), None)
+ except Exception:
+ res = (None, sys.exc_info())
+ else:
+ res = (None, None)
+
+ out_q.put((n, b, res))
+
+    thrs = [threading.Thread(target=proc) for _ in range(thr_num)]
+
+ for t in thrs:
+ t.start()
+
+ try:
+ for p, c in iter_compressed():
+ res, err = c
+
+ if err:
+ six.reraise(*err)
+
+ yield res
+ finally:
+ for t in thrs:
+ in_q.put(None)
+
+ for t in thrs:
+ if join_polling is not None:
+ while True:
+ t.join(join_polling)
+ if not t.is_alive():
+ break
+ else:
+ t.join()
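In effect, par_apply() maps func over seq with thr_num worker threads while preserving input order and re-raising worker exceptions in the consumer; the in-flight window is bounded by thr_num * 2 blocks, which keeps memory flat on long sequences. A minimal sketch:

    from library.python.par_apply import par_apply

    def square(x):
        return x * x

    # Results come back in input order even though the calls run on
    # four threads.
    assert list(par_apply(range(10), square, thr_num=4)) == [x * x for x in range(10)]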
diff --git a/library/python/par_apply/ya.make b/library/python/par_apply/ya.make
new file mode 100644
index 0000000000..b14592ab79
--- /dev/null
+++ b/library/python/par_apply/ya.make
@@ -0,0 +1,11 @@
+PY23_LIBRARY()
+
+PEERDIR(
+ contrib/python/six
+)
+
+PY_SRCS(
+ __init__.py
+)
+
+END()
diff --git a/library/python/symbols/libmagic/syms.cpp b/library/python/symbols/libmagic/syms.cpp
new file mode 100644
index 0000000000..839441ae14
--- /dev/null
+++ b/library/python/symbols/libmagic/syms.cpp
@@ -0,0 +1,19 @@
+#include <contrib/libs/libmagic/src/magic.h>
+
+#include <library/python/symbols/registry/syms.h>
+
+BEGIN_SYMS("magic")
+SYM(magic_open)
+SYM(magic_close)
+SYM(magic_error)
+SYM(magic_errno)
+SYM(magic_file)
+SYM(magic_buffer)
+SYM(magic_load)
+SYM(magic_setflags)
+SYM(magic_check)
+SYM(magic_compile)
+SYM(magic_descriptor)
+SYM(magic_list)
+SYM(magic_version)
+END_SYMS()
diff --git a/library/python/symbols/libmagic/ya.make b/library/python/symbols/libmagic/ya.make
new file mode 100644
index 0000000000..a248603a41
--- /dev/null
+++ b/library/python/symbols/libmagic/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+PEERDIR(
+ contrib/libs/libmagic
+ library/python/symbols/registry
+)
+
+SRCS(
+ GLOBAL syms.cpp
+)
+
+END()
diff --git a/library/python/testing/coverage_utils/__init__.py b/library/python/testing/coverage_utils/__init__.py
new file mode 100644
index 0000000000..3313eee7b5
--- /dev/null
+++ b/library/python/testing/coverage_utils/__init__.py
@@ -0,0 +1,14 @@
+import re
+
+
+def make_filter(prefix_filter, exclude_regexp):
+ filters = []
+ if prefix_filter:
+ filters.append(lambda x: x.startswith(prefix_filter))
+ if exclude_regexp:
+ regexp = re.compile(exclude_regexp)
+ filters.append(lambda x: not regexp.search(x))
+
+ if filters:
+ return lambda x: all(pred(x) for pred in filters)
+ return lambda x: True
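Usage sketch: the returned predicate ANDs the prefix test with the negated exclude pattern; the arguments below are illustrative:

    unit_filter = make_filter("library/python/", r"/tests?/")
    assert unit_filter("library/python/json/__init__.py")
    assert not unit_filter("library/python/json/tests/test_loads.py")  # excluded
    assert not unit_filter("contrib/python/six/six.py")  # wrong prefix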
diff --git a/library/python/testing/coverage_utils/ya.make b/library/python/testing/coverage_utils/ya.make
new file mode 100644
index 0000000000..3582136180
--- /dev/null
+++ b/library/python/testing/coverage_utils/ya.make
@@ -0,0 +1,5 @@
+PY23_LIBRARY()
+
+PY_SRCS(__init__.py)
+
+END()
diff --git a/library/python/testing/system_info/__init__.py b/library/python/testing/system_info/__init__.py
new file mode 100644
index 0000000000..8bad854d97
--- /dev/null
+++ b/library/python/testing/system_info/__init__.py
@@ -0,0 +1,204 @@
+import collections
+import psutil
+from functools import wraps
+
+
+def safe(name):
+ def decorator_safe(func):
+ """
+ Decorator for try-catch on string assembly
+ """
+
+ @wraps(func)
+ def wrap_safe(*args, **kwargs):
+ try:
+ return func(*args, **kwargs)
+ except Exception as e:
+ return "Failed to get {}: {}".format(name, e)
+
+ return wrap_safe
+
+ return decorator_safe
+
+
+def get_proc_attrib(attr):
+ if callable(attr):
+ try:
+ return attr()
+ except psutil.Error:
+ return None
+ else:
+ return attr
+
+
+@safe("cpu/mem info")
+def _cpu_mem_str():
+ vm = psutil.virtual_memory()
+ cpu_tp = psutil.cpu_times_percent(0.1)
+
+ str_items = []
+ str_items.append(
+ "CPU: Idle: {}% User: {}% System: {}% IOwait: {}%\n".format(
+ cpu_tp.idle, cpu_tp.user, cpu_tp.system, cpu_tp.iowait
+ )
+ )
+
+ str_items.append(
+ "MEM: total {} Gb available: {} Gb used: {} Gb free: {} Gb active: {} Gb inactive: {} Gb shared: {} Gb\n".format(
+ round(vm.total / 1e9, 2),
+ round(vm.available / 1e9, 2),
+ round(vm.used / 1e9, 2),
+ round(vm.free / 1e9, 2),
+ round(vm.active / 1e9, 2),
+ round(vm.inactive / 1e9, 2),
+ round(vm.shared / 1e9, 2),
+ )
+ )
+
+ str_items.append("Used swap: {}%\n".format(psutil.swap_memory().percent))
+
+ return "".join(str_items)
+
+
+@safe("processes tree")
+def _proc_tree_str():
+ tree = collections.defaultdict(list)
+ for p in psutil.process_iter():
+ try:
+ tree[p.ppid()].append(p.pid)
+ except (psutil.NoSuchProcess, psutil.ZombieProcess):
+ pass
+ # on systems supporting PID 0, PID 0's parent is usually 0
+ if 0 in tree and 0 in tree[0]:
+ tree[0].remove(0)
+
+ return _print_proc_tree(min(tree), tree)
+
+
+def _print_proc_tree(parent_root, tree, indent_root=''):
+ stack = [(parent_root, indent_root, "")]
+ str_items = list()
+
+ while len(stack) > 0:
+ try:
+ parent, indent, prefix = stack.pop()
+ p = psutil.Process(parent)
+ name = get_proc_attrib(p.name)
+ str_items.append("{}({}, '{}'".format(prefix, parent, name if name else '?'))
+
+ exe = get_proc_attrib(p.exe)
+ if exe:
+ str_items.append(" [{}]".format(exe))
+
+ str_items.append(") ")
+ str_items.append(" st: {}".format(p.status()))
+ str_items.append(" mem: {}%".format(round(p.memory_percent(), 2)))
+
+ ndfs = get_proc_attrib(p.num_fds)
+ if ndfs and ndfs > 0:
+ str_items.append(" fds: {}".format(ndfs))
+
+ conns = get_proc_attrib(p.connections)
+ if conns and len(conns) > 1:
+ str_items.append(" num con: {}".format(len(conns)))
+
+ ths = get_proc_attrib(p.num_threads)
+ if ths and ths > 1:
+ str_items.append(" threads: {}".format(ths))
+
+ str_items.append("\n")
+ except psutil.Error:
+ name = "?"
+ str_items.append("({}, '{}')\n".format(parent, name))
+
+ if parent not in tree:
+ continue
+
+ child = tree[parent][-1]
+ stack.append((child, indent + " ", indent + "`_ "))
+
+ children = tree[parent][:-1]
+ children.reverse()
+ for child in children:
+ stack.append((child, indent + "| ", indent + "|- "))
+
+ return "".join(str_items)
+
+
+@safe("network info")
+def _network_conn_str():
+ str_items = list()
+
+ counters = psutil.net_io_counters()
+ str_items.append(
+ "\nPackSent: {} PackRecv: {} ErrIn: {} ErrOut: {} DropIn: {} DropOut: {}\n\n".format(
+ counters.packets_sent,
+ counters.packets_recv,
+ counters.errin,
+ counters.errout,
+ counters.dropin,
+ counters.dropout,
+ )
+ )
+
+ ifaces = psutil.net_if_addrs()
+ conns = psutil.net_connections()
+ list_ip = collections.defaultdict(list)
+ for con in conns:
+ list_ip[con.laddr.ip].append(con)
+
+    for name, addrs in ifaces.items():
+ str_items.append("{}:\n".format(name))
+
+ for ip in addrs:
+ str_items.append(" {}".format(ip.address))
+ if ip.netmask:
+ str_items.append(" mask={}".format(ip.netmask))
+ if ip.broadcast:
+ str_items.append(" bc={}".format(ip.broadcast))
+ str_items.append("\n")
+
+ for con in list_ip[ip.address]:
+ str_items.append(" {}".format(con.laddr.port))
+ if con.raddr:
+ str_items.append(" <--> {} : {}".format(con.raddr.ip, con.raddr.port))
+ str_items.append(" (stat: {}".format(con.status))
+ if con.pid:
+ str_items.append(" proc: {} (pid={})".format(psutil.Process(con.pid).exe(), con.pid))
+ str_items.append(")\n")
+
+ del list_ip[ip.address]
+
+ str_items.append("***\n")
+    for ip, conns in list_ip.items():
+ str_items.append(" {}\n".format(ip))
+
+ for con in conns:
+ str_items.append(" {}".format(con.laddr.port))
+ if con.raddr:
+ str_items.append(" <--> {} : {}".format(con.raddr.ip, con.raddr.port))
+ str_items.append(" (stat: {}".format(con.status))
+ if con.pid:
+ str_items.append(" proc: {} (pid={})".format(psutil.Process(con.pid).exe(), con.pid))
+ str_items.append(")\n")
+
+ return "".join(str_items)
+
+
+@safe("info")
+def get_system_info():
+ str_items = list()
+
+ str_items.append("\n --- CPU MEM --- \n")
+ str_items.append(_cpu_mem_str())
+ str_items.append("\n")
+
+ str_items.append("\n --- PROCESSES TREE --- \n")
+ str_items.append(_proc_tree_str())
+ str_items.append("\n")
+
+ str_items.append("\n --- NETWORK INFO --- \n")
+ str_items.append(_network_conn_str())
+ str_items.append("\n")
+
+ return "".join(str_items)
diff --git a/library/python/testing/system_info/ya.make b/library/python/testing/system_info/ya.make
new file mode 100644
index 0000000000..f655db8ebe
--- /dev/null
+++ b/library/python/testing/system_info/ya.make
@@ -0,0 +1,15 @@
+PY23_LIBRARY()
+
+PY_SRCS(__init__.py)
+
+PEERDIR(
+ contrib/python/psutil
+)
+
+STYLE_PYTHON()
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)