path: root/library/python
author    say <say@yandex-team.com>    2023-12-08 17:40:48 +0300
committer say <say@yandex-team.com>    2023-12-08 19:58:59 +0300
commit    914f57e3243f53dd89dd3adb4d8b6d35c47f46ce (patch)
tree      98a1f1f1f5e2c38db3a78da10aeb7eb7d4e952e0 /library/python
parent    e61293d91ee7c923944f627d8e1138bcb17cacad (diff)
download  ydb-914f57e3243f53dd89dd3adb4d8b6d35c47f46ce.tar.gz
Make support_retries() and get_type() instance methods instead of class methods. Reduce get_type_name() usage.
Diffstat (limited to 'library/python')
-rw-r--r--  library/python/archive/__init__.py            266
-rw-r--r--  library/python/archive/ya.make                  19
-rw-r--r--  library/python/cityhash/cityhash.pyx            75
-rw-r--r--  library/python/cityhash/hash.cpp                32
-rw-r--r--  library/python/cityhash/hash.h                   6
-rw-r--r--  library/python/cityhash/ya.make                 16
-rw-r--r--  library/python/codecs/__codecs.pyx              61
-rw-r--r--  library/python/codecs/__init__.py                1
-rw-r--r--  library/python/codecs/ya.make                   16
-rw-r--r--  library/python/color/README.md                   9
-rw-r--r--  library/python/color/__init__.py                92
-rw-r--r--  library/python/color/ya.make                    13
-rw-r--r--  library/python/compress/__init__.py            147
-rw-r--r--  library/python/compress/ya.make                 16
-rw-r--r--  library/python/coredump_filter/README.md        19
-rw-r--r--  library/python/coredump_filter/__init__.py    1500
-rw-r--r--  library/python/coredump_filter/core_proc.js     21
-rw-r--r--  library/python/coredump_filter/epilog.html       2
-rw-r--r--  library/python/coredump_filter/prolog.html      24
-rw-r--r--  library/python/coredump_filter/styles.css      116
-rw-r--r--  library/python/coredump_filter/ya.make          23
-rw-r--r--  library/python/json/__init__.py                 44
-rw-r--r--  library/python/json/loads.cpp                  246
-rw-r--r--  library/python/json/loads.h                      5
-rw-r--r--  library/python/json/loads.pyx                   14
-rw-r--r--  library/python/json/ya.make                     17
-rw-r--r--  library/python/par_apply/__init__.py           114
-rw-r--r--  library/python/par_apply/ya.make                11
28 files changed, 0 insertions, 2925 deletions
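The renamed methods themselves are outside this diff (which is limited to `library/python`), but the commit title describes a conventional classmethod-to-instance-method move. A purely hypothetical sketch of that pattern, with the class and attribute names assumed:

```python
class Fetcher(object):  # hypothetical class, not part of this diff
    def __init__(self, retries):
        self.retries = retries

    # Before: a classmethod cannot consult per-instance configuration.
    # @classmethod
    # def support_retries(cls):
    #     return True

    # After: resolved per object, so instance state can decide.
    def support_retries(self):
        return self.retries > 0

    def get_type(self):
        return type(self).__name__
```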
diff --git a/library/python/archive/__init__.py b/library/python/archive/__init__.py
deleted file mode 100644
index a6e032ff4c..0000000000
--- a/library/python/archive/__init__.py
+++ /dev/null
@@ -1,266 +0,0 @@
-import errno
-import logging
-import os
-import random
-import shutil
-import stat
-import string
-import sys
-
-import six
-
-import libarchive
-import libarchive._libarchive as _libarchive
-
-from pathlib2 import PurePath
-
-logger = logging.getLogger(__name__)
-
-GZIP = "gzip"
-ZSTD = "zstd"
-
-ENCODING = "utf-8"
-
-
-class ConfigureError(Exception):
- pass
-
-
-class Level(object):
- def __init__(self, level):
- self.level = level
-
-
-class Compression(object):
- Fast = Level(1)
- Default = Level(2)
- Best = Level(3)
-
-
-def get_compression_level(filter_name, level):
- if level is None or not filter_name:
- return None
- elif isinstance(level, Level):
- level = {
- GZIP: {
- Compression.Fast: 1,
- Compression.Default: 6,
- Compression.Best: 9,
- },
- ZSTD: {
- Compression.Fast: 1,
- Compression.Default: 3,
- Compression.Best: 22,
- },
- }[filter_name][level]
- return level
-
-
-def encode(value, encoding):
- return value.encode(encoding)
-
-
-def extract_tar(tar_file_path, output_dir, strip_components=None, fail_on_duplicates=True):
- output_dir = encode(output_dir, ENCODING)
- _make_dirs(output_dir)
- with libarchive.Archive(tar_file_path, mode="rb") as tarfile:
- for e in tarfile:
- p = _strip_prefix(e.pathname, strip_components)
- if not p:
- continue
- dest = os.path.join(output_dir, encode(p, ENCODING))
- if e.pathname.endswith("/"):
- _make_dirs(dest)
- continue
-
- if strip_components and fail_on_duplicates:
- if os.path.exists(dest):
- raise Exception(
- "The file {} is duplicated because of strip_components={}".format(dest, strip_components)
- )
-
- _make_dirs(os.path.dirname(dest))
-
- if e.ishardlink():
- src = os.path.join(output_dir, _strip_prefix(e.hardlink, strip_components))
- _hardlink(src, dest)
- continue
- if e.issym():
- src = _strip_prefix(e.linkname, strip_components)
- _symlink(src, dest)
- continue
-
- with open(dest, "wb") as f:
- if hasattr(os, "fchmod"):
- os.fchmod(f.fileno(), e.mode & 0o7777)
- libarchive.call_and_check(
- _libarchive.archive_read_data_into_fd,
- tarfile._a,
- tarfile._a,
- f.fileno(),
- )
-
-
-def _strip_prefix(path, strip_components):
- if not strip_components:
- return path
- p = PurePath(path)
- stripped = str(p.relative_to(*p.parts[:strip_components]))
- return '' if stripped == '.' else stripped
-
-
-def tar(
- paths,
- output,
- compression_filter=None,
- compression_level=None,
- fixed_mtime=None,
- onerror=None,
- postprocess=None,
- dereference=False,
-):
- if isinstance(paths, six.string_types):
- paths = [paths]
-
- if isinstance(output, six.string_types):
- temp_tar_path, stream = (
- output + "." + "".join(random.sample(string.ascii_lowercase, 8)),
- None,
- )
- else:
- temp_tar_path, stream = None, output
-
- compression_level = get_compression_level(compression_filter, compression_level)
-
- try:
- if compression_filter:
- filter_name = compression_filter
- if compression_level is not None:
- filter_opts = {"compression-level": str(compression_level)}
- else:
- filter_opts = {}
- # force gzip not to store the mtime of the original file being compressed (http://www.gzip.org/zlib/rfc-gzip.html#file-format)
- if fixed_mtime is not None and compression_filter == GZIP:
- filter_opts["timestamp"] = ""
- else:
- filter_name = filter_opts = None
-
- with libarchive.Archive(
- stream or temp_tar_path,
- mode="wb",
- format="gnu",
- filter=filter_name,
- filter_opts=filter_opts,
- fixed_mtime=fixed_mtime,
- ) as tarfile:
- # determine order if fixed_mtime is specified to produce stable archive
- paths = paths if fixed_mtime is None else sorted(paths)
-
- for p in paths:
- if type(p) == tuple:
- path, arcname = p
- else:
- path, arcname = p, os.path.basename(p)
-
- if os.path.isdir(path):
- for root, dirs, files in os.walk(path, followlinks=dereference):
- if fixed_mtime is None:
- entries = dirs + files
- else:
- entries = sorted(dirs) + sorted(files)
-
- reldir = os.path.relpath(root, path)
- for f in entries:
- _writepath(
- tarfile,
- os.path.join(root, f),
- os.path.normpath(os.path.join(arcname, reldir, f)),
- onerror,
- postprocess,
- dereference,
- )
- else:
- if not os.path.exists(path):
- raise OSError("Specified path doesn't exist: {}".format(path))
- _writepath(tarfile, path, arcname, onerror, postprocess, dereference)
-
- if temp_tar_path:
- os.rename(temp_tar_path, output)
- except Exception:
- if temp_tar_path and os.path.exists(temp_tar_path):
- os.remove(temp_tar_path)
- raise
-
-
-def _writepath(tarfile, src, dst, onerror, postprocess, dereference):
- def tar_writepath(src, dst):
- st = os.lstat(src)
- if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode) or stat.S_ISLNK(st.st_mode):
- if dereference and stat.S_ISLNK(st.st_mode):
- src = os.path.realpath(src)
-
- tarfile.writepath(src, dst)
-
- if postprocess:
- postprocess(src, dst, st.st_mode)
- else:
- logger.debug("Skipping non-regular file '%s' (stat: %s)", src, st)
-
- try:
- return tar_writepath(src, dst)
- except Exception as e:
- if isinstance(e, OSError) and e.errno == errno.ENOENT:
- logger.debug(
- "Skipping missing file '%s' - looks like directory content has changed during archiving",
- src,
- )
- return
-
- if onerror:
- if onerror(src, dst, sys.exc_info()):
- return tar_writepath(src, dst)
- else:
- raise
-
-
-def check_tar(tar_file_path):
- if os.path.isfile(tar_file_path) or os.path.islink(tar_file_path):
- return libarchive.is_archive(tar_file_path)
- return False
-
-
-def _make_dirs(path):
- try:
- os.makedirs(path)
- except OSError as e:
- if e.errno != errno.EEXIST or not os.path.isdir(path):
- raise
-
-
-def _hardlink(src, dst):
- if hasattr(os, "link"):
- os.link(src, dst)
- else:
- shutil.copyfile(src, dst)
-
-
-def _symlink(src, dst):
- if hasattr(os, "symlink"):
- os.symlink(src, dst)
- else:
- # Windows specific case - we cannot copy file right now,
- # because it doesn't exist yet (and would be met later in the archive) or symlink is broken.
- # Act like tar and tarfile - skip such symlinks
- if os.path.exists(src):
- shutil.copytree(src, dst)
-
-
-def get_archive_filter_name(filename):
- filters = libarchive.get_archive_filter_names(filename)
- # https://a.yandex-team.ru/arc/trunk/arcadia/contrib/libs/libarchive/libarchive/archive_read.c?rev=5800047#L522
- assert filters[-1] == "none", filters
- if len(filters) == 1:
- return None
- if len(filters) == 2:
- return filters[0]
- raise Exception("Archive has chain of filter: {}".format(filters))
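For reference, a minimal usage sketch of the module being deleted above, based on the `tar()` and `extract_tar()` signatures it defines (the file and directory names are illustrative):

```python
import library.python.archive as archive

# Pack a directory; fixed_mtime=0 sorts entries and pins timestamps,
# which makes the resulting archive byte-for-byte reproducible.
archive.tar(
    [("./data", "data")],  # (path, arcname) pairs, or plain path strings
    "data.tar.gz",
    compression_filter=archive.GZIP,
    compression_level=archive.Compression.Best,  # maps to gzip level 9
    fixed_mtime=0,
)

# Unpack, dropping the leading "data/" path component.
archive.extract_tar("data.tar.gz", "./unpacked", strip_components=1)
```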
diff --git a/library/python/archive/ya.make b/library/python/archive/ya.make
deleted file mode 100644
index 5b86a45a42..0000000000
--- a/library/python/archive/ya.make
+++ /dev/null
@@ -1,19 +0,0 @@
-PY23_LIBRARY()
-
-STYLE_PYTHON()
-
-PY_SRCS(
- __init__.py
-)
-
-PEERDIR(
- contrib/python/pathlib2
- contrib/python/python-libarchive
-)
-
-END()
-
-RECURSE_FOR_TESTS(
- benchmark
- test
-)
diff --git a/library/python/cityhash/cityhash.pyx b/library/python/cityhash/cityhash.pyx
deleted file mode 100644
index 6f0046f0d7..0000000000
--- a/library/python/cityhash/cityhash.pyx
+++ /dev/null
@@ -1,75 +0,0 @@
-from libcpp.pair cimport pair
-
-cdef extern from "util/system/types.h":
- ctypedef unsigned long ui64
-
-
-cdef extern from "util/digest/city.h":
- ui64 CityHash64(const char* buf, size_t len) nogil
- pair[ui64, ui64] CityHash128(const char* buf, size_t len) nogil
- ui64 CityHash64WithSeed(const char* buf, size_t len, ui64 seed) nogil
-
-
-cdef extern from "library/python/cityhash/hash.h":
- ui64 FileCityHash128WithSeedHigh64(const char* fpath) nogil except+
- ui64 FileCityHash64(const char* fpath) nogil except+
-
-
-def hash64(content):
- cdef const char* s = content
- cdef size_t size = len(content)
- cdef ui64 res = 0
-
- if size > 128:
- with nogil:
- res = CityHash64(s, size)
- else:
- res = CityHash64(s, size)
-
- return res
-
-def hash128(content):
- cdef const char* s = content
- cdef size_t size = len(content)
- cdef pair[ui64, ui64] res = pair[ui64, ui64](0, 0)
-
- if size > 128:
- with nogil:
- res = CityHash128(s, size)
- else:
- res = CityHash128(s, size)
- return res
-
-
-def hash64seed(content, seed):
- cdef const char* s = content
- cdef size_t size = len(content)
- cdef ui64 _seed = seed;
-
- if size > 128:
- with nogil:
- res = CityHash64WithSeed(s, size, _seed)
- else:
- res = CityHash64WithSeed(s, size, _seed)
-
- return res
-
-
-def filehash64(path):
- cdef const char* p = path
- cdef ui64 res = 0
-
- with nogil:
- res = FileCityHash64(p)
-
- return res
-
-
-def filehash128high64(path):
- cdef const char* p = path
- cdef ui64 res = 0
-
- with nogil:
- res = FileCityHash128WithSeedHigh64(p)
-
- return res
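A small usage sketch of the bindings above; since the ya.make later in this diff builds `cityhash.pyx` as a `TOP_LEVEL` module, it was imported as plain `cityhash` (the inputs are illustrative, and both content and paths are passed as bytes):

```python
import cityhash

h64 = cityhash.hash64(b"some content")             # ui64
first, second = cityhash.hash128(b"some content")  # two ui64 halves
seeded = cityhash.hash64seed(b"some content", 42)

# File variants hash the contents of a path without reading it in Python.
file_hash = cityhash.filehash64(b"/etc/hostname")
```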
diff --git a/library/python/cityhash/hash.cpp b/library/python/cityhash/hash.cpp
deleted file mode 100644
index 17bd3a75f3..0000000000
--- a/library/python/cityhash/hash.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "hash.h"
-
-#include <util/digest/city.h>
-#include <util/generic/string.h>
-#include <util/memory/blob.h>
-#include <util/system/file.h>
-#include <util/system/fstat.h>
-
-void ReadFile(const char* fpath, TBlob& blob) {
- TFile f(TString{fpath}, RdOnly | Seq);
- const TFileStat fs(f);
- auto size = fs.Size;
-
- if (size < (64 << 10)) {
- blob = TBlob::FromFileContent(f, 0, size);
- } else {
- blob = TBlob::FromFile(f);
- }
-}
-
-ui64 FileCityHash128WithSeedHigh64(const char* fpath) {
- TBlob blob;
- ReadFile(fpath, blob);
- const uint128 hash = CityHash128WithSeed((const char*)blob.Data(), blob.Size(), uint128(0, blob.Size()));
- return Uint128High64(hash);
-}
-
-ui64 FileCityHash64(const char* fpath) {
- TBlob blob;
- ReadFile(fpath, blob);
- return CityHash64(static_cast<const char*>(blob.Data()), blob.Size());
-}
diff --git a/library/python/cityhash/hash.h b/library/python/cityhash/hash.h
deleted file mode 100644
index 64b22ba74b..0000000000
--- a/library/python/cityhash/hash.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#include <util/system/defaults.h>
-
-ui64 FileCityHash128WithSeedHigh64(const char* fpath);
-ui64 FileCityHash64(const char* fpath);
diff --git a/library/python/cityhash/ya.make b/library/python/cityhash/ya.make
deleted file mode 100644
index 7948e19389..0000000000
--- a/library/python/cityhash/ya.make
+++ /dev/null
@@ -1,16 +0,0 @@
-PY23_LIBRARY()
-
-SRCS(
- hash.cpp
-)
-
-PY_SRCS(
- TOP_LEVEL
- cityhash.pyx
-)
-
-END()
-
-RECURSE_FOR_TESTS(
- test
-)
diff --git a/library/python/codecs/__codecs.pyx b/library/python/codecs/__codecs.pyx
deleted file mode 100644
index 42ec37fe88..0000000000
--- a/library/python/codecs/__codecs.pyx
+++ /dev/null
@@ -1,61 +0,0 @@
-import six
-
-from libcpp cimport bool
-
-from util.generic.string cimport TString, TStringBuf
-
-
-def to_bytes(s):
- try:
- return s.encode('utf-8')
- except AttributeError:
- pass
-
- return s
-
-
-def from_bytes(s):
- if six.PY3:
- return s.decode('utf-8')
-
- return s
-
-
-cdef extern from "library/cpp/blockcodecs/codecs.h" namespace "NBlockCodecs":
- cdef cppclass ICodec:
- void Encode(TStringBuf data, TString& res) nogil
- void Decode(TStringBuf data, TString& res) nogil
-
- cdef const ICodec* Codec(const TStringBuf& name) except +
- cdef TString ListAllCodecsAsString() except +
-
-
-def dumps(name, data):
- name = to_bytes(name)
-
- cdef const ICodec* codec = Codec(TStringBuf(name, len(name)))
- cdef TString res
- cdef TStringBuf cdata = TStringBuf(data, len(data))
-
- with nogil:
- codec.Encode(cdata, res)
-
- return res.c_str()[:res.length()]
-
-
-def loads(name, data):
- name = to_bytes(name)
-
- cdef const ICodec* codec = Codec(TStringBuf(name, len(name)))
- cdef TString res
- cdef TStringBuf cdata = TStringBuf(data, len(data))
-
- with nogil:
- codec.Decode(cdata, res)
-
- return res.c_str()[:res.length()]
-
-def list_all_codecs():
- cdef TString res = ListAllCodecsAsString()
-
- return from_bytes(res.c_str()[:res.length()]).split(',')
diff --git a/library/python/codecs/__init__.py b/library/python/codecs/__init__.py
deleted file mode 100644
index b9fb00deb0..0000000000
--- a/library/python/codecs/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from __codecs import loads, dumps, list_all_codecs # noqa
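A round-trip sketch of the three functions re-exported above; available codec names depend on the blockcodecs build, so the sketch picks one from `list_all_codecs()` instead of hard-coding a name:

```python
import library.python.codecs as codecs

name = codecs.list_all_codecs()[0]           # any available codec name
packed = codecs.dumps(name, b"hello world")  # compress
assert codecs.loads(name, packed) == b"hello world"  # decompress
```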
diff --git a/library/python/codecs/ya.make b/library/python/codecs/ya.make
deleted file mode 100644
index f42d115d5d..0000000000
--- a/library/python/codecs/ya.make
+++ /dev/null
@@ -1,16 +0,0 @@
-PY23_LIBRARY()
-
-PEERDIR(
- library/cpp/blockcodecs
- contrib/python/six
-)
-
-PY_SRCS(
- __init__.py
-)
-
-BUILDWITH_CYTHON_CPP(__codecs.pyx)
-
-PY_REGISTER(__codecs)
-
-END()
diff --git a/library/python/color/README.md b/library/python/color/README.md
deleted file mode 100644
index 9deae40092..0000000000
--- a/library/python/color/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-A fork of ((termcolor https://github.com/termcolor/termcolor/)) for PY23 with extra functionality.
-
-It can be used to convert textual color specifications (for example, from markup) into escape sequences for correct rendering in a terminal.
-
-Usage example:
-```python
-from library.python.color import tcolor
-tcolor("some text", "green-bold-on_red") -> '\x1b[32m\x1b[41m\x1b[1msome text\x1b[0m'
-```
diff --git a/library/python/color/__init__.py b/library/python/color/__init__.py
deleted file mode 100644
index a70234945e..0000000000
--- a/library/python/color/__init__.py
+++ /dev/null
@@ -1,92 +0,0 @@
-from __future__ import print_function
-
-import copy
-import os
-
-from termcolor import ATTRIBUTES, COLORS, HIGHLIGHTS, RESET
-
-__all__ = [
- "ATTRIBUTES",
- "COLORS",
- "HIGHLIGHTS",
- "RESET",
- "colored",
- "cprint",
- "tcolor",
- "get_color_by_spec"
-]
-
-ATTRIBUTES = copy.deepcopy(ATTRIBUTES)
-ATTRIBUTES["light"] = ATTRIBUTES['bold']
-
-COLORS = copy.deepcopy(COLORS)
-COLORS['gray'] = COLORS['grey']
-COLORS['purple'] = COLORS['magenta']
-COLORS["reset"] = 0
-
-
-def get_code(code):
- if os.getenv("ANSI_COLORS_DISABLED") is None:
- return "\033[{}m".format(code)
- return ""
-
-
-def get_color_by_spec(color_spec):
- color, on_color, attrs = get_spec(color_spec)
- return get_color(color, on_color, attrs)
-
-
-def get_color(color, on_color, attrs):
- res = ""
-
- if color is not None:
- res += get_code(COLORS[color])
-
- if on_color is not None:
- res += get_code(HIGHLIGHTS[on_color])
-
- if attrs is not None:
- for attr in attrs:
- res += get_code(ATTRIBUTES[attr])
-
- return res
-
-
-def get_spec(color_spec):
- """Parses string text color formatting specification.
-
- Arguments:
- color_spec -- string spec for text color formatting, csv string with
- `color` / `bg_color` / `attr` spec items having "-" as a delimiter.
-
- Returns a tuple: (color, bg-color, attributes list)
-
- Example:
- get_spec("green-bold-on_red") -> (32, 41, [1])
- """
- parts = color_spec.split("-")
- color = None
- on_color = None
- attrs = []
- for part in parts:
- part = part.lower()
- if part in COLORS:
- color = part
- if part in HIGHLIGHTS:
- on_color = part
- if part in ATTRIBUTES:
- attrs.append(part)
- return color, on_color, attrs
-
-
-def tcolor(text, color_spec):
- color, on_color, attrs = get_spec(color_spec)
- return colored(text, color=color, on_color=on_color, attrs=attrs)
-
-
-def colored(text, color=None, on_color=None, attrs=None):
- return get_color(color, on_color, attrs) + text + get_code(COLORS["reset"])
-
-
-def cprint(text, color=None, on_color=None, attrs=None, **kwargs):
- print((colored(text, color, on_color, attrs)), **kwargs)
diff --git a/library/python/color/ya.make b/library/python/color/ya.make
deleted file mode 100644
index ff6740b1d4..0000000000
--- a/library/python/color/ya.make
+++ /dev/null
@@ -1,13 +0,0 @@
-PY23_LIBRARY()
-
-LICENSE(MIT)
-
-PY_SRCS(
- __init__.py
-)
-
-PEERDIR(
- contrib/python/termcolor
-)
-
-END()
diff --git a/library/python/compress/__init__.py b/library/python/compress/__init__.py
deleted file mode 100644
index 380ec47dca..0000000000
--- a/library/python/compress/__init__.py
+++ /dev/null
@@ -1,147 +0,0 @@
-from io import open
-
-import struct
-import json
-import os
-import logging
-
-import library.python.par_apply as lpp
-import library.python.codecs as lpc
-
-
-logger = logging.getLogger('compress')
-
-
-def list_all_codecs():
- return sorted(frozenset(lpc.list_all_codecs()))
-
-
-def find_codec(ext):
- def ext_compress(x):
- return lpc.dumps(ext, x)
-
- def ext_decompress(x):
- return lpc.loads(ext, x)
-
- ext_decompress(ext_compress(b''))
-
- return {'c': ext_compress, 'd': ext_decompress, 'n': ext}
-
-
-def codec_for(path):
- for ext in reversed(path.split('.')):
- try:
- return find_codec(ext)
- except Exception as e:
- logger.debug('in codec_for(): %s', e)
-
- raise Exception('unsupported file %s' % path)
-
-
-def compress(fr, to, codec=None, fopen=open, threads=1):
- if codec:
- codec = find_codec(codec)
- else:
- codec = codec_for(to)
-
- func = codec['c']
-
- def iter_blocks():
- with fopen(fr, 'rb') as f:
- while True:
- chunk = f.read(16 * 1024 * 1024)
-
- if chunk:
- yield chunk
- else:
- yield b''
-
- return
-
- def iter_results():
- info = {
- 'codec': codec['n'],
- }
-
- if fr:
- info['size'] = os.path.getsize(fr)
-
- yield json.dumps(info, sort_keys=True) + '\n'
-
- for c in lpp.par_apply(iter_blocks(), func, threads):
- yield c
-
- with fopen(to, 'wb') as f:
- for c in iter_results():
- logger.debug('complete %s', len(c))
- f.write(struct.pack('<I', len(c)))
-
- try:
- f.write(c)
- except TypeError:
- f.write(c.encode('utf-8'))
-
-
-def decompress(fr, to, codec=None, fopen=open, threads=1):
- def iter_chunks():
- with fopen(fr, 'rb') as f:
- cnt = 0
-
- while True:
- ll = f.read(4)
-
- if ll:
- ll = struct.unpack('<I', ll)[0]
-
- if ll:
- if ll > 100000000:
- raise Exception('broken stream')
-
- yield f.read(ll)
-
- cnt += ll
- else:
- if not cnt:
- raise Exception('empty stream')
-
- return
-
- it = iter_chunks()
- extra = []
-
- for chunk in it:
- hdr = {}
-
- try:
- hdr = json.loads(chunk)
- except Exception as e:
- logger.info('can not parse header, suspect old format: %s', e)
- extra.append(chunk)
-
- break
-
- def resolve_codec():
- if 'codec' in hdr:
- return find_codec(hdr['codec'])
-
- if codec:
- return find_codec(codec)
-
- return codec_for(fr)
-
- dc = resolve_codec()['d']
-
- def iter_all_chunks():
- for x in extra:
- yield x
-
- for x in it:
- yield x
-
- with fopen(to, 'wb') as f:
- for c in lpp.par_apply(iter_all_chunks(), dc, threads):
- if c:
- logger.debug('complete %s', len(c))
- f.write(c)
- else:
- break
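The writer above emits a simple framed stream: a JSON header chunk (`{"codec": ..., "size": ...}`) followed by compressed 16 MiB blocks, each prefixed with a little-endian `<I` length. A usage sketch; the codec is taken from `list_all_codecs()` because `codec_for()` otherwise tries to interpret the target file's extension as a codec name:

```python
import library.python.codecs as lpc
import library.python.compress as compress

codec = lpc.list_all_codecs()[0]  # any available blockcodecs name

# Compress in 16 MiB blocks across 4 worker threads, then restore;
# decompress() reads the codec back from the stream header.
compress.compress("data.bin", "data.bin.z", codec=codec, threads=4)
compress.decompress("data.bin.z", "restored.bin", threads=4)
```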
diff --git a/library/python/compress/ya.make b/library/python/compress/ya.make
deleted file mode 100644
index bbf2a784e2..0000000000
--- a/library/python/compress/ya.make
+++ /dev/null
@@ -1,16 +0,0 @@
-PY23_LIBRARY()
-
-PEERDIR(
- library/python/codecs
- library/python/par_apply
-)
-
-PY_SRCS(
- __init__.py
-)
-
-END()
-
-RECURSE_FOR_TESTS(
- tests
-)
diff --git a/library/python/coredump_filter/README.md b/library/python/coredump_filter/README.md
deleted file mode 100644
index 87b02e7985..0000000000
--- a/library/python/coredump_filter/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Coredump Filter
-
-- ABC: https://abc.yandex-team.ru/services/cores/
-
-A library for parsing debugger traces (gdb/lldb), Python tracebacks and minidumps.
-
-It takes the text of a trace as input and produces the parsed text, plus the ability
-to convert it to HTML format for easier reading.
-
-
-## Main clients
-- [Stacktrace aggregator](https://wiki.yandex-team.ru/cores-aggregation)
-
-
-## Development rules
-
-The library is written so that the `__init__.py` file can work without external
-dependencies. This allows the library to be used even in Ter1 environments.
-At the moment this invariant is not covered by tests (and that should be fixed).
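A minimal usage sketch against the `filter_stack_dump()` entry point defined in `__init__.py` below (the trace file name is illustrative):

```python
import sys
from library.python.coredump_filter import filter_stack_dump

with open("traceback.txt") as f:
    core_text = f.read()

# Writes an HTML report with merged, importance-ranked stacks to stdout.
filter_stack_dump(core_text=core_text, output_stream=sys.stdout)
```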
diff --git a/library/python/coredump_filter/__init__.py b/library/python/coredump_filter/__init__.py
deleted file mode 100644
index de0830cd43..0000000000
--- a/library/python/coredump_filter/__init__.py
+++ /dev/null
@@ -1,1500 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import print_function
-
-import six
-import enum
-import datetime
-import os
-import re
-import pkgutil
-import sys
-import hashlib
-import json
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class CoredumpMode(enum.Enum):
- GDB = "gdb"
- LLDB = "lldb"
- SDC_ASSERT = "sdc_assert"
-
-
-ARCADIA_ROOT_LINK = "https://a.yandex-team.ru/arc/trunk/arcadia/"
-
-ARCADIA_ROOT_DIRS = [
- # hottest paths
- "/util/",
- "/contrib/",
- "/library/",
- "/kernel/",
- "/build/",
- "/search/",
-
- # "/gcc-4.8.2/",
-
- # system paths
- # "/lib/x86_64-linux-gnu/",
-
- # all other stuff
- "/aapi/",
- "/addappter/",
- "/adfox/",
- "/admins/",
- "/ads/",
- "/adv/",
- "/advq/",
- "/afisha/",
- "/afro/",
- "/alet/",
- "/alice/",
- "/analytics/",
- "/antiadblock/",
- "/antirobot/",
- "/apphost/",
- "/april/",
- "/arc/",
- "/arcanum/",
- "/augur/",
- "/aurora/",
- "/autocheck/",
- "/balancer/",
- "/bass/",
- "/billing/",
- "/bindings/",
- "/browser/",
- "/build/",
- "/bunker/",
- "/caas/",
- "/canvas/",
- "/captcha/",
- "/catboost/",
- "/certs/",
- "/ci/",
- "/clickhouse/",
- "/client_analytics/",
- "/cloud/",
- "/cmicot/",
- "/cmnt/",
- "/comdep_analytics/",
- "/commerce/",
- "/contrib/",
- "/crm/",
- "/crowdsourcing/",
- "/crypta/",
- "/cv/",
- "/datacloud/",
- "/datalens/",
- "/data-ui/",
- "/devtools/",
- "/dict/",
- "/direct/",
- "/disk/",
- "/distribution/",
- "/distribution_interface/",
- "/district/",
- "/dj/",
- "/docs/",
- "/douber/",
- "/drive/",
- "/edadeal/",
- "/education/",
- "/entity/",
- "/ether/",
- "/extdata/",
- "/extsearch/",
- "/FactExtract/",
- "/fintech/",
- "/frontend/",
- "/fuzzing/",
- "/games/",
- "/gencfg/",
- "/geobase/",
- "/geoproduct/",
- "/geosuggest/",
- "/geotargeting/",
- "/glycine/",
- "/groups/",
- "/haas/",
- "/health/",
- "/helpdesk/",
- "/hitman/",
- "/home/",
- "/htf/",
- "/hw_watcher/",
- "/hypercube/",
- "/iaas/",
- "/iceberg/",
- "/infra/",
- "/intranet/",
- "/inventori/",
- "/ipreg/",
- "/irt/",
- "/it-office/",
- "/jdk/",
- "/juggler/",
- "/junk/",
- "/jupytercloud/",
- "/kernel/",
- "/keyboard/",
- "/kikimr/",
- "/kinopoisk/",
- "/kinopoisk-ott/",
- "/laas/",
- "/lbs/",
- "/library/",
- "/load/",
- "/locdoc/",
- "/logbroker/",
- "/logfeller/",
- "/mail/",
- "/mapreduce/",
- "/maps/",
- "/maps_adv/",
- "/market/",
- "/mb/",
- "/mds/",
- "/media/",
- "/media-billing/",
- "/media-crm/",
- "/mediapers/",
- "/mediaplanner/",
- "/mediastat/",
- "/media-stories/",
- "/metrika/",
- "/milab/",
- "/ml/",
- "/mlp/",
- "/mlportal/",
- "/mobile/",
- "/modadvert/",
- "/ms/",
- "/mssngr/",
- "/music/",
- "/musickit/",
- "/netsys/",
- "/nginx/",
- "/nirvana/",
- "/noc/",
- "/ofd/",
- "/offline_data/",
- "/opensource/",
- "/orgvisits/",
- "/ott/",
- "/packages/",
- "/partner/",
- "/passport/",
- "/payplatform/",
- "/paysys/",
- "/plus/",
- "/portal/",
- "/portalytics/",
- "/pythia/",
- "/quality/",
- "/quasar/",
- "/razladki/",
- "/regulargeo/",
- "/release_machine/",
- "/rem/",
- "/repo/",
- "/rnd_toolbox/",
- "/robot/",
- "/rtc/",
- "/rtline/",
- "/rtmapreduce/",
- "/rt-research/",
- "/saas/",
- "/samogon/",
- "/samsara/",
- "/sandbox/",
- "/scarab/",
- "/sdc/",
- "/search/",
- "/security/",
- "/semantic-web/",
- "/serp/",
- "/sitesearch/",
- "/skynet/",
- "/smart_devices/",
- "/smarttv/",
- "/smm/",
- "/solomon/",
- "/specsearches/",
- "/speechkit/",
- "/sport/",
- "/sprav/",
- "/statbox/",
- "/strm/",
- "/suburban-trains/",
- "/sup/",
- "/switch/",
- "/talents/",
- "/tasklet/",
- "/taxi/",
- "/taxi_efficiency/",
- "/testenv/",
- "/testpalm/",
- "/testpers/",
- "/toloka/",
- "/toolbox/",
- "/tools/",
- "/tracker/",
- "/traffic/",
- "/transfer_manager/",
- "/travel/",
- "/trust/",
- "/urfu/",
- "/vcs/",
- "/velocity/",
- "/vendor/",
- "/vh/",
- "/voicetech/",
- "/weather/",
- "/web/",
- "/wmconsole/",
- "/xmlsearch/",
- "/yabs/",
- "/yadoc/",
- "/yandex_io/",
- "/yaphone/",
- "/ydf/",
- "/ydo/",
- "/yp/",
- "/yql/",
- "/ysite/",
- "/yt/",
- "/yweb/",
- "/zen/",
- "/zapravki/",
- "/zen/",
- "/zootopia/",
- "/zora/",
-]
-
-MY_PATH = os.path.dirname(os.path.abspath(__file__))
-
-# 0.2.x uses stable hashing
-CORE_PROC_VERSION = "0.2.1"
-
-ARCADIA_ROOT_SIGN = "$S/"
-SIGNAL_NOT_FOUND = "signal not found"
-
-
-class SourceRoot(object):
- def __init__(self):
- self.root = None
-
- def detect(self, source):
- if not source:
- # For example, regexp_4
- return
-
- if source.startswith("/-S/"):
- return source[4:]
-
- if source.startswith("../"):
- return source
-
- """
- if self.root is not None:
- return self.root
- """
-
- min_pos = 100000
- for root_dir in ARCADIA_ROOT_DIRS:
- pos = source.find(root_dir)
- if pos < 0:
- continue
-
- if pos < min_pos:
- min_pos = pos
-
- if min_pos < len(source):
- self.root = source[:min_pos + 1]
-
- def crop(self, source):
- if not source:
- return ""
-
- # detection attempt
- self.detect(source)
-
- if self.root is not None:
- return source.replace(self.root, ARCADIA_ROOT_SIGN, 1)
-
- # when traceback contains only ??, source root cannot be detected
- return source
-
-
-def highlight_func(s):
- return (
- s
- .replace("=", '<span class="symbol">=</span>')
- .replace("(", '<span class="symbol">(</span>')
- .replace(")", '<span class="symbol">)</span>')
- )
-
-
-class FrameBase(object):
- def __init__(
- self,
- frame_no=None,
- addr="",
- func="",
- source="",
- source_no="",
- func_name="",
- ):
- self.frame_no = frame_no
- self.addr = addr
- self.func = func
- self.source = source
- self.source_no = source_no
- self.func_name = func_name
-
- def __str__(self):
- return "{}\t{}\t{}".format(
- self.frame_no,
- self.func,
- self.source,
- )
-
- def to_json(self):
- return {
- "frame_no": self.frame_no,
- "addr": self.addr,
- "func": self.func,
- "func_name": self.func_name,
- "source": self.source,
- "source_no": self.source_no,
- }
-
- def fingerprint(self):
- return self.func_name
-
- def cropped_source(self):
- return self.source
-
- def raw(self):
- return "{frame} {func} {source}".format(
- frame=self.frame_no,
- func=self.func,
- source=self.source,
- )
-
- def html(self):
- source, source_fmt = self.find_source()
- return (
- '<span class="frame">{frame}</span>'
- '<span class="func">{func}</span> '
- '<span class="source">{source}</span>{source_fmt}\n'.format(
- frame=self.frame_no,
- func=highlight_func(self.func.replace("&", "&amp;").replace("<", "&lt;")),
- source=source,
- source_fmt=source_fmt,
- )
- )
-
-
-class LLDBFrame(FrameBase):
- SOURCE_NO_RE = re.compile(r"(.*?[^\d]):(\d+)")
- FUNC_RE = re.compile(r"(\w+\s)?(\w+[\w,:,_,<,>,\s,*]+).*$")
-
- def __init__(
- self,
- frame_no=None,
- addr="",
- func="",
- source="",
- source_no="",
- func_name="",
- ):
- super(LLDBFrame, self).__init__(
- frame_no=frame_no,
- addr=addr,
- func=func,
- source=source,
- source_no=source_no,
- func_name=func_name,
- )
- # .source calculation
-
- func = func.replace("(anonymous namespace)::", "")
- m = self.FUNC_RE.match(func)
- if m:
- self.func_name = m.group(2) # overwrite func_name if name is in func
-
- if source_no:
- self.source_no = source_no
- self.source = source
- else:
- m = self.SOURCE_NO_RE.match(source)
- if m:
- self.source = m.group(1)
- self.source_no = m.group(2)
-
- def find_source(self):
- """
- :return: pair (source, source_fmt)
- """
- source_fmt = ""
-
- if self.source_no:
- source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no)
-
- return self.source, source_fmt
-
-
-class GDBFrame(FrameBase):
- SOURCE_NO_RE = re.compile(r"(.*):(\d+)")
- # #7 0x00007f105f3a221d in NAppHost::NTransport::TCoroutineExecutor::Poll (this=0x7f08416a5d00,
- # tasks=empty TVector (capacity=32)) at /-S/apphost/lib/executors/executors.cpp:373
- # We match with non-greedy regex a function name that cannot contain equal sign
- FUNC_RE = re.compile(r"(.*?) \(([a-zA-Z0-9_]+=.*|)\)$") # function with kwarg-params or zero params
-
- def __init__(
- self,
- frame_no=None,
- addr="",
- func="",
- source="",
- source_no="",
- func_name="",
- ):
- super(GDBFrame, self).__init__(
- frame_no=frame_no,
- addr=addr,
- func=func,
- source=source,
- source_no=source_no,
- func_name=func_name,
- )
- if not source_no:
- m = self.SOURCE_NO_RE.match(source)
- if m:
- self.source = m.group(1)
- self.source_no = m.group(2)
- if not func_name:
- m = self.FUNC_RE.match(self.func)
- if m:
- self.func_name = m.group(1)
-
- def find_source(self):
- """
- Returns link to arcadia if source is path in arcadia, else just string with path
- :return: pair (source, source_fmt)
- """
- source_fmt = ""
- source = ""
- link = ""
- dirs = self.source.split("/")
- if len(dirs) > 1 and "/{dir}/".format(dir=dirs[1]) in ARCADIA_ROOT_DIRS:
- link = self.source.replace(ARCADIA_ROOT_SIGN, ARCADIA_ROOT_LINK)
- else:
- source = self.source
- if self.source_no:
- source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no)
- if link:
- link += "?#L{line}".format(line=self.source_no)
-
- if link:
- source = '<a href="{link}">{source}</a>'.format(
- link=link,
- source=self.source,
- )
- return source, source_fmt
-
-
-class SDCAssertFrame(LLDBFrame):
-
- def __init__(
- self,
- frame_no=None,
- addr="",
- func="",
- source="",
- source_no="",
- func_name="",
- ):
- super(SDCAssertFrame, self).__init__(
- frame_no=frame_no,
- addr=addr,
- func=func,
- source=source,
- source_no=source_no,
- func_name=func_name,
- )
- # .source calculation
-
- self.source = source or ""
- if isinstance(source_no, str) and len(source_no) > 0:
- source_no = int(source_no, 16)
- self.source_no = source_no or ""
-
- m = self.FUNC_RE.match(func)
- if m:
- self.func_name = m.group(2)
-
-
-class Stack(object):
- # priority classes
- LOW_IMPORTANT = 25
- DEFAULT_IMPORTANT = 50
- SUSPICIOUS_IMPORTANT = 75
- MAX_IMPORTANT = 100
-
- # default coredump's type
- mode = CoredumpMode.GDB
-
- max_depth = None
-
- fingerprint_blacklist = [
- # bottom frames
- "raise",
- "abort",
- "__gnu_cxx::__verbose_terminate_handler",
- "_cxxabiv1::__terminate",
- "std::terminate",
- "__cxxabiv1::__cxa_throw",
- # top frames
- "start_thread",
- "clone",
- "??",
- "__clone",
- "__libc_start_main",
- "_start",
- "__nanosleep",
- ]
-
- fingerprint_blacklist_prefix = ()
-
- suspicious_functions = [
- "CheckedDelete",
- "NPrivate::Panic",
- "abort",
- "close_all_fds",
- "__cxa_throw",
- ]
-
- low_important_functions_eq = [
- "poll ()",
- "recvfrom ()",
- "pthread_join ()",
- ]
-
- low_important_functions_match = [
- "TCommonSockOps::SendV",
- "WaitD (",
- "SleepT (",
- "Join (",
- "epoll_wait",
- "nanosleep",
- "pthread_cond_wait",
- "pthread_cond_timedwait",
- "gsignal",
- "std::detail::_",
- "std::type_info",
- "ros::NodeHandle",
- ]
-
- def __init__(
- self,
- lines=None,
- source_root=None,
- thread_ptr=0,
- thread_id=None,
- frames=None,
- important=None,
- stack_fp=None,
- fingerprint_hash=None,
- stream=None,
- mode=None, # type: CoredumpMode
- ignore_bad_frames=True,
- ):
- self.lines = lines
- self.source_root = source_root
- self.thread_ptr = thread_ptr
- self.thread_id = thread_id
- if mode is not None:
- self.mode = mode
-
- self.frames = frames or []
- if self.frames and isinstance(frames[0], dict):
- self.frames = [self.frame_factory(f) for f in self.frames]
- self.important = important or self.DEFAULT_IMPORTANT
- if thread_id == 1:
- self.important = self.MAX_IMPORTANT
- self.fingerprint_hash = fingerprint_hash
- self.stack_fp = stack_fp
- self.stream = stream
- self.ignore_bad_frames = ignore_bad_frames
-
- def to_json(self):
- """Should be symmetric with `from_json`."""
- return {
- "mode": self.mode.value,
- "frames": [frame.to_json() for frame in self.frames],
- "important": self.important,
- }
-
- @staticmethod
- def from_json(stack):
- """Should be symmetric with `to_json`."""
- mode = CoredumpMode(stack.get("mode", CoredumpMode.GDB.value))
- # old serialization format support, should be dropped
- lldb_mode = stack.get("lldb_mode", False)
- if lldb_mode:
- mode = CoredumpMode.LLDB
-
- unpacked_stack = {
- "mode": mode,
- "frames": stack["frames"],
- "important": stack.get("important", Stack.DEFAULT_IMPORTANT),
- }
- return mode, unpacked_stack
-
- def frame_factory(self, args):
- frames = {
- CoredumpMode.GDB: GDBFrame,
- CoredumpMode.LLDB: LLDBFrame,
- CoredumpMode.SDC_ASSERT: SDCAssertFrame,
- }
-
- class_object = frames.get(self.mode)
- if not class_object:
- raise Exception("Invalid mode: {}".format(self.mode.value))
-
- return class_object(**args)
-
- def low_important(self):
- return self.important <= self.LOW_IMPORTANT
-
- def check_importance(self, frame):
- # raised priority cannot be lowered
- if self.important > self.DEFAULT_IMPORTANT:
- return
-
- # detect suspicious stacks
- for name in self.suspicious_functions:
- if name in frame.func:
- self.important = self.SUSPICIOUS_IMPORTANT
- return
-
- for name in self.low_important_functions_eq:
- if name == frame.func:
- self.important = self.LOW_IMPORTANT
-
- for name in self.low_important_functions_match:
- if name in frame.func:
- self.important = self.LOW_IMPORTANT
-
- def push_frame(self, frame):
- self.check_importance(frame)
- # ignore duplicated frames
- if len(self.frames) and self.frames[-1].frame_no == frame.frame_no:
- return
- self.frames.append(frame)
-
- def parse(self):
- """
- Parse one stack
- """
- assert self.lines is not None
- assert self.source_root is not None
-
- for line in self.lines:
- match_found = False
- for regexp in self.REGEXPS:
- m = regexp.match(line)
- if m:
- frame_args = m.groupdict()
- if "source" in frame_args:
- frame_args["source"] = self.source_root.crop(frame_args["source"])
-
- self.push_frame(self.frame_factory(frame_args))
- match_found = True
- break
-
- if not match_found:
- self.bad_frame(line)
-
- def bad_frame(self, line):
- if self.ignore_bad_frames:
- logger.warning("Bad frame: %s", line)
- return
-
- raise Exception("Bad frame: `{}`, frame `{}`".format(
- line,
- self.debug(return_result=True),
- ))
-
- def debug(self, return_result=False):
- if self.low_important():
- return ""
-
- res = "\n".join([str(f) for f in self.frames])
- res += "----------------------------- DEBUG END\n"
- if return_result:
- return res
-
- self.stream.write(res)
-
- def raw(self):
- return "\n".join([frame.raw() for frame in self.frames])
-
- def html(self, same_hash=False, same_count=1, return_result=False):
- ans = ""
- pre_class = "important-" + str(self.important)
- if same_hash:
- pre_class += " same-hash"
-
- ans += '<pre class="{0}">'.format(pre_class)
- if not same_hash:
- ans += '<a name="stack{0}"></a>'.format(self.hash())
-
- ans += '<span class="hash"><a href="#stack{0}">#{0}</a>, {1} stack(s) with same hash</span>\n'.format(
- self.hash(), same_count,
- )
-
- for f in self.frames:
- ans += f.html()
- ans += "</pre>\n"
-
- if return_result:
- return ans
-
- self.stream.write(ans)
-
- def fingerprint(self, max_num=None):
- """
- Stack fingerprint: concatenation of non-common stack frames
- FIXME: wipe away `max_num`
- """
- stack_fp = list()
- len_frames = min((max_num or len(self.frames)), len(self.frames))
-
- for f in self.frames[:len_frames]:
- fp = f.fingerprint()
- if not fp:
- continue
-
- if fp in self.fingerprint_blacklist:
- continue
-
- if fp.startswith(self.fingerprint_blacklist_prefix):
- continue
-
- if fp in stack_fp:
- # FIXME: optimize duplicate remover: check only previous frame
- # see also `push_frame`
- continue
-
- stack_fp.append(fp.strip())
-
- if self.max_depth is not None and len(stack_fp) >= self.max_depth:
- break
-
- return "\n".join(stack_fp)
-
- def simple_html(self, num_frames=None):
- if not num_frames:
- num_frames = len(self.frames)
- pre_class = "important-0"
- ans = '<pre class="{0}">'.format(pre_class)
- for i in range(min(len(self.frames), num_frames)):
- ans += self.frames[i].html()
- ans += "</pre>\n"
- return ans
-
- def __str__(self):
- return "\n".join(map(str, self.frames))
-
- def hash(self, max_num=None):
- """
- Entire stack hash for merging same stacks
- """
- if self.fingerprint_hash is None:
- self.fingerprint_hash = int(hashlib.md5(self.fingerprint(max_num).encode("utf-8")).hexdigest()[0:15], 16)
-
- return self.fingerprint_hash
-
-
-class GDBStack(Stack):
-
- mode = CoredumpMode.GDB
-
- REGEXPS = [
- # #6 0x0000000001d9203e in NAsio::TIOService::TImpl::Run (this=0x137b1ec00) at /place/
- # sandbox-data/srcdir/arcadia_cache/library/neh/asio/io_service_impl.cpp:77
-
- re.compile(
- r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) at (?P<source>.*)"
- ),
-
- # #5 TCondVar::WaitD (this=this@entry=0x10196b2b8, mutex=..., deadLine=..., deadLine@entry=...)
- # at /place/sandbox-data/srcdir/arcadia_cache/util/system/condvar.cpp:150
- re.compile(
- r"#(?P<frame_no>\d+)[ \t]+(?P<func>.*) at (?P<source>/.*)"
- ),
-
- # #0 0x00007faf8eb31d84 in pthread_cond_wait@@GLIBC_2.3.2 ()
- # from /lib/x86_64-linux-gnu/libpthread.so.0
- re.compile(
- r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) from (?P<source>.*)"
- ),
-
- # #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
- re.compile(
- r"#(?P<frame_no>\d+)[ \t]+ (?P<func>.*) at (?P<source>.*)"
- ),
-
- # #10 0x0000000000000000 in ?? ()
- re.compile(
- r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*)"
- ),
- ]
-
-
-class LLDBStack(Stack):
-
- mode = CoredumpMode.LLDB
-
- REGEXPS = [
- # 0x00007fd7b300a886 libthird_Uparty_Sros_Sros_Ucomm_Sclients_Sroscpp_Sliblibroscpp.so`
- # std::thread::_State_impl<std::thread::_Invoker<std::tuple<ros::PollManager::PollManager()::$_1> > >::_M_run()
- # [inlined] ros::PollManager::threadFunc(this=0x00007fd7b30dab20) at poll_manager.cpp:75:16 # noqa
- re.compile(
- r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+inlined]\s(?P<func>.+)\sat\s(?P<source>.+)"
- ),
-
- re.compile(
- r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+?`(?P<func>.+)\sat\s(?P<source>.+)"
- ),
-
- # * frame #0: 0x00007fd7aee51f47 libc.so.6`gsignal + 199
- re.compile(
- r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+)\s(?P<source>.+)`(?P<func>.+)\s\+\s(?P<source_no>\d+)"
- ),
- ]
-
- # Take not more than `max_depth` non-filtered frames into fingerprint
- # See CORES-180
- max_depth = 10
-
- fingerprint_blacklist = Stack.fingerprint_blacklist + [
- "ros::ros_wallsleep",
- ]
-
- fingerprint_blacklist_prefix = Stack.fingerprint_blacklist_prefix + (
- "___lldb_unnamed_symbol",
- "__gnu_cxx",
- "__gthread",
- "__pthread",
- "decltype",
- "myriapoda::BuildersRunner",
- "non",
- "std::_Function_handler",
- "std::_Sp_counted_ptr_inplace",
- "std::__invoke_impl",
- "std::__invoke_result",
- "std::__shared_ptr",
- "std::conditional",
- "std::shared_ptr",
- "std::thread::_Invoker",
- "std::thread::_State_impl",
- "yandex::sdc::assert_details_",
- )
-
- suspicious_functions = Stack.suspicious_functions + [
- "Xml",
- "boost",
- "ros",
- "supernode",
- "tensorflow",
- "yandex::sdc",
- ]
-
-
-class PythonStack(Stack):
-
- REGEXPS = [
- re.compile(
- r'File "(?P<source>.*)", line (?P<source_no>\d+), in (?P<func_name>.*)'
- ),
- ]
-
-
-class SDCAssertStack(LLDBStack):
-
- mode = CoredumpMode.SDC_ASSERT
-
- REGEXPS = [
- # 0: ./modules/_shared/libcore_Stools_Slibassert.so(yandex::sdc::assert_details_::PanicV(char const*,
- # long, char const*, char const*, bool, char const*, __va_list_tag*)
- # +0x2aa)[0x7fb83268feaa]
- re.compile(
- r"(?P<frame_no>\d+):\s(?P<source>.+.so)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)"
- ),
-
- re.compile(
- r"(?P<frame_no>\d+):\s(?P<source>\w+)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)"
- )
- ]
-
-
-def parse_python_traceback(trace):
- trace = trace.replace("/home/zomb-sandbox/client/", "/")
- trace = trace.replace("/home/zomb-sandbox/tasks/", "/sandbox/")
- trace = trace.split("\n")
- exception = trace[-1] # noqa: F841
- trace = trace[1: -1]
- pairs = zip(trace[::2], trace[1::2])
- stack = Stack(lines=[])
- for frame_no, (path, row) in enumerate(pairs):
- # FIXME: wrap into generic tracer
- m = PythonStack.REGEXPS[0].match(path.strip())
- if m:
- frame_args = m.groupdict()
- if not frame_args["source"].startswith("/"):
- frame_args["source"] = "/" + frame_args["source"]
- frame_args["frame_no"] = str(frame_no)
- frame_args["func"] = row.strip()
- stack.push_frame(GDBFrame(**frame_args))
- return [[stack]], [[stack.raw()]], 6
-
-
-def stack_factory(stack):
- mode, unpacked_stack = Stack.from_json(stack)
-
- if mode == CoredumpMode.GDB:
- return GDBStack(**unpacked_stack)
- elif mode == CoredumpMode.LLDB:
- return LLDBStack(**unpacked_stack)
- elif mode == CoredumpMode.SDC_ASSERT:
- return SDCAssertStack(**unpacked_stack)
-
- raise Exception("Invalid stack mode: {}. ".format(mode))
-
-
-def _read_file(file_name):
- with open(file_name) as f:
- return f.read()
-
-
-def _file_contents(file_name):
- """Return file (or resource) contents as unicode string."""
- if getattr(sys, "is_standalone_binary", False):
- try:
- contents = pkgutil.get_data(__package__, file_name)
- except Exception:
- raise IOError("Failed to find resource: " + file_name)
- else:
- if not os.path.exists(file_name):
- file_name = os.path.join(MY_PATH, file_name)
- contents = _read_file(file_name)
- # py23 compatibility
- if not isinstance(contents, six.text_type):
- contents = contents.decode("utf-8")
- return contents
-
-
-def html_prolog(stream, timestamp):
- prolog = _file_contents("prolog.html")
- assert isinstance(prolog, six.string_types)
- stream.write(prolog.format(
- style=_file_contents("styles.css"),
- coredump_js=_file_contents("core_proc.js"),
- version=CORE_PROC_VERSION,
- timestamp=timestamp,
- ))
-
-
-def html_epilog(stream):
- stream.write(_file_contents("epilog.html"))
-
-
-def detect_coredump_mode(core_text):
- if len(core_text) == 0:
- raise Exception("Text stacktrace is blank")
-
- if "Panic at unixtime" in core_text:
- return CoredumpMode.SDC_ASSERT
-
- if "(lldb)" in core_text:
- return CoredumpMode.LLDB
-
- return CoredumpMode.GDB
-
-
-def filter_stack_dump(
- core_text=None,
- stack_file_name=None,
- use_fingerprint=False,
- sandbox_failed_task_id=None,
- output_stream=None,
- timestamp=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
- ignore_bad_frames=True,
-):
- """New interface for stacktrace filtering. Preferred to use."""
- if not core_text and not stack_file_name:
- raise ValueError("Either `core_text` or `stack_file_name` should be passed to `filter_stack_dump`. ")
-
- if core_text is not None and stack_file_name:
- raise ValueError("Only one of `core_text` and `stack_file_name` cannot be specified for `filter_stack_dump`. ")
-
- if stack_file_name:
- core_text = _read_file(stack_file_name)
- # further processing uses `core_text` only
-
- mode = detect_coredump_mode(core_text)
- core_lines = core_text.split("\n")
-
- return filter_stackdump(
- file_lines=core_lines,
- ignore_bad_frames=ignore_bad_frames,
- mode=mode,
- sandbox_failed_task_id=sandbox_failed_task_id,
- stream=output_stream,
- timestamp=timestamp,
- use_fingerprint=use_fingerprint,
- use_stream=output_stream is not None,
- )
-
-
-class StackDumperBase(object):
-
- SANDBOX_TASK_RE = re.compile(r".*/[0-9a-f]/[0-9a-f]/([0-9]+)/.*")
- MAX_SAME_STACKS = 30
-
- def __init__(
- self,
- use_fingerprint,
- sandbox_failed_task_id,
- stream,
- use_stream,
- file_lines,
- timestamp,
- mode,
- file_name=None,
- ignore_bad_frames=True,
- ):
- self.source_root = SourceRoot()
- self.use_fingerprint = use_fingerprint
- self.sandbox_task_id = None
- self.sandbox_failed_task_id = sandbox_failed_task_id
- self.stream = stream or sys.stdout
- self.use_stream = use_stream
- self.file_name = file_name
- self.file_lines = file_lines
- self.timestamp = timestamp
- self.ignore_bad_frames = ignore_bad_frames
- self.stack_class = self.get_stack_class(mode)
-
- self.signal = SIGNAL_NOT_FOUND
- self.stacks = []
- self._main_info = []
-
- @staticmethod
- def is_ignored_line(line):
- raise NotImplementedError("Not implemented static method `is_ignored_line`. ")
-
- @staticmethod
- def get_stack_class(mode):
- exist_modes = {}
- for cls in [GDBStack, LLDBStack, SDCAssertStack]:
- current_mode = cls.mode
- if current_mode in exist_modes:
- raise Exception("Duplicate modes are disallowed. Repeated mode: `{}`".format(current_mode.value))
- exist_modes[current_mode] = cls
-
- if mode not in exist_modes:
- raise Exception("Unexpected coredump processing mode: `{}`".format(mode.value))
-
- return exist_modes[mode]
-
- def check_signal(self, line):
- raise NotImplementedError("Not implemented `check_signal`.")
-
- def set_sandbox_task_id(self, task_id):
- self.sandbox_task_id = task_id
-
- def add_main_line(self, line):
- self._main_info.append(line)
-
- def add_stack(self, stack_lines, thread_id):
- if not stack_lines:
- return
-
- stack = self.stack_class(
- lines=stack_lines,
- source_root=self.source_root,
- thread_id=thread_id,
- stream=self.stream,
- ignore_bad_frames=self.ignore_bad_frames,
- )
- self.stacks.append(stack)
-
- def dump(self):
- if self.file_lines is None:
- # FIXME(mvel): LLDB is not handled here
- self.file_lines = get_parsable_gdb_text(_read_file(self.file_name))
-
- self._collect_stacks()
-
- for stack in self.stacks:
- stack.parse()
- # stack.debug()
-
- if self.use_stream:
- if self.use_fingerprint:
- for stack in self.stacks:
- self.stream.write(stack.fingerprint() + "\n")
- self.stream.write("--------------------------------------\n")
- return
- else:
- html_prolog(self.stream, self.timestamp)
-
- if self.sandbox_task_id is not None:
- self.stream.write(
- '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">'
- 'Coredumped binary build task: '
- '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format(
- self.sandbox_task_id
- )
- )
-
- if self.sandbox_failed_task_id is not None:
- self.stream.write(
- '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">'
- 'Sandbox failed task: '
- '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format(
- self.sandbox_failed_task_id
- )
- )
-
- pre_class = ""
- self.stream.write('<pre class="{0}">\n'.format(pre_class))
- for line in self._main_info:
- self.stream.write(line.replace("&", "&amp;").replace("<", "&lt;") + "\n")
- self.stream.write("</pre>\n")
-
- sorted_stacks = sorted(self.stacks, key=lambda x: (x.important, x.fingerprint()), reverse=True)
-
- prev_hash = None
- all_hash_stacks = []
- cur_hash_stacks = []
- for stack in sorted_stacks:
- if stack.hash() == 0:
- continue
-
- if stack.hash() == prev_hash:
- if len(cur_hash_stacks) < self.MAX_SAME_STACKS:
- # do not collect too much
- cur_hash_stacks.append(stack)
- continue
-
- # hash changed
- if cur_hash_stacks:
- all_hash_stacks.append(cur_hash_stacks)
-
- prev_hash = stack.hash()
- cur_hash_stacks = [stack, ]
-
- # push last
- if cur_hash_stacks:
- all_hash_stacks.append(cur_hash_stacks)
-
- if self.use_stream:
- for cur_hash_stacks in all_hash_stacks:
- same_hash = False
- for stack in cur_hash_stacks:
- stack.html(same_hash=same_hash, same_count=len(cur_hash_stacks))
- same_hash = True
-
- html_epilog(self.stream)
- else:
- raw_hash_stacks = [
- [stack.raw() for stack in common_hash_stacks]
- for common_hash_stacks in all_hash_stacks
- ]
- return all_hash_stacks, raw_hash_stacks, self.signal
-
- def _collect_stacks(self):
- stack_lines = []
- stack_detected = False
- thread_id = None
-
- for line in self.file_lines:
- line = line.strip()
- if self.is_ignored_line(line):
- continue
-
- if "Core was generated" in line:
- m = self.SANDBOX_TASK_RE.match(line)
- if m:
- self.set_sandbox_task_id(int(m.group(1)))
-
- self.check_signal(line)
-
- # [Switching to thread 55 (Thread 0x7f100a94c700 (LWP 21034))]
- # Thread 584 (Thread 0x7ff363c03700 (LWP 2124)):
-
- # see test2 and test3
- tm = self.THREAD_RE.match(line)
- if tm:
- stack_detected = True
- self.add_stack(
- stack_lines=stack_lines,
- thread_id=thread_id,
- )
- stack_lines = []
- thread_id = int(tm.group(1))
- continue
-
- if stack_detected:
- stack_lines.append(line)
- else:
- self.add_main_line(line)
-
- # parse last stack
- self.add_stack(
- stack_lines=stack_lines,
- thread_id=thread_id,
- )
-
-
-class StackDumperGDB(StackDumperBase):
-
- SIGNAL_FLAG = "Program terminated with signal"
- THREAD_RE = re.compile(r".*[Tt]hread (\d+) .*")
- LINE_IN = re.compile(r"\d+\tin ")
-
- def is_ignored_line(self, line):
- if not line:
- return True
-
- if line.startswith("[New "):
- # LWP, Thread, process
- return True
-
- if line.startswith("[Thread "):
- return True
-
- if line.startswith("Using "):
- return True
-
- if line.startswith("warning:"):
- return True
-
- if line.startswith("Python Exception"):
- # TODO: handle this more carefully
- return True
-
- if line[0] != "#" and "No such file or directory" in line:
- return True
-
- if self.LINE_IN.match(line):
- # see test1.txt for example
- # 641 in /place/sandbox-data/srcdir/arcadia/library/coroutine/engine/impl.h
- return True
-
- return False
-
- def check_signal(self, line):
- if self.SIGNAL_FLAG in line:
- self.signal = line[line.find(self.SIGNAL_FLAG) + len(self.SIGNAL_FLAG):].split(",")[0]
-
-
-class StackDumperLLDB(StackDumperBase):
-
- SIGNAL_FLAG = "stop reason = signal"
-
- THREAD_RE = re.compile(r".*thread #(\d+), .*")
-
- SKIP_LINES = {
- "(lldb) bt all",
- "(lldb) script import sys",
- "(lldb) target create",
- "Core file",
-
- # Drop signal interceptor call
- # * frame #0: 0x00007efd49042fb7 libc.so.6`__GI___libc_sigaction at sigaction.c:54
- # TODO(epsilond1): Set MAX_IMPORTANT for some thread
- "__GI___libc_sigaction",
-
- # Drop unnamed symbols at lines like
- # frame #4: 0x00007fd8054156df libstdc++.so.6`___lldb_unnamed_symbol440$$libstdc++.so.6 + 15
- "$$",
- }
-
- @staticmethod
- def is_ignored_line(line):
- if not line:
- return True
-
- for skip_line in StackDumperLLDB.SKIP_LINES:
- if skip_line in line:
- return True
- return False
-
- def check_signal(self, line):
- if self.SIGNAL_FLAG in line and self.signal == SIGNAL_NOT_FOUND:
- self.signal = line.split()[-1]
-
-
-class StackDumperSDCAssert(StackDumperBase):
-
- THREAD_RE = re.compile(
- r"(\d+)(:\s)"
- )
-
- def is_ignored_line(self, line):
- if not line:
- return True
-
- return not re.match(self.THREAD_RE, line)
-
- def check_signal(self, line):
- self.signal = SIGNAL_NOT_FOUND
-
- def _collect_stacks(self):
- stack_lines = []
- for line in self.file_lines:
- line = line.strip()
- if self.is_ignored_line(line):
- continue
- stack_lines.append(line)
- self.check_signal(line)
-
- self.add_stack(
- stack_lines=stack_lines,
- thread_id=0,
- )
-
-
-def filter_stackdump(
- file_name=None,
- use_fingerprint=False,
- sandbox_failed_task_id=None,
- stream=None,
- file_lines=None,
- use_stream=True,
- timestamp=None,
- ignore_bad_frames=True,
- mode=None,
-):
- if mode is None and file_name is not None:
- mode = detect_coredump_mode(_read_file(file_name))
- if mode == CoredumpMode.GDB:
- stack_dumper_cls = StackDumperGDB
- elif mode == CoredumpMode.LLDB:
- stack_dumper_cls = StackDumperLLDB
- elif mode == CoredumpMode.SDC_ASSERT:
- stack_dumper_cls = StackDumperSDCAssert
- else:
- raise Exception("Invalid mode: {}".format(mode.value))
-
- dumper = stack_dumper_cls(
- file_name=file_name,
- use_fingerprint=use_fingerprint,
- sandbox_failed_task_id=sandbox_failed_task_id,
- stream=stream,
- use_stream=use_stream,
- file_lines=file_lines,
- timestamp=timestamp,
- ignore_bad_frames=ignore_bad_frames,
- mode=mode,
- )
-
- return dumper.dump()
-
-
-def get_parsable_gdb_text(core_text):
- # FIXME(mvel): Check encoding?
- # core_text = core_text.encode("ascii", "ignore").decode("ascii")
- core_text = (
- core_text
- # .replace("#", "\n#") # bug here
- .replace("No core", "\nNo core")
- .replace("[New", "\n[New")
- .replace("\n\n", "\n")
- )
-
- return core_text.split("\n")
-
-
-if __name__ == "__main__":
- if len(sys.argv) < 2:
- sys.stderr.write(
- """Traceback filter "Tri Korochki"
-https://wiki.yandex-team.ru/cores-aggregation/
-Usage:
- core_proc.py <traceback.txt> [-f|--fingerprint]
- core_proc.py -v|--version
-"""
- )
- sys.exit(1)
-
- if sys.argv[1] == "--version" or sys.argv[1] == "-v":
- if os.system("svn info 2>/dev/null | grep '^Revision'") != 0:
- print(CORE_PROC_VERSION)
- sys.exit(0)
-
- sandbox_failed_task_id = None
-
- use_fingerprint = False
- if len(sys.argv) >= 3:
- if sys.argv[2] == "-f" or sys.argv[2] == "--fingerprint":
- use_fingerprint = True
- sandbox_failed_task_id = sys.argv[2]
-
- filter_stack_dump(
- core_text=_read_file(sys.argv[1]),
- use_fingerprint=use_fingerprint,
- sandbox_failed_task_id=sandbox_failed_task_id,
- output_stream=sys.stdout,
- )
-
-
-"""
-Stack group is a `Stack` objects list with the same hash (fingerprint).
-"""
-
-
-class StackEncoder(json.JSONEncoder):
- """Stack JSON serializer."""
-
- def default(self, obj):
- if isinstance(obj, Stack):
- return obj.to_json()
-
- return json.JSONEncoder.default(self, obj)
-
-
-def serialize_stacks(stack_groups):
- """
- Serialize list of stack groups to string (using JSON format).
-
- :param stack_groups: list of stack groups.
- :return: JSON serialized to string
- """
- return json.dumps(stack_groups, cls=StackEncoder)
-
-
-def deserialize_stacks(stack_groups_str):
- """
- Restore JSON-serialized stack data into stack groups.
-
- :param stack_groups_str: JSON-serialized data.
- :return: list of stack groups
- """
- stack_groups_json = json.loads(stack_groups_str)
- # please do not use `map` hell here, it's impossible to debug
- all_stacks = [
- [stack_factory(stack) for stack in stacks]
- for stacks in stack_groups_json
- ]
- return all_stacks
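The two helpers above round-trip grouped stacks through JSON. A short sketch, assuming `core_text` holds a gdb/lldb trace (with no `output_stream`, `filter_stack_dump()` returns its results instead of rendering them):

```python
stack_groups, raw_groups, signal = filter_stack_dump(core_text=core_text)

blob = serialize_stacks(stack_groups)  # JSON string
restored = deserialize_stacks(blob)    # list of Stack-object groups
assert restored[0][0].hash() == stack_groups[0][0].hash()
```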
diff --git a/library/python/coredump_filter/core_proc.js b/library/python/coredump_filter/core_proc.js
deleted file mode 100644
index 15413adeae..0000000000
--- a/library/python/coredump_filter/core_proc.js
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-$(document).ready(function() {
- $('#show-same-stacks').click(function() {
- var stacks = $('.same-hash');
- for (var i = 0; i < stacks.length; ++i)
- $(stacks[i]).show();
- $('#show-same-stacks').hide();
- $('#hide-same-stacks').show();
- return false;
- });
-
- $('#hide-same-stacks').click(function() {
- var stacks = $('.same-hash');
- for (var i = 0; i < stacks.length; ++i)
- $(stacks[i]).hide();
- $('#hide-same-stacks').hide();
- $('#show-same-stacks').show();
- return false;
- });
-});
diff --git a/library/python/coredump_filter/epilog.html b/library/python/coredump_filter/epilog.html
deleted file mode 100644
index b317cc2a91..0000000000
--- a/library/python/coredump_filter/epilog.html
+++ /dev/null
@@ -1,2 +0,0 @@
- </body>
-</html> \ No newline at end of file
diff --git a/library/python/coredump_filter/prolog.html b/library/python/coredump_filter/prolog.html
deleted file mode 100644
index f102a7210d..0000000000
--- a/library/python/coredump_filter/prolog.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!DOCTYPE html>
-<html>
- <head>
- <style>{style}</style>
- <script src="https://yastatic.net/jquery/1.7.1/jquery.min.js"></script>
- <script>{coredump_js}</script>
- </head>
- <body>
- <h1>Coredump report generated on {timestamp} by Coredump/traceback filter
- <i><a href="http://mvel.at.yandex-team.ru/2373">Tri Korochki</a></i>
- ({version})
- </h1>
- <h3>Author: <a href="https://staff.yandex-team.ru/mvel">mvel@</a> aka Mikhail Veltishchev</h3>
- <h3>&copy; Yandex LLC. All rights reversed</h3>
- <div class="legend">
- <ul style="line-height: 22px">
- <li><span class="important-100">Problem</span> stacks</li>
- <li><span class="important-75">Suspicious</span> stacks</li>
- <li><span class="important-50">Active</span> stacks</li>
- <li><span class="important-25">Non-active</span> stacks</li>
- </ul>
- <a class="show-same-hash" id="show-same-stacks" href="#">Show same stacks</a>
- <a class="show-same-hash" id="hide-same-stacks" href="#" style="display: none">Hide same stacks</a>
- </div>
diff --git a/library/python/coredump_filter/styles.css b/library/python/coredump_filter/styles.css
deleted file mode 100644
index fdd09ce09e..0000000000
--- a/library/python/coredump_filter/styles.css
+++ /dev/null
@@ -1,116 +0,0 @@
-body {
- font-family: sans-serif;
- font-size: 12px;
-}
-
-a {
- text-decoration: none;
- color: #486DEC;
-}
-
-.frame {
- color: #7f7f7f;
- display: inline-block;
- width: 30px;
-}
-
-.addr {
- color: #999999;
- padding-right: 10px;
-}
-
-.func {
- color: #7f0000;
- word-wrap: normal;
-}
-
-.source {
- color: #007f00;
-}
-
-.symbol {
- color: #0000ff;
-}
-
-h1 {
- font-size: 1.5em;
- margin-top: .1em;
- margin-bottom: .2em;
-}
-
-h3 {
- font-size: 1em;
- margin-top: .1em;
- margin-bottom: .2em;
-}
-
-pre {
- overflow-x: auto;
- margin: 6px 0px 6px 0px;
- padding: 0px 12px 6px 12px;
- position: relative;
-}
-
-pre.important-25 {
- background-color: #eeeeee;
-}
-
-span.important-25 {
- background-color: #eeeeee;
- padding: 3px;
-}
-
-pre.important-50 {
- background-color: #e7ffe7;
-}
-
-span.important-50 {
- background-color: #e7ffe7;
- padding: 3px;
-}
-
-pre.important-75 {
- background-color: #ffffcc;
-}
-
-span.important-75 {
- background-color: #ffffcc;
- padding: 3px;
-}
-
-pre.important-100 {
- background-color: #ffdddd;
-}
-
-span.important-100 {
- background-color: #ffdddd;
- padding: 3px;
-}
-
-a.show-same-hash {
- padding: 0px 20px 0px 20px;
-}
-
-/* hidden by default */
-.same-hash {
- display: none;
-}
-
-span.hash {
- position: absolute;
- top: 3px; right: 3px;
-}
-
-div.legend {
- position: absolute;
- z-index: 1;
- top: 5px;
- right: 8px;
- border: 1px solid #7f7f7f;
- border-radius: 3px;
- padding: 0px 20px 3px 0px;
-}
-
-div.legend ul {
- margin: 3px 0px 3px 0px;
-}
diff --git a/library/python/coredump_filter/ya.make b/library/python/coredump_filter/ya.make
deleted file mode 100644
index fc8ec1a45f..0000000000
--- a/library/python/coredump_filter/ya.make
+++ /dev/null
@@ -1,23 +0,0 @@
-PY23_LIBRARY()
-
-PY_SRCS(
- __init__.py
-)
-
-RESOURCE_FILES(
- PREFIX library/python/coredump_filter/
- core_proc.js
- epilog.html
- prolog.html
- styles.css
-)
-
-IF(PYTHON2)
- PEERDIR(contrib/deprecated/python/enum34)
-ENDIF()
-
-END()
-
-RECURSE(
- tests
-)
diff --git a/library/python/json/__init__.py b/library/python/json/__init__.py
deleted file mode 100644
index c6420d5e6d..0000000000
--- a/library/python/json/__init__.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from simplejson import load, dump, dumps  # noqa
-from simplejson import loads as _sj_loads
-
-from library.python.json.loads import loads as _loads
-
-
-def loads(*args, **kwargs):
- try:
- return _loads(*args, **kwargs)
- except Exception as e:
- if 'invalid syntax at token' in str(e):
- kwargs.pop('intern_keys', None)
- kwargs.pop('intern_vals', None)
- kwargs.pop('may_unicode', None)
- return _sj_loads(*args, **kwargs)
-
- raise
-
-
-def read_file(file_name, **kwargs):
- """
- Read file and return its parsed json contents.
-
- All kwargs will be proxied to `json.load` method as is.
-
- :param file_name: file with json contents
- :return: parsed json contents
- """
- with open(file_name) as f:
- return load(f, **kwargs)
-
-
-def write_file(file_name, contents, **kwargs):
- """
- Dump json data to file.
-
- All kwargs will be proxied to `json.dump` method as is.
-
- :param file_name: file to dump to
- :param contents: JSON-serializable object
- """
- with open(file_name, "w") as f:
- dump(contents, f, **kwargs)
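Usage of the module above, as a sketch: `loads()` tries the fast C++ parser
first and falls back to simplejson only when the fast parser rejects the
input, so for valid JSON both paths are assumed to agree. The file name below
is illustrative:

    import library.python.json as lpj

    lpj.write_file("data.json", {"status": "ok"}, indent=2)  # kwargs go to simplejson.dump
    assert lpj.read_file("data.json")["status"] == "ok"

    # intern_* hints reach the fast parser; they are silently dropped on fallback
    parsed = lpj.loads('{"a": 1}', intern_keys=True)
    assert parsed == {"a": 1}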
diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp
deleted file mode 100644
index 19cdb096ae..0000000000
--- a/library/python/json/loads.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-#include "loads.h"
-
-#include <Python.h>
-
-#include <library/cpp/json/fast_sax/parser.h>
-
-#include <util/generic/algorithm.h>
-#include <util/generic/stack.h>
-#include <util/generic/vector.h>
-#include <util/generic/ylimits.h>
-#include <util/string/ascii.h>
-
-using namespace NJson;
-
-namespace {
- enum EKind {
- Undefined,
- Array,
- Dict,
- Value,
- Key,
- };
-
- static inline TStringBuf ToStr(EKind kind) noexcept {
- switch (kind) {
- case Undefined:
- return TStringBuf("Undefined");
-
- case Array:
- return TStringBuf("Array");
-
- case Dict:
- return TStringBuf("Dict");
-
- case Value:
- return TStringBuf("Value");
-
- case Key:
- return TStringBuf("Key");
- }
-
- Y_UNREACHABLE();
- }
-
- struct TUnref {
- static inline void Destroy(PyObject* o) noexcept {
- Py_XDECREF(o);
- }
- };
-
- using TObjectPtr = TAutoPtr<PyObject, TUnref>;
-
- static inline TObjectPtr BuildBool(bool val) noexcept {
- if (val) {
- Py_RETURN_TRUE;
- }
-
- Py_RETURN_FALSE;
- }
-
-    // Translate Python exceptions from object-creating functions into C++ exceptions.
-    // Such errors are reported by returning nullptr.
-    // When a Python error is set and the C++ exception is caught by the Cython wrapper,
-    // the Python exception is propagated and the C++ exception is discarded.
- PyObject* CheckNewObject(PyObject* obj) {
- Y_ENSURE(obj != nullptr, "got python exception");
- return obj;
- }
-
- void CheckRetcode(int retcode) {
- Y_ENSURE(retcode == 0, "got python exception");
- }
-
- static inline TObjectPtr BuildSmall(long val) {
-#if PY_VERSION_HEX >= 0x03000000
- return CheckNewObject(PyLong_FromLong(val));
-#else
- return CheckNewObject(PyInt_FromLong(val));
-#endif
- }
-
- PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) {
-#if PY_VERSION_HEX >= 0x03000000
- Y_UNUSED(mayUnicode);
- PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size());
- if (intern) {
- PyUnicode_InternInPlace(&pyStr);
- }
-#else
- const bool needUnicode = mayUnicode && !AllOf(str, IsAscii);
- PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size())
- : PyString_FromStringAndSize(str.data(), str.size());
- if (intern && !needUnicode) {
- PyString_InternInPlace(&pyStr);
- }
-#endif
- return pyStr;
- }
-
- struct TVal {
- EKind Kind = Undefined;
- TObjectPtr Val;
-
- inline TVal() noexcept
- : Kind(Undefined)
- {
- }
-
- inline TVal(EKind kind, TObjectPtr val) noexcept
- : Kind(kind)
- , Val(val)
- {
- }
- };
-
- static inline TObjectPtr NoneRef() noexcept {
- Py_RETURN_NONE;
- }
-
- struct TContext: public TJsonCallbacks {
- const bool InternKeys;
- const bool InternVals;
- const bool MayUnicode;
- TStack<TVal, TVector<TVal>> S;
-
- inline TContext(bool internKeys, bool internVals, bool mayUnicode)
- : TJsonCallbacks(true)
- , InternKeys(internKeys)
- , InternVals(internVals)
- , MayUnicode(mayUnicode)
- {
- S.emplace();
- }
-
- inline bool Consume(TObjectPtr o) {
- auto& t = S.top();
-
- if (t.Kind == Array) {
- CheckRetcode(PyList_Append(t.Val.Get(), o.Get()));
- } else if (t.Kind == Key) {
- auto key = S.top().Val;
-
- S.pop();
-
- CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get()));
- } else {
- t = TVal(Value, o);
- }
-
- return true;
- }
-
- inline TObjectPtr Pop(EKind expect) {
- auto res = S.top();
-
- S.pop();
-
- if (res.Kind != expect) {
- ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")";
- }
-
- return res.Val;
- }
-
- inline void Push(EKind kind, TObjectPtr object) {
- S.push(TVal(kind, object));
- }
-
- virtual bool OnNull() {
- return Consume(NoneRef());
- }
-
- virtual bool OnBoolean(bool v) {
- return Consume(BuildBool(v));
- }
-
- virtual bool OnInteger(long long v) {
-            // check both bounds, as in OnUInteger: long may be narrower than long long
-            if (v >= (long long)Min<long>() && v <= (long long)Max<long>()) {
- return Consume(BuildSmall((long)v));
- }
-
- return Consume(CheckNewObject(PyLong_FromLongLong(v)));
- }
-
- virtual bool OnUInteger(unsigned long long v) {
- if (v <= (unsigned long long)Max<long>()) {
- return Consume(BuildSmall((long)v));
- }
-
- return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v)));
- }
-
- virtual bool OnDouble(double v) {
- return Consume(CheckNewObject(PyFloat_FromDouble(v)));
- }
-
- virtual bool OnString(const TStringBuf& v) {
- return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode)));
- }
-
- virtual bool OnOpenMap() {
- Push(Dict, CheckNewObject(PyDict_New()));
-
- return true;
- }
-
- virtual bool OnCloseMap() {
- return Consume(Pop(Dict));
- }
-
- virtual bool OnMapKey(const TStringBuf& k) {
- Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode)));
- return true;
- }
-
- virtual bool OnOpenArray() {
- Push(Array, CheckNewObject(PyList_New(0)));
-
- return true;
- }
-
- virtual bool OnCloseArray() {
- return Consume(Pop(Array));
- }
- };
-}
-
-PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) {
- TContext ctx(internKeys, internVals, mayUnicode);
-
- if (!len) {
- ythrow yexception() << "parse error: zero length input string";
- }
-
- if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) {
- ythrow yexception() << "parse error";
- }
-
- auto& s = ctx.S;
-
- if (!s || s.top().Kind != Value) {
- ythrow yexception() << "shit happen";
- }
-
- return s.top().Val.Release();
-}
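The TContext above is a SAX-to-object stack machine: containers are pushed on
open events, every completed value is consumed into whatever sits on top of
the stack, and a map key waits on the stack until the value that follows it
arrives. A rough Python sketch of the same algorithm (names are the sketch's
own, not the library's):

    class StackBuilder(object):
        """Builds a Python object from SAX-style JSON events, like TContext."""

        def __init__(self):
            self.stack = [("value", None)]  # (kind, payload) pairs

        def consume(self, obj):
            kind, payload = self.stack[-1]
            if kind == "array":
                payload.append(obj)
            elif kind == "key":
                self.stack.pop()                  # pop the key ...
                self.stack[-1][1][payload] = obj  # ... and set it in the enclosing dict
            else:
                self.stack[-1] = ("value", obj)   # top-level scalar

        def on_open_map(self):
            self.stack.append(("dict", {}))

        def on_map_key(self, key):
            self.stack.append(("key", key))

        def on_close_map(self):
            kind, payload = self.stack.pop()
            assert kind == "dict"
            self.consume(payload)

        def on_open_array(self):
            self.stack.append(("array", []))

        def on_close_array(self):
            kind, payload = self.stack.pop()
            assert kind == "array"
            self.consume(payload)

        def result(self):
            kind, payload = self.stack[-1]
            assert kind == "value"
            return payload

    # {"a": [1, 2]} as an event stream
    b = StackBuilder()
    b.on_open_map(); b.on_map_key("a"); b.on_open_array()
    b.consume(1); b.consume(2)
    b.on_close_array(); b.on_close_map()
    assert b.result() == {"a": [1, 2]}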
diff --git a/library/python/json/loads.h b/library/python/json/loads.h
deleted file mode 100644
index 62dcdf6f21..0000000000
--- a/library/python/json/loads.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-
-#include <Python.h>
-
-PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys = false, bool internVals = false, bool mayUnicode = false);
diff --git a/library/python/json/loads.pyx b/library/python/json/loads.pyx
deleted file mode 100644
index 82e5c6dce7..0000000000
--- a/library/python/json/loads.pyx
+++ /dev/null
@@ -1,14 +0,0 @@
-from libcpp cimport bool
-
-cdef extern from "library/python/json/loads.h":
- object LoadJsonFromString(const char*, size_t, bool internKeys, bool internVals, bool mayUnicode) except +
-
-
-def loads(s, intern_keys=False, intern_vals=False, may_unicode=False):
- if isinstance(s, unicode):
- s = s.encode('utf-8')
-
- try:
- return LoadJsonFromString(s, len(s), intern_keys, intern_vals, may_unicode)
- except Exception as e:
- raise ValueError(str(e))
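A usage sketch for the wrapper above (the flag names are the ones in the
signature; the memory win from interning is an assumption worth measuring,
not a guarantee):

    from library.python.json.loads import loads

    # interning repeated keys can save memory on large homogeneous payloads
    rows = loads('[{"id": 1}, {"id": 2}]', intern_keys=True)
    assert rows[1]["id"] == 2

    try:
        loads("")  # zero-length input raises in the C++ layer
    except ValueError as e:
        assert "parse error" in str(e)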
diff --git a/library/python/json/ya.make b/library/python/json/ya.make
deleted file mode 100644
index 74a82de9d8..0000000000
--- a/library/python/json/ya.make
+++ /dev/null
@@ -1,17 +0,0 @@
-PY23_LIBRARY()
-
-PEERDIR(
- contrib/python/simplejson
- library/cpp/json/fast_sax
-)
-
-PY_SRCS(
- __init__.py
- loads.pyx
-)
-
-SRCS(
- loads.cpp
-)
-
-END()
diff --git a/library/python/par_apply/__init__.py b/library/python/par_apply/__init__.py
deleted file mode 100644
index 19b89ae843..0000000000
--- a/library/python/par_apply/__init__.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import sys
-import threading
-import six
-
-from six.moves import queue
-
-
-def par_apply(seq, func, thr_num, join_polling=None):
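-    """Lazily apply `func` to items of `seq` in `thr_num` threads, yielding results in input order."""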
- if thr_num < 2:
- for x in seq:
- yield func(x)
-
- return
-
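-    # in_q carries (index, [item]) tasks and an (index, None) end-of-input marker;
-    # a bare None stops a worker; out_q carries (index, block, (result, exc_info))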
- in_q = queue.Queue()
- out_q = queue.Queue()
-
- def enumerate_blocks():
- n = 0
-
- for b in seq:
- yield n, [b]
- n += 1
-
- yield n, None
-
- def iter_out():
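-        # re-emit results from out_q in input order, buffering out-of-order ones in d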
- n = 0
- d = {}
-
- while True:
- if n in d:
- r = d[n]
- del d[n]
- n += 1
-
- yield r
- else:
- res = out_q.get()
-
- d[res[0]] = res
-
- out_iter = iter_out()
-
- def wait_block():
- for x in out_iter:
- return x
-
- def iter_compressed():
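-        # feed in_q, draining results so at most ~2 * thr_num blocks are in flight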
- p = 0
-
- for n, b in enumerate_blocks():
- in_q.put((n, b))
-
- while n > p + (thr_num * 2):
- p, b, c = wait_block()
-
- if not b:
- return
-
- yield p, c
-
- while True:
- p, b, c = wait_block()
-
- if not b:
- return
-
- yield p, c
-
- def proc():
- while True:
- data = in_q.get()
-
- if data is None:
- return
-
- n, b = data
-
- if b:
- try:
- res = (func(b[0]), None)
- except Exception:
- res = (None, sys.exc_info())
- else:
- res = (None, None)
-
- out_q.put((n, b, res))
-
-    thrs = [threading.Thread(target=proc) for _ in range(thr_num)]
-
- for t in thrs:
- t.start()
-
- try:
- for p, c in iter_compressed():
- res, err = c
-
- if err:
- six.reraise(*err)
-
- yield res
- finally:
- for t in thrs:
- in_q.put(None)
-
- for t in thrs:
- if join_polling is not None:
- while True:
- t.join(join_polling)
- if not t.is_alive():
- break
- else:
- t.join()
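A minimal usage sketch: `par_apply` behaves like an ordered, lazy `map` that
runs `func` in worker threads and re-raises the first worker exception in the
consumer (the URLs and `fetch` below are illustrative):

    from library.python.par_apply import par_apply

    def fetch(url):
        # placeholder for real I/O-bound work
        return len(url)

    urls = ["https://a.example", "https://bb.example", "https://ccc.example"]

    # results come back in input order even if workers finish out of order
    sizes = list(par_apply(urls, fetch, thr_num=4))
    assert sizes == [len(u) for u in urls]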
diff --git a/library/python/par_apply/ya.make b/library/python/par_apply/ya.make
deleted file mode 100644
index b14592ab79..0000000000
--- a/library/python/par_apply/ya.make
+++ /dev/null
@@ -1,11 +0,0 @@
-PY23_LIBRARY()
-
-PEERDIR(
- contrib/python/six
-)
-
-PY_SRCS(
- __init__.py
-)
-
-END()