author    | say <say@yandex-team.com> | 2023-12-08 17:40:48 +0300
committer | say <say@yandex-team.com> | 2023-12-08 19:58:59 +0300
commit    | 914f57e3243f53dd89dd3adb4d8b6d35c47f46ce (patch)
tree      | 98a1f1f1f5e2c38db3a78da10aeb7eb7d4e952e0 /library/python
parent    | e61293d91ee7c923944f627d8e1138bcb17cacad (diff)
download  | ydb-914f57e3243f53dd89dd3adb4d8b6d35c47f46ce.tar.gz
Make support_retries() and get_type() instance methods instead of class methods. Reduce get_type_name() usage.
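The shape of that refactoring, as a minimal sketch — only the method names `support_retries()` and `get_type()` come from the commit message; the `Codec` class, its constructor parameter, and the method bodies are hypothetical:

```python
class Codec(object):
    """Hypothetical carrier class; the real classes live elsewhere in the tree."""

    # Before: class-level answers, identical for every instance.
    #
    # @classmethod
    # def support_retries(cls):
    #     return False
    #
    # @classmethod
    # def get_type(cls):
    #     return cls.TYPE

    # After: instance methods, so the answer can depend on how a
    # particular object was constructed, not only on its class.
    def __init__(self, retries_enabled=False):
        self._retries_enabled = retries_enabled

    def support_retries(self):
        return self._retries_enabled

    def get_type(self):
        return self.__class__.__name__
```

Call sites then go through an object (`codec.support_retries()`) rather than the class, which is presumably also what allowed the `get_type_name()` call sites to shrink.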
Diffstat (limited to 'library/python')
28 files changed, 0 insertions, 2925 deletions
diff --git a/library/python/archive/__init__.py b/library/python/archive/__init__.py deleted file mode 100644 index a6e032ff4c..0000000000 --- a/library/python/archive/__init__.py +++ /dev/null @@ -1,266 +0,0 @@ -import errno -import logging -import os -import random -import shutil -import stat -import string -import sys - -import six - -import libarchive -import libarchive._libarchive as _libarchive - -from pathlib2 import PurePath - -logger = logging.getLogger(__name__) - -GZIP = "gzip" -ZSTD = "zstd" - -ENCODING = "utf-8" - - -class ConfigureError(Exception): - pass - - -class Level(object): - def __init__(self, level): - self.level = level - - -class Compression(object): - Fast = Level(1) - Default = Level(2) - Best = Level(3) - - -def get_compression_level(filter_name, level): - if level is None or not filter_name: - return None - elif isinstance(level, Level): - level = { - GZIP: { - Compression.Fast: 1, - Compression.Default: 6, - Compression.Best: 9, - }, - ZSTD: { - Compression.Fast: 1, - Compression.Default: 3, - Compression.Best: 22, - }, - }[filter_name][level] - return level - - -def encode(value, encoding): - return value.encode(encoding) - - -def extract_tar(tar_file_path, output_dir, strip_components=None, fail_on_duplicates=True): - output_dir = encode(output_dir, ENCODING) - _make_dirs(output_dir) - with libarchive.Archive(tar_file_path, mode="rb") as tarfile: - for e in tarfile: - p = _strip_prefix(e.pathname, strip_components) - if not p: - continue - dest = os.path.join(output_dir, encode(p, ENCODING)) - if e.pathname.endswith("/"): - _make_dirs(dest) - continue - - if strip_components and fail_on_duplicates: - if os.path.exists(dest): - raise Exception( - "The file {} is duplicated because of strip_components={}".format(dest, strip_components) - ) - - _make_dirs(os.path.dirname(dest)) - - if e.ishardlink(): - src = os.path.join(output_dir, _strip_prefix(e.hardlink, strip_components)) - _hardlink(src, dest) - continue - if e.issym(): - src = _strip_prefix(e.linkname, strip_components) - _symlink(src, dest) - continue - - with open(dest, "wb") as f: - if hasattr(os, "fchmod"): - os.fchmod(f.fileno(), e.mode & 0o7777) - libarchive.call_and_check( - _libarchive.archive_read_data_into_fd, - tarfile._a, - tarfile._a, - f.fileno(), - ) - - -def _strip_prefix(path, strip_components): - if not strip_components: - return path - p = PurePath(path) - stripped = str(p.relative_to(*p.parts[:strip_components])) - return '' if stripped == '.' else stripped - - -def tar( - paths, - output, - compression_filter=None, - compression_level=None, - fixed_mtime=None, - onerror=None, - postprocess=None, - dereference=False, -): - if isinstance(paths, six.string_types): - paths = [paths] - - if isinstance(output, six.string_types): - temp_tar_path, stream = ( - output + "." 
+ "".join(random.sample(string.ascii_lowercase, 8)), - None, - ) - else: - temp_tar_path, stream = None, output - - compression_level = get_compression_level(compression_filter, compression_level) - - try: - if compression_filter: - filter_name = compression_filter - if compression_level is not None: - filter_opts = {"compression-level": str(compression_level)} - else: - filter_opts = {} - # force gzip don't store mtime of the original file being compressed (http://www.gzip.org/zlib/rfc-gzip.html#file-format) - if fixed_mtime is not None and compression_filter == GZIP: - filter_opts["timestamp"] = "" - else: - filter_name = filter_opts = None - - with libarchive.Archive( - stream or temp_tar_path, - mode="wb", - format="gnu", - filter=filter_name, - filter_opts=filter_opts, - fixed_mtime=fixed_mtime, - ) as tarfile: - # determine order if fixed_mtime is specified to produce stable archive - paths = paths if fixed_mtime is None else sorted(paths) - - for p in paths: - if type(p) == tuple: - path, arcname = p - else: - path, arcname = p, os.path.basename(p) - - if os.path.isdir(path): - for root, dirs, files in os.walk(path, followlinks=dereference): - if fixed_mtime is None: - entries = dirs + files - else: - entries = sorted(dirs) + sorted(files) - - reldir = os.path.relpath(root, path) - for f in entries: - _writepath( - tarfile, - os.path.join(root, f), - os.path.normpath(os.path.join(arcname, reldir, f)), - onerror, - postprocess, - dereference, - ) - else: - if not os.path.exists(path): - raise OSError("Specified path doesn't exist: {}".format(path)) - _writepath(tarfile, path, arcname, onerror, postprocess, dereference) - - if temp_tar_path: - os.rename(temp_tar_path, output) - except Exception: - if temp_tar_path and os.path.exists(temp_tar_path): - os.remove(temp_tar_path) - raise - - -def _writepath(tarfile, src, dst, onerror, postprocess, dereference): - def tar_writepath(src, dst): - st = os.lstat(src) - if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode) or stat.S_ISLNK(st.st_mode): - if dereference and stat.S_ISLNK(st.st_mode): - src = os.path.realpath(src) - - tarfile.writepath(src, dst) - - if postprocess: - postprocess(src, dst, st.st_mode) - else: - logger.debug("Skipping non-regular file '%s' (stat: %s)", src, st) - - try: - return tar_writepath(src, dst) - except Exception as e: - if isinstance(e, OSError) and e.errno == errno.ENOENT: - logger.debug( - "Skipping missing file '%s' - looks like directory content has changed during archiving", - src, - ) - return - - if onerror: - if onerror(src, dst, sys.exc_info()): - return tar_writepath(src, dst) - else: - raise - - -def check_tar(tar_file_path): - if os.path.isfile(tar_file_path) or os.path.islink(tar_file_path): - return libarchive.is_archive(tar_file_path) - return False - - -def _make_dirs(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST or not os.path.isdir(path): - raise - - -def _hardlink(src, dst): - if hasattr(os, "link"): - os.link(src, dst) - else: - shutil.copyfile(src, dst) - - -def _symlink(src, dst): - if hasattr(os, "symlink"): - os.symlink(src, dst) - else: - # Windows specific case - we cannot copy file right now, - # because it doesn't exist yet (and would be met later in the archive) or symlink is broken. 
- # Act like tar and tarfile - skip such symlinks - if os.path.exists(src): - shutil.copytree(src, dst) - - -def get_archive_filter_name(filename): - filters = libarchive.get_archive_filter_names(filename) - # https://a.yandex-team.ru/arc/trunk/arcadia/contrib/libs/libarchive/libarchive/archive_read.c?rev=5800047#L522 - assert filters[-1] == "none", filters - if len(filters) == 1: - return None - if len(filters) == 2: - return filters[0] - raise Exception("Archive has chain of filter: {}".format(filters)) diff --git a/library/python/archive/ya.make b/library/python/archive/ya.make deleted file mode 100644 index 5b86a45a42..0000000000 --- a/library/python/archive/ya.make +++ /dev/null @@ -1,19 +0,0 @@ -PY23_LIBRARY() - -STYLE_PYTHON() - -PY_SRCS( - __init__.py -) - -PEERDIR( - contrib/python/pathlib2 - contrib/python/python-libarchive -) - -END() - -RECURSE_FOR_TESTS( - benchmark - test -) diff --git a/library/python/cityhash/cityhash.pyx b/library/python/cityhash/cityhash.pyx deleted file mode 100644 index 6f0046f0d7..0000000000 --- a/library/python/cityhash/cityhash.pyx +++ /dev/null @@ -1,75 +0,0 @@ -from libcpp.pair cimport pair - -cdef extern from "util/system/types.h": - ctypedef unsigned long ui64 - - -cdef extern from "util/digest/city.h": - ui64 CityHash64(const char* buf, size_t len) nogil - pair[ui64, ui64] CityHash128(const char* buf, size_t len) nogil - ui64 CityHash64WithSeed(const char* buf, size_t len, ui64 seed) nogil - - -cdef extern from "library/python/cityhash/hash.h": - ui64 FileCityHash128WithSeedHigh64(const char* fpath) nogil except+ - ui64 FileCityHash64(const char* fpath) nogil except+ - - -def hash64(content): - cdef const char* s = content - cdef size_t size = len(content) - cdef ui64 res = 0 - - if size > 128: - with nogil: - res = CityHash64(s, size) - else: - res = CityHash64(s, size) - - return res - -def hash128(content): - cdef const char* s = content - cdef size_t size = len(content) - cdef pair[ui64, ui64] res = pair[ui64, ui64](0, 0) - - if size > 128: - with nogil: - res = CityHash128(s, size) - else: - res = CityHash128(s, size) - return res - - -def hash64seed(content, seed): - cdef const char* s = content - cdef size_t size = len(content) - cdef ui64 _seed = seed; - - if size > 128: - with nogil: - res = CityHash64WithSeed(s, size, _seed) - else: - res = CityHash64WithSeed(s, size, _seed) - - return res - - -def filehash64(path): - cdef const char* p = path - cdef ui64 res = 0 - - with nogil: - res = FileCityHash64(p) - - return res - - -def filehash128high64(path): - cdef const char* p = path - cdef ui64 res = 0 - - with nogil: - res = FileCityHash128WithSeedHigh64(p) - - return res diff --git a/library/python/cityhash/hash.cpp b/library/python/cityhash/hash.cpp deleted file mode 100644 index 17bd3a75f3..0000000000 --- a/library/python/cityhash/hash.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "hash.h" - -#include <util/digest/city.h> -#include <util/generic/string.h> -#include <util/memory/blob.h> -#include <util/system/file.h> -#include <util/system/fstat.h> - -void ReadFile(const char* fpath, TBlob& blob) { - TFile f(TString{fpath}, RdOnly | Seq); - const TFileStat fs(f); - auto size = fs.Size; - - if (size < (64 << 10)) { - blob = TBlob::FromFileContent(f, 0, size); - } else { - blob = TBlob::FromFile(f); - } -} - -ui64 FileCityHash128WithSeedHigh64(const char* fpath) { - TBlob blob; - ReadFile(fpath, blob); - const uint128 hash = CityHash128WithSeed((const char*)blob.Data(), blob.Size(), uint128(0, blob.Size())); - return Uint128High64(hash); 
-} - -ui64 FileCityHash64(const char* fpath) { - TBlob blob; - ReadFile(fpath, blob); - return CityHash64(static_cast<const char*>(blob.Data()), blob.Size()); -} diff --git a/library/python/cityhash/hash.h b/library/python/cityhash/hash.h deleted file mode 100644 index 64b22ba74b..0000000000 --- a/library/python/cityhash/hash.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -#include <util/system/defaults.h> - -ui64 FileCityHash128WithSeedHigh64(const char* fpath); -ui64 FileCityHash64(const char* fpath); diff --git a/library/python/cityhash/ya.make b/library/python/cityhash/ya.make deleted file mode 100644 index 7948e19389..0000000000 --- a/library/python/cityhash/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -PY23_LIBRARY() - -SRCS( - hash.cpp -) - -PY_SRCS( - TOP_LEVEL - cityhash.pyx -) - -END() - -RECURSE_FOR_TESTS( - test -) diff --git a/library/python/codecs/__codecs.pyx b/library/python/codecs/__codecs.pyx deleted file mode 100644 index 42ec37fe88..0000000000 --- a/library/python/codecs/__codecs.pyx +++ /dev/null @@ -1,61 +0,0 @@ -import six - -from libcpp cimport bool - -from util.generic.string cimport TString, TStringBuf - - -def to_bytes(s): - try: - return s.encode('utf-8') - except AttributeError: - pass - - return s - - -def from_bytes(s): - if six.PY3: - return s.decode('utf-8') - - return s - - -cdef extern from "library/cpp/blockcodecs/codecs.h" namespace "NBlockCodecs": - cdef cppclass ICodec: - void Encode(TStringBuf data, TString& res) nogil - void Decode(TStringBuf data, TString& res) nogil - - cdef const ICodec* Codec(const TStringBuf& name) except + - cdef TString ListAllCodecsAsString() except + - - -def dumps(name, data): - name = to_bytes(name) - - cdef const ICodec* codec = Codec(TStringBuf(name, len(name))) - cdef TString res - cdef TStringBuf cdata = TStringBuf(data, len(data)) - - with nogil: - codec.Encode(cdata, res) - - return res.c_str()[:res.length()] - - -def loads(name, data): - name = to_bytes(name) - - cdef const ICodec* codec = Codec(TStringBuf(name, len(name))) - cdef TString res - cdef TStringBuf cdata = TStringBuf(data, len(data)) - - with nogil: - codec.Decode(cdata, res) - - return res.c_str()[:res.length()] - -def list_all_codecs(): - cdef TString res = ListAllCodecsAsString() - - return from_bytes(res.c_str()[:res.length()]).split(',') diff --git a/library/python/codecs/__init__.py b/library/python/codecs/__init__.py deleted file mode 100644 index b9fb00deb0..0000000000 --- a/library/python/codecs/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __codecs import loads, dumps, list_all_codecs # noqa diff --git a/library/python/codecs/ya.make b/library/python/codecs/ya.make deleted file mode 100644 index f42d115d5d..0000000000 --- a/library/python/codecs/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -PY23_LIBRARY() - -PEERDIR( - library/cpp/blockcodecs - contrib/python/six -) - -PY_SRCS( - __init__.py -) - -BUILDWITH_CYTHON_CPP(__codecs.pyx) - -PY_REGISTER(__codecs) - -END() diff --git a/library/python/color/README.md b/library/python/color/README.md deleted file mode 100644 index 9deae40092..0000000000 --- a/library/python/color/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Форк ((termcolor https://github.com/termcolor/termcolor/)) для PY23 с дополнительным функционалом. - -Может быть использован для конвертации текстовых спецификаций цвета (например, из markup) в esc-последовательности для корректного отображения в терминале. 
- -Пример использования: -```python -from library.python.color import tcolor -tcolor("some text", "green-bold-on_red") -> '\x1b[32m\x1b[41m\x1b[1msome text\x1b[0m' -``` diff --git a/library/python/color/__init__.py b/library/python/color/__init__.py deleted file mode 100644 index a70234945e..0000000000 --- a/library/python/color/__init__.py +++ /dev/null @@ -1,92 +0,0 @@ -from __future__ import print_function - -import copy -import os - -from termcolor import ATTRIBUTES, COLORS, HIGHLIGHTS, RESET - -__all__ = [ - "ATTRIBUTES", - "COLORS", - "HIGHLIGHTS", - "RESET", - "colored", - "cprint", - "tcolor", - "get_color_by_spec" -] - -ATTRIBUTES = copy.deepcopy(ATTRIBUTES) -ATTRIBUTES["light"] = ATTRIBUTES['bold'] - -COLORS = copy.deepcopy(COLORS) -COLORS['gray'] = COLORS['grey'] -COLORS['purple'] = COLORS['magenta'] -COLORS["reset"] = 0 - - -def get_code(code): - if os.getenv("ANSI_COLORS_DISABLED") is None: - return "\033[{}m".format(code) - return "" - - -def get_color_by_spec(color_spec): - color, on_color, attrs = get_spec(color_spec) - return get_color(color, on_color, attrs) - - -def get_color(color, on_color, attrs): - res = "" - - if color is not None: - res += get_code(COLORS[color]) - - if on_color is not None: - res += get_code(HIGHLIGHTS[on_color]) - - if attrs is not None: - for attr in attrs: - res += get_code(ATTRIBUTES[attr]) - - return res - - -def get_spec(color_spec): - """Parses string text color formatting specification. - - Arguments: - color_spec -- string spec for text color formatting, csv string with - `color` / `bg_color` / `attr` spec items having "-" as a delimiter. - - Returns a tuple: (color, bg-color, attributes list) - - Example: - get_spec("green-bold-on_red") -> (32, 41, [1]) - """ - parts = color_spec.split("-") - color = None - on_color = None - attrs = [] - for part in parts: - part = part.lower() - if part in COLORS: - color = part - if part in HIGHLIGHTS: - on_color = part - if part in ATTRIBUTES: - attrs.append(part) - return color, on_color, attrs - - -def tcolor(text, color_spec): - color, on_color, attrs = get_spec(color_spec) - return colored(text, color=color, on_color=on_color, attrs=attrs) - - -def colored(text, color=None, on_color=None, attrs=None): - return get_color(color, on_color, attrs) + text + get_code(COLORS["reset"]) - - -def cprint(text, color=None, on_color=None, attrs=None, **kwargs): - print((colored(text, color, on_color, attrs)), **kwargs) diff --git a/library/python/color/ya.make b/library/python/color/ya.make deleted file mode 100644 index ff6740b1d4..0000000000 --- a/library/python/color/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -PY23_LIBRARY() - -LICENSE(MIT) - -PY_SRCS( - __init__.py -) - -PEERDIR( - contrib/python/termcolor -) - -END() diff --git a/library/python/compress/__init__.py b/library/python/compress/__init__.py deleted file mode 100644 index 380ec47dca..0000000000 --- a/library/python/compress/__init__.py +++ /dev/null @@ -1,147 +0,0 @@ -from io import open - -import struct -import json -import os -import logging - -import library.python.par_apply as lpp -import library.python.codecs as lpc - - -logger = logging.getLogger('compress') - - -def list_all_codecs(): - return sorted(frozenset(lpc.list_all_codecs())) - - -def find_codec(ext): - def ext_compress(x): - return lpc.dumps(ext, x) - - def ext_decompress(x): - return lpc.loads(ext, x) - - ext_decompress(ext_compress(b'')) - - return {'c': ext_compress, 'd': ext_decompress, 'n': ext} - - -def codec_for(path): - for ext in reversed(path.split('.')): - try: - return 
find_codec(ext) - except Exception as e: - logger.debug('in codec_for(): %s', e) - - raise Exception('unsupported file %s' % path) - - -def compress(fr, to, codec=None, fopen=open, threads=1): - if codec: - codec = find_codec(codec) - else: - codec = codec_for(to) - - func = codec['c'] - - def iter_blocks(): - with fopen(fr, 'rb') as f: - while True: - chunk = f.read(16 * 1024 * 1024) - - if chunk: - yield chunk - else: - yield b'' - - return - - def iter_results(): - info = { - 'codec': codec['n'], - } - - if fr: - info['size'] = os.path.getsize(fr) - - yield json.dumps(info, sort_keys=True) + '\n' - - for c in lpp.par_apply(iter_blocks(), func, threads): - yield c - - with fopen(to, 'wb') as f: - for c in iter_results(): - logger.debug('complete %s', len(c)) - f.write(struct.pack('<I', len(c))) - - try: - f.write(c) - except TypeError: - f.write(c.encode('utf-8')) - - -def decompress(fr, to, codec=None, fopen=open, threads=1): - def iter_chunks(): - with fopen(fr, 'rb') as f: - cnt = 0 - - while True: - ll = f.read(4) - - if ll: - ll = struct.unpack('<I', ll)[0] - - if ll: - if ll > 100000000: - raise Exception('broken stream') - - yield f.read(ll) - - cnt += ll - else: - if not cnt: - raise Exception('empty stream') - - return - - it = iter_chunks() - extra = [] - - for chunk in it: - hdr = {} - - try: - hdr = json.loads(chunk) - except Exception as e: - logger.info('can not parse header, suspect old format: %s', e) - extra.append(chunk) - - break - - def resolve_codec(): - if 'codec' in hdr: - return find_codec(hdr['codec']) - - if codec: - return find_codec(codec) - - return codec_for(fr) - - dc = resolve_codec()['d'] - - def iter_all_chunks(): - for x in extra: - yield x - - for x in it: - yield x - - with fopen(to, 'wb') as f: - for c in lpp.par_apply(iter_all_chunks(), dc, threads): - if c: - logger.debug('complete %s', len(c)) - f.write(c) - else: - break diff --git a/library/python/compress/ya.make b/library/python/compress/ya.make deleted file mode 100644 index bbf2a784e2..0000000000 --- a/library/python/compress/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -PY23_LIBRARY() - -PEERDIR( - library/python/codecs - library/python/par_apply -) - -PY_SRCS( - __init__.py -) - -END() - -RECURSE_FOR_TESTS( - tests -) diff --git a/library/python/coredump_filter/README.md b/library/python/coredump_filter/README.md deleted file mode 100644 index 87b02e7985..0000000000 --- a/library/python/coredump_filter/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Coredump Filter - -- ABC: https://abc.yandex-team.ru/services/cores/ - -Библиотека для разбора (парсинга) трейсов отладчика gdb/lldb, python traceback-ов и minidump-ов. - -На вход принимает текст трейса, на выходе - распаршенный текст + возможность преобразовать -его в html-формат для удобства чтения. - - -## Основные клиенты -- [Агрегатор стектрейсов](https://wiki.yandex-team.ru/cores-aggregation) - - -## Правила разработки - -Библиотека написана таким образом, чтобы файл `__init__.py` мог работать -без внешних зависимостей. Это позволяет использовать библиотеку даже в Ter1-окружениях. -На данный момент этот инвариант не обложен тестами (и это следует исправить). 
diff --git a/library/python/coredump_filter/__init__.py b/library/python/coredump_filter/__init__.py deleted file mode 100644 index de0830cd43..0000000000 --- a/library/python/coredump_filter/__init__.py +++ /dev/null @@ -1,1500 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from __future__ import print_function - -import six -import enum -import datetime -import os -import re -import pkgutil -import sys -import hashlib -import json -import logging - -logger = logging.getLogger(__name__) - - -class CoredumpMode(enum.Enum): - GDB = "gdb" - LLDB = "lldb" - SDC_ASSERT = "sdc_assert" - - -ARCADIA_ROOT_LINK = "https://a.yandex-team.ru/arc/trunk/arcadia/" - -ARCADIA_ROOT_DIRS = [ - # hottest paths - "/util/", - "/contrib/", - "/library/", - "/kernel/", - "/build/", - "/search/", - - # "/gcc-4.8.2/", - - # system paths - # "/lib/x86_64-linux-gnu/", - - # all other stuff - "/aapi/", - "/addappter/", - "/adfox/", - "/admins/", - "/ads/", - "/adv/", - "/advq/", - "/afisha/", - "/afro/", - "/alet/", - "/alice/", - "/analytics/", - "/antiadblock/", - "/antirobot/", - "/apphost/", - "/april/", - "/arc/", - "/arcanum/", - "/augur/", - "/aurora/", - "/autocheck/", - "/balancer/", - "/bass/", - "/billing/", - "/bindings/", - "/browser/", - "/build/", - "/bunker/", - "/caas/", - "/canvas/", - "/captcha/", - "/catboost/", - "/certs/", - "/ci/", - "/clickhouse/", - "/client_analytics/", - "/cloud/", - "/cmicot/", - "/cmnt/", - "/comdep_analytics/", - "/commerce/", - "/contrib/", - "/crm/", - "/crowdsourcing/", - "/crypta/", - "/cv/", - "/datacloud/", - "/datalens/", - "/data-ui/", - "/devtools/", - "/dict/", - "/direct/", - "/disk/", - "/distribution/", - "/distribution_interface/", - "/district/", - "/dj/", - "/docs/", - "/douber/", - "/drive/", - "/edadeal/", - "/education/", - "/entity/", - "/ether/", - "/extdata/", - "/extsearch/", - "/FactExtract/", - "/fintech/", - "/frontend/", - "/fuzzing/", - "/games/", - "/gencfg/", - "/geobase/", - "/geoproduct/", - "/geosuggest/", - "/geotargeting/", - "/glycine/", - "/groups/", - "/haas/", - "/health/", - "/helpdesk/", - "/hitman/", - "/home/", - "/htf/", - "/hw_watcher/", - "/hypercube/", - "/iaas/", - "/iceberg/", - "/infra/", - "/intranet/", - "/inventori/", - "/ipreg/", - "/irt/", - "/it-office/", - "/jdk/", - "/juggler/", - "/junk/", - "/jupytercloud/", - "/kernel/", - "/keyboard/", - "/kikimr/", - "/kinopoisk/", - "/kinopoisk-ott/", - "/laas/", - "/lbs/", - "/library/", - "/load/", - "/locdoc/", - "/logbroker/", - "/logfeller/", - "/mail/", - "/mapreduce/", - "/maps/", - "/maps_adv/", - "/market/", - "/mb/", - "/mds/", - "/media/", - "/media-billing/", - "/media-crm/", - "/mediapers/", - "/mediaplanner/", - "/mediastat/", - "/media-stories/", - "/metrika/", - "/milab/", - "/ml/", - "/mlp/", - "/mlportal/", - "/mobile/", - "/modadvert/", - "/ms/", - "/mssngr/", - "/music/", - "/musickit/", - "/netsys/", - "/nginx/", - "/nirvana/", - "/noc/", - "/ofd/", - "/offline_data/", - "/opensource/", - "/orgvisits/", - "/ott/", - "/packages/", - "/partner/", - "/passport/", - "/payplatform/", - "/paysys/", - "/plus/", - "/portal/", - "/portalytics/", - "/pythia/", - "/quality/", - "/quasar/", - "/razladki/", - "/regulargeo/", - "/release_machine/", - "/rem/", - "/repo/", - "/rnd_toolbox/", - "/robot/", - "/rtc/", - "/rtline/", - "/rtmapreduce/", - "/rt-research/", - "/saas/", - "/samogon/", - "/samsara/", - "/sandbox/", - "/scarab/", - "/sdc/", - "/search/", - "/security/", - "/semantic-web/", - "/serp/", - "/sitesearch/", - "/skynet/", - "/smart_devices/", - 
"/smarttv/", - "/smm/", - "/solomon/", - "/specsearches/", - "/speechkit/", - "/sport/", - "/sprav/", - "/statbox/", - "/strm/", - "/suburban-trains/", - "/sup/", - "/switch/", - "/talents/", - "/tasklet/", - "/taxi/", - "/taxi_efficiency/", - "/testenv/", - "/testpalm/", - "/testpers/", - "/toloka/", - "/toolbox/", - "/tools/", - "/tracker/", - "/traffic/", - "/transfer_manager/", - "/travel/", - "/trust/", - "/urfu/", - "/vcs/", - "/velocity/", - "/vendor/", - "/vh/", - "/voicetech/", - "/weather/", - "/web/", - "/wmconsole/", - "/xmlsearch/", - "/yabs/", - "/yadoc/", - "/yandex_io/", - "/yaphone/", - "/ydf/", - "/ydo/", - "/yp/", - "/yql/", - "/ysite/", - "/yt/", - "/yweb/", - "/zen/", - "/zapravki/", - "/zen/", - "/zootopia/", - "/zora/", -] - -MY_PATH = os.path.dirname(os.path.abspath(__file__)) - -# 0.2.x uses stable hashing -CORE_PROC_VERSION = "0.2.1" - -ARCADIA_ROOT_SIGN = "$S/" -SIGNAL_NOT_FOUND = "signal not found" - - -class SourceRoot(object): - def __init__(self): - self.root = None - - def detect(self, source): - if not source: - # For example, regexp_4 - return - - if source.startswith("/-S/"): - return source[4:] - - if source.startswith("../"): - return source - - """ - if self.root is not None: - return self.root - """ - - min_pos = 100000 - for root_dir in ARCADIA_ROOT_DIRS: - pos = source.find(root_dir) - if pos < 0: - continue - - if pos < min_pos: - min_pos = pos - - if min_pos < len(source): - self.root = source[:min_pos + 1] - - def crop(self, source): - if not source: - return "" - - # detection attempt - self.detect(source) - - if self.root is not None: - return source.replace(self.root, ARCADIA_ROOT_SIGN, 1) - - # when traceback contains only ??, source root cannot be detected - return source - - -def highlight_func(s): - return ( - s - .replace("=", '<span class="symbol">=</span>') - .replace("(", '<span class="symbol">(</span>') - .replace(")", '<span class="symbol">)</span>') - ) - - -class FrameBase(object): - def __init__( - self, - frame_no=None, - addr="", - func="", - source="", - source_no="", - func_name="", - ): - self.frame_no = frame_no - self.addr = addr - self.func = func - self.source = source - self.source_no = source_no - self.func_name = func_name - - def __str__(self): - return "{}\t{}\t{}".format( - self.frame_no, - self.func, - self.source, - ) - - def to_json(self): - return { - "frame_no": self.frame_no, - "addr": self.addr, - "func": self.func, - "func_name": self.func_name, - "source": self.source, - "source_no": self.source_no, - } - - def fingerprint(self): - return self.func_name - - def cropped_source(self): - return self.source - - def raw(self): - return "{frame} {func} {source}".format( - frame=self.frame_no, - func=self.func, - source=self.source, - ) - - def html(self): - source, source_fmt = self.find_source() - return ( - '<span class="frame">{frame}</span>' - '<span class="func">{func}</span> ' - '<span class="source">{source}</span>{source_fmt}\n'.format( - frame=self.frame_no, - func=highlight_func(self.func.replace("&", "&").replace("<", "<")), - source=source, - source_fmt=source_fmt, - ) - ) - - -class LLDBFrame(FrameBase): - SOURCE_NO_RE = re.compile(r"(.*?[^\d]):(\d+)") - FUNC_RE = re.compile(r"(\w+\s)?(\w+[\w,:,_,<,>,\s,*]+).*$") - - def __init__( - self, - frame_no=None, - addr="", - func="", - source="", - source_no="", - func_name="", - ): - super(LLDBFrame, self).__init__( - frame_no=frame_no, - addr=addr, - func=func, - source=source, - source_no=source_no, - func_name=func_name, - ) - # .source calculation - - 
func = func.replace("(anonymous namespace)::", "") - m = self.FUNC_RE.match(func) - if m: - self.func_name = m.group(2) # overwrite func_name if name is in func - - if source_no: - self.source_no = source_no - self.source = source - else: - m = self.SOURCE_NO_RE.match(source) - if m: - self.source = m.group(1) - self.source_no = m.group(2) - - def find_source(self): - """ - :return: pair (source, source_fmt) - """ - source_fmt = "" - - if self.source_no: - source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no) - - return self.source, source_fmt - - -class GDBFrame(FrameBase): - SOURCE_NO_RE = re.compile(r"(.*):(\d+)") - # #7 0x00007f105f3a221d in NAppHost::NTransport::TCoroutineExecutor::Poll (this=0x7f08416a5d00, - # tasks=empty TVector (capacity=32)) at /-S/apphost/lib/executors/executors.cpp:373 - # We match with non-greedy regex a function name that cannot contain equal sign - FUNC_RE = re.compile(r"(.*?) \(([a-zA-Z0-9_]+=.*|)\)$") # function with kwarg-params or zero params - - def __init__( - self, - frame_no=None, - addr="", - func="", - source="", - source_no="", - func_name="", - ): - super(GDBFrame, self).__init__( - frame_no=frame_no, - addr=addr, - func=func, - source=source, - source_no=source_no, - func_name=func_name, - ) - if not source_no: - m = self.SOURCE_NO_RE.match(source) - if m: - self.source = m.group(1) - self.source_no = m.group(2) - if not func_name: - m = self.FUNC_RE.match(self.func) - if m: - self.func_name = m.group(1) - - def find_source(self): - """ - Returns link to arcadia if source is path in arcadia, else just string with path - :return: pair (source, source_fmt) - """ - source_fmt = "" - source = "" - link = "" - dirs = self.source.split("/") - if len(dirs) > 1 and "/{dir}/".format(dir=dirs[1]) in ARCADIA_ROOT_DIRS: - link = self.source.replace(ARCADIA_ROOT_SIGN, ARCADIA_ROOT_LINK) - else: - source = self.source - if self.source_no: - source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no) - if link: - link += "?#L{line}".format(line=self.source_no) - - if link: - source = '<a href="{link}">{source}</a>'.format( - link=link, - source=self.source, - ) - return source, source_fmt - - -class SDCAssertFrame(LLDBFrame): - - def __init__( - self, - frame_no=None, - addr="", - func="", - source="", - source_no="", - func_name="", - ): - super(SDCAssertFrame, self).__init__( - frame_no=frame_no, - addr=addr, - func=func, - source=source, - source_no=source_no, - func_name=func_name, - ) - # .source calculation - - self.source = source or "" - if isinstance(source_no, str) and len(source_no) > 0: - source_no = int(source_no, 16) - self.source_no = source_no or "" - - m = self.FUNC_RE.match(func) - if m: - self.func_name = m.group(2) - - -class Stack(object): - # priority classes - LOW_IMPORTANT = 25 - DEFAULT_IMPORTANT = 50 - SUSPICIOUS_IMPORTANT = 75 - MAX_IMPORTANT = 100 - - # default coredump's type - mode = CoredumpMode.GDB - - max_depth = None - - fingerprint_blacklist = [ - # bottom frames - "raise", - "abort", - "__gnu_cxx::__verbose_terminate_handler", - "_cxxabiv1::__terminate", - "std::terminate", - "__cxxabiv1::__cxa_throw", - # top frames - "start_thread", - "clone", - "??", - "__clone", - "__libc_start_main", - "_start", - "__nanosleep", - ] - - fingerprint_blacklist_prefix = () - - suspicious_functions = [ - "CheckedDelete", - "NPrivate::Panic", - "abort", - "close_all_fds", - "__cxa_throw", - ] - - low_important_functions_eq = [ - "poll ()", - "recvfrom ()", - "pthread_join ()", - ] - - 
low_important_functions_match = [ - "TCommonSockOps::SendV", - "WaitD (", - "SleepT (", - "Join (", - "epoll_wait", - "nanosleep", - "pthread_cond_wait", - "pthread_cond_timedwait", - "gsignal", - "std::detail::_", - "std::type_info", - "ros::NodeHandle", - ] - - def __init__( - self, - lines=None, - source_root=None, - thread_ptr=0, - thread_id=None, - frames=None, - important=None, - stack_fp=None, - fingerprint_hash=None, - stream=None, - mode=None, # type: CoredumpMode - ignore_bad_frames=True, - ): - self.lines = lines - self.source_root = source_root - self.thread_ptr = thread_ptr - self.thread_id = thread_id - if mode is not None: - self.mode = mode - - self.frames = frames or [] - if self.frames and isinstance(frames[0], dict): - self.frames = [self.frame_factory(f) for f in self.frames] - self.important = important or self.DEFAULT_IMPORTANT - if thread_id == 1: - self.important = self.MAX_IMPORTANT - self.fingerprint_hash = fingerprint_hash - self.stack_fp = stack_fp - self.stream = stream - self.ignore_bad_frames = ignore_bad_frames - - def to_json(self): - """Should be symmetric with `from_json`.""" - return { - "mode": self.mode.value, - "frames": [frame.to_json() for frame in self.frames], - "important": self.important, - } - - @staticmethod - def from_json(stack): - """Should be symmetric with `to_json`.""" - mode = CoredumpMode(stack.get("mode", CoredumpMode.GDB.value)) - # old serialization format support, should be dropped - lldb_mode = stack.get("lldb_mode", False) - if lldb_mode: - mode = CoredumpMode.LLDB - - unpacked_stack = { - "mode": mode, - "frames": stack["frames"], - "important": stack.get("important", Stack.DEFAULT_IMPORTANT), - } - return mode, unpacked_stack - - def frame_factory(self, args): - frames = { - CoredumpMode.GDB: GDBFrame, - CoredumpMode.LLDB: LLDBFrame, - CoredumpMode.SDC_ASSERT: SDCAssertFrame, - } - - class_object = frames.get(self.mode) - if not class_object: - raise Exception("Invalid mode: {}".format(self.mode.value)) - - return class_object(**args) - - def low_important(self): - return self.important <= self.LOW_IMPORTANT - - def check_importance(self, frame): - # raised priority cannot be lowered - if self.important > self.DEFAULT_IMPORTANT: - return - - # detect suspicious stacks - for name in self.suspicious_functions: - if name in frame.func: - self.important = self.SUSPICIOUS_IMPORTANT - return - - for name in self.low_important_functions_eq: - if name == frame.func: - self.important = self.LOW_IMPORTANT - - for name in self.low_important_functions_match: - if name in frame.func: - self.important = self.LOW_IMPORTANT - - def push_frame(self, frame): - self.check_importance(frame) - # ignore duplicated frames - if len(self.frames) and self.frames[-1].frame_no == frame.frame_no: - return - self.frames.append(frame) - - def parse(self): - """ - Parse one stack - """ - assert self.lines is not None - assert self.source_root is not None - - for line in self.lines: - match_found = False - for regexp in self.REGEXPS: - m = regexp.match(line) - if m: - frame_args = m.groupdict() - if "source" in frame_args: - frame_args["source"] = self.source_root.crop(frame_args["source"]) - - self.push_frame(self.frame_factory(frame_args)) - match_found = True - break - - if not match_found: - self.bad_frame(line) - - def bad_frame(self, line): - if self.ignore_bad_frames: - logger.warning("Bad frame: %s", line) - return - - raise Exception("Bad frame: `{}`, frame `{}`".format( - line, - self.debug(return_result=True), - )) - - def debug(self, 
return_result=False): - if self.low_important(): - return "" - - res = "\n".join([str(f) for f in self.frames]) - res += "----------------------------- DEBUG END\n" - if return_result: - return res - - self.stream.write(res) - - def raw(self): - return "\n".join([frame.raw() for frame in self.frames]) - - def html(self, same_hash=False, same_count=1, return_result=False): - ans = "" - pre_class = "important-" + str(self.important) - if same_hash: - pre_class += " same-hash" - - ans += '<pre class="{0}">'.format(pre_class) - if not same_hash: - ans += '<a name="stack{0}"></a>'.format(self.hash()) - - ans += '<span class="hash"><a href="#stack{0}">#{0}</a>, {1} stack(s) with same hash</span>\n'.format( - self.hash(), same_count, - ) - - for f in self.frames: - ans += f.html() - ans += "</pre>\n" - - if return_result: - return ans - - self.stream.write(ans) - - def fingerprint(self, max_num=None): - """ - Stack fingerprint: concatenation of non-common stack frames - FIXME: wipe away `max_num` - """ - stack_fp = list() - len_frames = min((max_num or len(self.frames)), len(self.frames)) - - for f in self.frames[:len_frames]: - fp = f.fingerprint() - if not fp: - continue - - if fp in self.fingerprint_blacklist: - continue - - if fp.startswith(self.fingerprint_blacklist_prefix): - continue - - if fp in stack_fp: - # FIXME: optimize duplicate remover: check only previous frame - # see also `push_frame` - continue - - stack_fp.append(fp.strip()) - - if self.max_depth is not None and len(stack_fp) >= self.max_depth: - break - - return "\n".join(stack_fp) - - def simple_html(self, num_frames=None): - if not num_frames: - num_frames = len(self.frames) - pre_class = "important-0" - ans = '<pre class="{0}">'.format(pre_class) - for i in range(min(len(self.frames), num_frames)): - ans += self.frames[i].html() - ans += "</pre>\n" - return ans - - def __str__(self): - return "\n".join(map(str, self.frames)) - - def hash(self, max_num=None): - """ - Entire stack hash for merging same stacks - """ - if self.fingerprint_hash is None: - self.fingerprint_hash = int(hashlib.md5(self.fingerprint(max_num).encode("utf-8")).hexdigest()[0:15], 16) - - return self.fingerprint_hash - - -class GDBStack(Stack): - - mode = CoredumpMode.GDB - - REGEXPS = [ - # #6 0x0000000001d9203e in NAsio::TIOService::TImpl::Run (this=0x137b1ec00) at /place/ - # sandbox-data/srcdir/arcadia_cache/library/neh/asio/io_service_impl.cpp:77 - - re.compile( - r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) at (?P<source>.*)" - ), - - # #5 TCondVar::WaitD (this=this@entry=0x10196b2b8, mutex=..., deadLine=..., deadLine@entry=...) - # at /place/sandbox-data/srcdir/arcadia_cache/util/system/condvar.cpp:150 - re.compile( - r"#(?P<frame_no>\d+)[ \t]+(?P<func>.*) at (?P<source>/.*)" - ), - - # #0 0x00007faf8eb31d84 in pthread_cond_wait@@GLIBC_2.3.2 () - # from /lib/x86_64-linux-gnu/libpthread.so.0 - re.compile( - r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) from (?P<source>.*)" - ), - - # #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 - re.compile( - r"#(?P<frame_no>\d+)[ \t]+ (?P<func>.*) at (?P<source>.*)" - ), - - # #10 0x0000000000000000 in ?? 
() - re.compile( - r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*)" - ), - ] - - -class LLDBStack(Stack): - - mode = CoredumpMode.LLDB - - REGEXPS = [ - # 0x00007fd7b300a886 libthird_Uparty_Sros_Sros_Ucomm_Sclients_Sroscpp_Sliblibroscpp.so` - # std::thread::_State_impl<std::thread::_Invoker<std::tuple<ros::PollManager::PollManager()::$_1> > >::_M_run() - # [inlined] ros::PollManager::threadFunc(this=0x00007fd7b30dab20) at poll_manager.cpp:75:16 # noqa - re.compile( - r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+inlined]\s(?P<func>.+)\sat\s(?P<source>.+)" - ), - - re.compile( - r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+?`(?P<func>.+)\sat\s(?P<source>.+)" - ), - - # * frame #0: 0x00007fd7aee51f47 libc.so.6`gsignal + 199 - re.compile( - r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+)\s(?P<source>.+)`(?P<func>.+)\s\+\s(?P<source_no>\d+)" - ), - ] - - # Take not more than `max_depth` non-filtered frames into fingerprint - # See CORES-180 - max_depth = 10 - - fingerprint_blacklist = Stack.fingerprint_blacklist + [ - "ros::ros_wallsleep", - ] - - fingerprint_blacklist_prefix = Stack.fingerprint_blacklist_prefix + ( - "___lldb_unnamed_symbol", - "__gnu_cxx", - "__gthread", - "__pthread", - "decltype", - "myriapoda::BuildersRunner", - "non", - "std::_Function_handler", - "std::_Sp_counted_ptr_inplace", - "std::__invoke_impl", - "std::__invoke_result", - "std::__shared_ptr", - "std::conditional", - "std::shared_ptr", - "std::thread::_Invoker", - "std::thread::_State_impl", - "yandex::sdc::assert_details_", - ) - - suspicious_functions = Stack.suspicious_functions + [ - "Xml", - "boost", - "ros", - "supernode", - "tensorflow", - "yandex::sdc", - ] - - -class PythonStack(Stack): - - REGEXPS = [ - re.compile( - r'File "(?P<source>.*)", line (?P<source_no>\d+), in (?P<func_name>.*)' - ), - ] - - -class SDCAssertStack(LLDBStack): - - mode = CoredumpMode.SDC_ASSERT - - REGEXPS = [ - # 0: ./modules/_shared/libcore_Stools_Slibassert.so(yandex::sdc::assert_details_::PanicV(char const*, - # long, char const*, char const*, bool, char const*, __va_list_tag*) - # +0x2aa)[0x7fb83268feaa] - re.compile( - r"(?P<frame_no>\d+):\s(?P<source>.+.so)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)" - ), - - re.compile( - r"(?P<frame_no>\d+):\s(?P<source>\w+)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)" - ) - ] - - -def parse_python_traceback(trace): - trace = trace.replace("/home/zomb-sandbox/client/", "/") - trace = trace.replace("/home/zomb-sandbox/tasks/", "/sandbox/") - trace = trace.split("\n") - exception = trace[-1] # noqa: F841 - trace = trace[1: -1] - pairs = zip(trace[::2], trace[1::2]) - stack = Stack(lines=[]) - for frame_no, (path, row) in enumerate(pairs): - # FIXME: wrap into generic tracer - m = PythonStack.REGEXPS[0].match(path.strip()) - if m: - frame_args = m.groupdict() - if not frame_args["source"].startswith("/"): - frame_args["source"] = "/" + frame_args["source"] - frame_args["frame_no"] = str(frame_no) - frame_args["func"] = row.strip() - stack.push_frame(GDBFrame(**frame_args)) - return [[stack]], [[stack.raw()]], 6 - - -def stack_factory(stack): - mode, unpacked_stack = Stack.from_json(stack) - - if mode == CoredumpMode.GDB: - return GDBStack(**unpacked_stack) - elif mode == CoredumpMode.LLDB: - return LLDBStack(**unpacked_stack) - elif mode == CoredumpMode.SDC_ASSERT: - return SDCAssertStack(**unpacked_stack) - - raise Exception("Invalid stack mode: {}. 
".format(mode)) - - -def _read_file(file_name): - with open(file_name) as f: - return f.read() - - -def _file_contents(file_name): - """Return file (or resource) contents as unicode string.""" - if getattr(sys, "is_standalone_binary", False): - try: - contents = pkgutil.get_data(__package__, file_name) - except Exception: - raise IOError("Failed to find resource: " + file_name) - else: - if not os.path.exists(file_name): - file_name = os.path.join(MY_PATH, file_name) - contents = _read_file(file_name) - # py23 compatibility - if not isinstance(contents, six.text_type): - contents = contents.decode("utf-8") - return contents - - -def html_prolog(stream, timestamp): - prolog = _file_contents("prolog.html") - assert isinstance(prolog, six.string_types) - stream.write(prolog.format( - style=_file_contents("styles.css"), - coredump_js=_file_contents("core_proc.js"), - version=CORE_PROC_VERSION, - timestamp=timestamp, - )) - - -def html_epilog(stream): - stream.write(_file_contents("epilog.html")) - - -def detect_coredump_mode(core_text): - if len(core_text) == 0: - raise Exception("Text stacktrace is blank") - - if "Panic at unixtime" in core_text: - return CoredumpMode.SDC_ASSERT - - if "(lldb)" in core_text: - return CoredumpMode.LLDB - - return CoredumpMode.GDB - - -def filter_stack_dump( - core_text=None, - stack_file_name=None, - use_fingerprint=False, - sandbox_failed_task_id=None, - output_stream=None, - timestamp=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - ignore_bad_frames=True, -): - """New interface for stacktrace filtering. Preferred to use.""" - if not core_text and not stack_file_name: - raise ValueError("Either `core_text` or `stack_file_name` should be passed to `filter_stack_dump`. ") - - if core_text is not None and stack_file_name: - raise ValueError("Only one of `core_text` and `stack_file_name` cannot be specified for `filter_stack_dump`. ") - - if stack_file_name: - core_text = _read_file(stack_file_name) - # further processing uses `core_text` only - - mode = detect_coredump_mode(core_text) - core_lines = core_text.split("\n") - - return filter_stackdump( - file_lines=core_lines, - ignore_bad_frames=ignore_bad_frames, - mode=mode, - sandbox_failed_task_id=sandbox_failed_task_id, - stream=output_stream, - timestamp=timestamp, - use_fingerprint=use_fingerprint, - use_stream=output_stream is not None, - ) - - -class StackDumperBase(object): - - SANDBOX_TASK_RE = re.compile(r".*/[0-9a-f]/[0-9a-f]/([0-9]+)/.*") - MAX_SAME_STACKS = 30 - - def __init__( - self, - use_fingerprint, - sandbox_failed_task_id, - stream, - use_stream, - file_lines, - timestamp, - mode, - file_name=None, - ignore_bad_frames=True, - ): - self.source_root = SourceRoot() - self.use_fingerprint = use_fingerprint - self.sandbox_task_id = None - self.sandbox_failed_task_id = sandbox_failed_task_id - self.stream = stream or sys.stdout - self.use_stream = use_stream - self.file_name = file_name - self.file_lines = file_lines - self.timestamp = timestamp - self.ignore_bad_frames = ignore_bad_frames - self.stack_class = self.get_stack_class(mode) - - self.signal = SIGNAL_NOT_FOUND - self.stacks = [] - self._main_info = [] - - @staticmethod - def is_ignored_line(line): - raise NotImplementedError("Not implemented static method `is_ignored_line`. ") - - @staticmethod - def get_stack_class(mode): - exist_modes = {} - for cls in [GDBStack, LLDBStack, SDCAssertStack]: - current_mode = cls.mode - if current_mode in exist_modes: - raise Exception("Duplicate modes are disallowed. 
Repeated mode: `{}`".format(current_mode.value)) - exist_modes[current_mode] = cls - - if mode not in exist_modes: - raise Exception("Unexpected coredump processing mode: `{}`".format(mode.value)) - - return exist_modes[mode] - - def check_signal(self, line): - raise NotImplementedError("Not implemented `check_signal`.") - - def set_sandbox_task_id(self, task_id): - self.sandbox_task_id = task_id - - def add_main_line(self, line): - self._main_info.append(line) - - def add_stack(self, stack_lines, thread_id): - if not stack_lines: - return - - stack = self.stack_class( - lines=stack_lines, - source_root=self.source_root, - thread_id=thread_id, - stream=self.stream, - ignore_bad_frames=self.ignore_bad_frames, - ) - self.stacks.append(stack) - - def dump(self): - if self.file_lines is None: - # FIXME(mvel): LLDB is not handled here - self.file_lines = get_parsable_gdb_text(_read_file(self.file_name)) - - self._collect_stacks() - - for stack in self.stacks: - stack.parse() - # stack.debug() - - if self.use_stream: - if self.use_fingerprint: - for stack in self.stacks: - self.stream.write(stack.fingerprint() + "\n") - self.stream.write("--------------------------------------\n") - return - else: - html_prolog(self.stream, self.timestamp) - - if self.sandbox_task_id is not None: - self.stream.write( - '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">' - 'Coredumped binary build task: ' - '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format( - self.sandbox_task_id - ) - ) - - if self.sandbox_failed_task_id is not None: - self.stream.write( - '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">' - 'Sandbox failed task: ' - '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format( - self.sandbox_failed_task_id - ) - ) - - pre_class = "" - self.stream.write('<pre class="{0}">\n'.format(pre_class)) - for line in self._main_info: - self.stream.write(line.replace("&", "&").replace("<", "<") + "\n") - self.stream.write("</pre>\n") - - sorted_stacks = sorted(self.stacks, key=lambda x: (x.important, x.fingerprint()), reverse=True) - - prev_hash = None - all_hash_stacks = [] - cur_hash_stacks = [] - for stack in sorted_stacks: - if stack.hash() == 0: - continue - - if stack.hash() == prev_hash: - if len(cur_hash_stacks) < self.MAX_SAME_STACKS: - # do not collect too much - cur_hash_stacks.append(stack) - continue - - # hash changed - if cur_hash_stacks: - all_hash_stacks.append(cur_hash_stacks) - - prev_hash = stack.hash() - cur_hash_stacks = [stack, ] - - # push last - if cur_hash_stacks: - all_hash_stacks.append(cur_hash_stacks) - - if self.use_stream: - for cur_hash_stacks in all_hash_stacks: - same_hash = False - for stack in cur_hash_stacks: - stack.html(same_hash=same_hash, same_count=len(cur_hash_stacks)) - same_hash = True - - html_epilog(self.stream) - else: - raw_hash_stacks = [ - [stack.raw() for stack in common_hash_stacks] - for common_hash_stacks in all_hash_stacks - ] - return all_hash_stacks, raw_hash_stacks, self.signal - - def _collect_stacks(self): - stack_lines = [] - stack_detected = False - thread_id = None - - for line in self.file_lines: - line = line.strip() - if self.is_ignored_line(line): - continue - - if "Core was generated" in line: - m = self.SANDBOX_TASK_RE.match(line) - if m: - self.set_sandbox_task_id(int(m.group(1))) - - self.check_signal(line) - - # [Switching to thread 55 (Thread 0x7f100a94c700 (LWP 21034))] - # Thread 584 (Thread 0x7ff363c03700 (LWP 2124)): - - # see test2 and test3 - tm = 
self.THREAD_RE.match(line) - if tm: - stack_detected = True - self.add_stack( - stack_lines=stack_lines, - thread_id=thread_id, - ) - stack_lines = [] - thread_id = int(tm.group(1)) - continue - - if stack_detected: - stack_lines.append(line) - else: - self.add_main_line(line) - - # parse last stack - self.add_stack( - stack_lines=stack_lines, - thread_id=thread_id, - ) - - -class StackDumperGDB(StackDumperBase): - - SIGNAL_FLAG = "Program terminated with signal" - THREAD_RE = re.compile(r".*[Tt]hread (\d+) .*") - LINE_IN = re.compile(r"\d+\tin ") - - def is_ignored_line(self, line): - if not line: - return True - - if line.startswith("[New "): - # LWP, Thread, process - return True - - if line.startswith("[Thread "): - return True - - if line.startswith("Using "): - return True - - if line.startswith("warning:"): - return True - - if line.startswith("Python Exception"): - # TODO: handle this more carefully - return True - - if line[0] != "#" and "No such file or directory" in line: - return True - - if self.LINE_IN.match(line): - # see test1.txt for example - # 641 in /place/sandbox-data/srcdir/arcadia/library/coroutine/engine/impl.h - return True - - return False - - def check_signal(self, line): - if self.SIGNAL_FLAG in line: - self.signal = line[line.find(self.SIGNAL_FLAG) + len(self.SIGNAL_FLAG):].split(",")[0] - - -class StackDumperLLDB(StackDumperBase): - - SIGNAL_FLAG = "stop reason = signal" - - THREAD_RE = re.compile(r".*thread #(\d+), .*") - - SKIP_LINES = { - "(lldb) bt all", - "(lldb) script import sys", - "(lldb) target create", - "Core file", - - # Drop signal interceptor call - # * frame #0: 0x00007efd49042fb7 libc.so.6`__GI___libc_sigaction at sigaction.c:54 - # TODO(epsilond1): Set MAX_IMPORTANT for some thread - "__GI___libc_sigaction", - - # Drop unnamed symbols at lines like - # frame #4: 0x00007fd8054156df libstdc++.so.6`___lldb_unnamed_symbol440$$libstdc++.so.6 + 15 - "$$", - } - - @staticmethod - def is_ignored_line(line): - if not line: - return True - - for skip_line in StackDumperLLDB.SKIP_LINES: - if skip_line in line: - return True - return False - - def check_signal(self, line): - if self.SIGNAL_FLAG in line and self.signal == SIGNAL_NOT_FOUND: - self.signal = line.split()[-1] - - -class StackDumperSDCAssert(StackDumperBase): - - THREAD_RE = re.compile( - r"(\d+)(:\s)" - ) - - def is_ignored_line(self, line): - if not line: - return True - - return not re.match(self.THREAD_RE, line) - - def check_signal(self, line): - self.signal = SIGNAL_NOT_FOUND - - def _collect_stacks(self): - stack_lines = [] - for line in self.file_lines: - line = line.strip() - if self.is_ignored_line(line): - continue - stack_lines.append(line) - self.check_signal(line) - - self.add_stack( - stack_lines=stack_lines, - thread_id=0, - ) - - -def filter_stackdump( - file_name=None, - use_fingerprint=False, - sandbox_failed_task_id=None, - stream=None, - file_lines=None, - use_stream=True, - timestamp=None, - ignore_bad_frames=True, - mode=None, -): - if mode is None and file_name is not None: - mode = detect_coredump_mode(_read_file(file_name)) - if mode == CoredumpMode.GDB: - stack_dumper_cls = StackDumperGDB - elif mode == CoredumpMode.LLDB: - stack_dumper_cls = StackDumperLLDB - elif mode == CoredumpMode.SDC_ASSERT: - stack_dumper_cls = StackDumperSDCAssert - else: - raise Exception("Invalid mode: {}".format(mode.value)) - - dumper = stack_dumper_cls( - file_name=file_name, - use_fingerprint=use_fingerprint, - sandbox_failed_task_id=sandbox_failed_task_id, - stream=stream, - 
use_stream=use_stream, - file_lines=file_lines, - timestamp=timestamp, - ignore_bad_frames=ignore_bad_frames, - mode=mode, - ) - - return dumper.dump() - - -def get_parsable_gdb_text(core_text): - # FIXME(mvel): Check encoding? - # core_text = core_text.encode("ascii", "ignore").decode("ascii") - core_text = ( - core_text - # .replace("#", "\n#") # bug here - .replace("No core", "\nNo core") - .replace("[New", "\n[New") - .replace("\n\n", "\n") - ) - - return core_text.split("\n") - - -if __name__ == "__main__": - if len(sys.argv) < 2: - sys.stderr.write( - """Traceback filter "Tri Korochki" -https://wiki.yandex-team.ru/cores-aggregation/ -Usage: - core_proc.py <traceback.txt> [-f|--fingerprint] - core_proc.py -v|--version -""" - ) - sys.exit(1) - - if sys.argv[1] == "--version" or sys.argv[1] == "-v": - if os.system("svn info 2>/dev/null | grep '^Revision'") != 0: - print(CORE_PROC_VERSION) - sys.exit(0) - - sandbox_failed_task_id = None - - use_fingerprint = False - if len(sys.argv) >= 3: - if sys.argv[2] == "-f" or sys.argv[2] == "--fingerprint": - use_fingerprint = True - sandbox_failed_task_id = sys.argv[2] - - filter_stack_dump( - core_text=_read_file(sys.argv[1]), - use_fingerprint=use_fingerprint, - sandbox_failed_task_id=sandbox_failed_task_id, - output_stream=sys.stdout, - ) - - -""" -Stack group is a `Stack` objects list with the same hash (fingerprint). -""" - - -class StackEncoder(json.JSONEncoder): - """Stack JSON serializer.""" - - def default(self, obj): - if isinstance(obj, Stack): - return obj.to_json() - - return json.JSONEncoder.default(obj) - - -def serialize_stacks(stack_groups): - """ - Serialize list of stack groups to string (using JSON format). - - :param stack_groups: list of stack groups. - :return: JSON serialized to string - """ - return json.dumps(stack_groups, cls=StackEncoder) - - -def deserialize_stacks(stack_groups_str): - """ - Restore JSON-serialized stack data into stack groups. - - :param stack_groups_str: JSON-serialized data. - :return: list of stack groups - """ - stack_groups_json = json.loads(stack_groups_str) - # please do not use `map` hell here, it's impossible to debug - all_stacks = [ - [stack_factory(stack) for stack in stacks] - for stacks in stack_groups_json - ] - return all_stacks diff --git a/library/python/coredump_filter/core_proc.js b/library/python/coredump_filter/core_proc.js deleted file mode 100644 index 15413adeae..0000000000 --- a/library/python/coredump_filter/core_proc.js +++ /dev/null @@ -1,21 +0,0 @@ - - -$(document).ready(function() { - $('#show-same-stacks').click(function() { - var stacks = $('.same-hash'); - for (var i = 0; i < stacks.length; ++i) - $(stacks[i]).show(); - $('#show-same-stacks').hide(); - $('#hide-same-stacks').show(); - return false; - }); - - $('#hide-same-stacks').click(function() { - var stacks = $('.same-hash'); - for (var i = 0; i < stacks.length; ++i) - $(stacks[i]).hide(); - $('#hide-same-stacks').hide(); - $('#show-same-stacks').show(); - return false; - }); -}); diff --git a/library/python/coredump_filter/epilog.html b/library/python/coredump_filter/epilog.html deleted file mode 100644 index b317cc2a91..0000000000 --- a/library/python/coredump_filter/epilog.html +++ /dev/null @@ -1,2 +0,0 @@ - </body> -</html>
\ No newline at end of file diff --git a/library/python/coredump_filter/prolog.html b/library/python/coredump_filter/prolog.html deleted file mode 100644 index f102a7210d..0000000000 --- a/library/python/coredump_filter/prolog.html +++ /dev/null @@ -1,24 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <style>{style}</style> - <script src="https://yastatic.net/jquery/1.7.1/jquery.min.js"></script> - <script>{coredump_js}</script> - </head> - <body> - <h1>Coredump report generated on {timestamp} by Coredump/traceback filter - <i><a href="http://mvel.at.yandex-team.ru/2373">Tri Korochki</a></i> - ({version}) - </h1> - <h3>Author: <a href="https://staff.yandex-team.ru/mvel">mvel@</a> aka Mikhail Veltishchev</h3> - <h3>© Yandex LLC. All rights reversed</h3> - <div class="legend"> - <ul style="line-height: 22px"> - <li><span class="important-100">Problem</span> stacks</li> - <li><span class="important-75">Suspicious</span> stacks</li> - <li><span class="important-50">Active</span> stacks</li> - <li><span class="important-25">Non-active</span> stacks</li> - </ul> - <a class="show-same-hash" id="show-same-stacks" href="#">Show same stacks</a> - <a class="show-same-hash" id="hide-same-stacks" href="#" style="display: none">Hide same stacks</a> - </div> diff --git a/library/python/coredump_filter/styles.css b/library/python/coredump_filter/styles.css deleted file mode 100644 index fdd09ce09e..0000000000 --- a/library/python/coredump_filter/styles.css +++ /dev/null @@ -1,116 +0,0 @@ -body { - font-family: sans-serif; - font-size: 12px; -} - -a { - text-decoration: none; - color: #486DEC; -} - -.frame { - color: #7f7f7f; - display: inline-block; - width: 30px; -} - -.addr { - color: #999999; - padding-right: 10px; -} - -.func { - color: #7f0000; - word-wrap: normal; -} - -.source { - color: #007f00; -} - -.symbol { - color: #0000ff; -} - -h1 { - font-size: 1.5em; - margin-top: .1em; - margin-bottom: .2em; -} - -h3 { - font-size: 1em; - margin-top: .1em; - margin-bottom: .2em; -} - -pre { - overflow-x: auto; - margin: 6px 0px 6px 0px; - padding: 0px 12px 6px 12px; - position: relative; -} - -pre.important-25 { - background-color: #eeeeee; -} - -span.important-25 { - background-color: #eeeeee; - padding: 3px; -} - -pre.important-50 { - background-color: #e7ffe7; -} - -span.important-50 { - background-color: #e7ffe7; - padding: 3px; -} - -pre.important-75 { - background-color: #ffffcc; -} - -span.important-75 { - background-color: #ffffcc; - padding: 3px; -} - -pre.important-100 { - background-color: #ffdddd; -} - -span.important-100 { - background-color: #ffdddd; - padding: 3px; -} - -a.show-same-hash { - padding: 0px 20px 0px 20px; -} - -/* hidden by default */ -.same-hash { - display: none; -} - -span.hash { - position: absolute; - top: 3px; right: 3px; -} - -div.legend { - position: absolute; - z-index: 1; - top: 5px; - right: 8px; - border: 1px solid #7f7f7f; - border-radius: 3px; - padding: 0px 20px 3px 0px; -} - -div.legend ul { - margin: 3px 0px 3px 0px; -} diff --git a/library/python/coredump_filter/ya.make b/library/python/coredump_filter/ya.make deleted file mode 100644 index fc8ec1a45f..0000000000 --- a/library/python/coredump_filter/ya.make +++ /dev/null @@ -1,23 +0,0 @@ -PY23_LIBRARY() - -PY_SRCS( - __init__.py -) - -RESOURCE_FILES( - PREFIX library/python/coredump_filter/ - core_proc.js - epilog.html - prolog.html - styles.css -) - -IF(PYTHON2) - PEERDIR(contrib/deprecated/python/enum34) -ENDIF() - -END() - -RECURSE( - tests -) diff --git a/library/python/json/__init__.py 
diff --git a/library/python/json/__init__.py b/library/python/json/__init__.py
deleted file mode 100644
index c6420d5e6d..0000000000
--- a/library/python/json/__init__.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from library.python.json.loads import loads as _loads
-from simplejson import loads as _sj_loads
-
-
-def loads(*args, **kwargs):
-    try:
-        return _loads(*args, **kwargs)
-    except Exception as e:
-        if 'invalid syntax at token' in str(e):
-            kwargs.pop('intern_keys', None)
-            kwargs.pop('intern_vals', None)
-            kwargs.pop('may_unicode', None)
-            return _sj_loads(*args, **kwargs)
-
-        raise
-
-
-from simplejson import load, dump, dumps  # noqa
-
-
-def read_file(file_name, **kwargs):
-    """
-    Read a file and return its parsed json contents.
-
-    All kwargs will be proxied to the `json.load` method as is.
-
-    :param file_name: file with json contents
-    :return: parsed json contents
-    """
-    with open(file_name) as f:
-        return load(f, **kwargs)
-
-
-def write_file(file_name, contents, **kwargs):
-    """
-    Dump json data to a file.
-
-    All kwargs will be proxied to the `json.dump` method as is.
-
-    :param file_name: file to dump to
-    :param contents: JSON-serializable object
-    """
-    with open(file_name, "w") as f:
-        dump(contents, f, **kwargs)
diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp
deleted file mode 100644
index 19cdb096ae..0000000000
--- a/library/python/json/loads.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-#include "loads.h"
-
-#include <Python.h>
-
-#include <library/cpp/json/fast_sax/parser.h>
-
-#include <util/generic/algorithm.h>
-#include <util/generic/stack.h>
-#include <util/generic/vector.h>
-#include <util/generic/ylimits.h>
-#include <util/string/ascii.h>
-
-using namespace NJson;
-
-namespace {
-    enum EKind {
-        Undefined,
-        Array,
-        Dict,
-        Value,
-        Key,
-    };
-
-    static inline TStringBuf ToStr(EKind kind) noexcept {
-        switch (kind) {
-            case Undefined:
-                return TStringBuf("Undefined");
-
-            case Array:
-                return TStringBuf("Array");
-
-            case Dict:
-                return TStringBuf("Dict");
-
-            case Value:
-                return TStringBuf("Value");
-
-            case Key:
-                return TStringBuf("Key");
-        }
-
-        Y_UNREACHABLE();
-    }
-
-    struct TUnref {
-        static inline void Destroy(PyObject* o) noexcept {
-            Py_XDECREF(o);
-        }
-    };
-
-    using TObjectPtr = TAutoPtr<PyObject, TUnref>;
-
-    static inline TObjectPtr BuildBool(bool val) noexcept {
-        if (val) {
-            Py_RETURN_TRUE;
-        }
-
-        Py_RETURN_FALSE;
-    }
-
-    // Translate Python exceptions from object-creating functions into C++ exceptions.
-    // Such errors are reported by returning nullptr.
-    // When a Python error is set and the C++ exception is caught by the Cython wrapper,
-    // the Python exception is propagated, while the C++ exception is discarded.
-    PyObject* CheckNewObject(PyObject* obj) {
-        Y_ENSURE(obj != nullptr, "got python exception");
-        return obj;
-    }
-
-    void CheckRetcode(int retcode) {
-        Y_ENSURE(retcode == 0, "got python exception");
-    }
-
-    static inline TObjectPtr BuildSmall(long val) {
-#if PY_VERSION_HEX >= 0x03000000
-        return CheckNewObject(PyLong_FromLong(val));
-#else
-        return CheckNewObject(PyInt_FromLong(val));
-#endif
-    }
-
-    PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) {
-#if PY_VERSION_HEX >= 0x03000000
-        Y_UNUSED(mayUnicode);
-        PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size());
-        if (intern) {
-            PyUnicode_InternInPlace(&pyStr);
-        }
-#else
-        const bool needUnicode = mayUnicode && !AllOf(str, IsAscii);
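-        // Python 2 can intern only plain byte strings (there is no unicode
-        // interning), hence the "intern && !needUnicode" condition below.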
-        PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size())
-                                      : PyString_FromStringAndSize(str.data(), str.size());
-        if (intern && !needUnicode) {
-            PyString_InternInPlace(&pyStr);
-        }
-#endif
-        return pyStr;
-    }
-
-    struct TVal {
-        EKind Kind = Undefined;
-        TObjectPtr Val;
-
-        inline TVal() noexcept
-            : Kind(Undefined)
-        {
-        }
-
-        inline TVal(EKind kind, TObjectPtr val) noexcept
-            : Kind(kind)
-            , Val(val)
-        {
-        }
-    };
-
-    static inline TObjectPtr NoneRef() noexcept {
-        Py_RETURN_NONE;
-    }
-
-    struct TContext: public TJsonCallbacks {
-        const bool InternKeys;
-        const bool InternVals;
-        const bool MayUnicode;
-        TStack<TVal, TVector<TVal>> S;
-
-        inline TContext(bool internKeys, bool internVals, bool mayUnicode)
-            : TJsonCallbacks(true)
-            , InternKeys(internKeys)
-            , InternVals(internVals)
-            , MayUnicode(mayUnicode)
-        {
-            S.emplace();
-        }
-
-        inline bool Consume(TObjectPtr o) {
-            auto& t = S.top();
-
-            if (t.Kind == Array) {
-                CheckRetcode(PyList_Append(t.Val.Get(), o.Get()));
-            } else if (t.Kind == Key) {
-                auto key = S.top().Val;
-
-                S.pop();
-
-                CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get()));
-            } else {
-                t = TVal(Value, o);
-            }
-
-            return true;
-        }
-
-        inline TObjectPtr Pop(EKind expect) {
-            auto res = S.top();
-
-            S.pop();
-
-            if (res.Kind != expect) {
-                ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")";
-            }
-
-            return res.Val;
-        }
-
-        inline void Push(EKind kind, TObjectPtr object) {
-            S.push(TVal(kind, object));
-        }
-
-        virtual bool OnNull() {
-            return Consume(NoneRef());
-        }
-
-        virtual bool OnBoolean(bool v) {
-            return Consume(BuildBool(v));
-        }
-
-        virtual bool OnInteger(long long v) {
-            if (v >= (long long)Min<long>()) {
-                return Consume(BuildSmall((long)v));
-            }
-
-            return Consume(CheckNewObject(PyLong_FromLongLong(v)));
-        }
-
-        virtual bool OnUInteger(unsigned long long v) {
-            if (v <= (unsigned long long)Max<long>()) {
-                return Consume(BuildSmall((long)v));
-            }
-
-            return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v)));
-        }
-
-        virtual bool OnDouble(double v) {
-            return Consume(CheckNewObject(PyFloat_FromDouble(v)));
-        }
-
-        virtual bool OnString(const TStringBuf& v) {
-            return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode)));
-        }
-
-        virtual bool OnOpenMap() {
-            Push(Dict, CheckNewObject(PyDict_New()));
-
-            return true;
-        }
-
-        virtual bool OnCloseMap() {
-            return Consume(Pop(Dict));
-        }
-
-        virtual bool OnMapKey(const TStringBuf& k) {
-            Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode)));
-            return true;
-        }
-
-        virtual bool OnOpenArray() {
-            Push(Array, CheckNewObject(PyList_New(0)));
-
-            return true;
-        }
-
-        virtual bool OnCloseArray() {
-            return Consume(Pop(Array));
-        }
-    };
-}
-
-PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) {
-    TContext ctx(internKeys, internVals, mayUnicode);
-
-    if (!len) {
-        ythrow yexception() << "parse error: zero length input string";
-    }
-
-    if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) {
-        ythrow yexception() << "parse error";
-    }
-
-    auto& s = ctx.S;
-
-    if (!s || s.top().Kind != Value) {
-        ythrow yexception() << "internal error: unexpected parser state";
-    }
-
-    return s.top().Val.Release();
-}
diff --git a/library/python/json/loads.h b/library/python/json/loads.h
deleted file mode 100644
index 62dcdf6f21..0000000000
--- a/library/python/json/loads.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-
-#include <Python.h>
-
-PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys = false, bool internVals = false, bool mayUnicode = false);
diff --git a/library/python/json/loads.pyx b/library/python/json/loads.pyx
deleted file mode 100644
index 82e5c6dce7..0000000000
--- a/library/python/json/loads.pyx
+++ /dev/null
@@ -1,14 +0,0 @@
-from libcpp cimport bool
-
-cdef extern from "library/python/json/loads.h":
-    object LoadJsonFromString(const char*, size_t, bool internKeys, bool internVals, bool mayUnicode) except +
-
-
-def loads(s, intern_keys = False, intern_vals = False, may_unicode = False):
-    if isinstance(s, unicode):
-        s = s.encode('utf-8')
-
-    try:
-        return LoadJsonFromString(s, len(s), intern_keys, intern_vals, may_unicode)
-    except Exception as e:
-        raise ValueError(str(e))
diff --git a/library/python/json/ya.make b/library/python/json/ya.make
deleted file mode 100644
index 74a82de9d8..0000000000
--- a/library/python/json/ya.make
+++ /dev/null
@@ -1,17 +0,0 @@
-PY23_LIBRARY()
-
-PEERDIR(
-    contrib/python/simplejson
-    library/cpp/json/fast_sax
-)
-
-PY_SRCS(
-    __init__.py
-    loads.pyx
-)
-
-SRCS(
-    loads.cpp
-)
-
-END()
diff --git a/library/python/par_apply/__init__.py b/library/python/par_apply/__init__.py
deleted file mode 100644
index 19b89ae843..0000000000
--- a/library/python/par_apply/__init__.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import sys
-import threading
-import six
-
-from six.moves import queue
-
-
-def par_apply(seq, func, thr_num, join_polling=None):
-    if thr_num < 2:
-        for x in seq:
-            yield func(x)
-
-        return
-
-    in_q = queue.Queue()
-    out_q = queue.Queue()
-
-    def enumerate_blocks():
-        n = 0
-
-        for b in seq:
-            yield n, [b]
-            n += 1
-
-        yield n, None
-
-    def iter_out():
-        n = 0
-        d = {}
-
-        while True:
-            if n in d:
-                r = d[n]
-                del d[n]
-                n += 1
-
-                yield r
-            else:
-                res = out_q.get()
-
-                d[res[0]] = res
-
-    out_iter = iter_out()
-
-    def wait_block():
-        for x in out_iter:
-            return x
-
-    def iter_compressed():
-        p = 0
-
-        for n, b in enumerate_blocks():
-            in_q.put((n, b))
-
-            while n > p + (thr_num * 2):
-                p, b, c = wait_block()
-
-                if not b:
-                    return
-
-                yield p, c
-
-        while True:
-            p, b, c = wait_block()
-
-            if not b:
-                return
-
-            yield p, c
-
-    def proc():
-        while True:
-            data = in_q.get()
-
-            if data is None:
-                return
-
-            n, b = data
-
-            if b:
-                try:
-                    res = (func(b[0]), None)
-                except Exception:
-                    res = (None, sys.exc_info())
-            else:
-                res = (None, None)
-
-            out_q.put((n, b, res))
-
-    thrs = [threading.Thread(target=proc) for i in range(0, thr_num)]
-
-    for t in thrs:
-        t.start()
-
-    try:
-        for p, c in iter_compressed():
-            res, err = c
-
-            if err:
-                six.reraise(*err)
-
-            yield res
-    finally:
-        for t in thrs:
-            in_q.put(None)
-
-        for t in thrs:
-            if join_polling is not None:
-                while True:
-                    t.join(join_polling)
-                    if not t.is_alive():
-                        break
-            else:
-                t.join()
diff --git a/library/python/par_apply/ya.make b/library/python/par_apply/ya.make
deleted file mode 100644
index b14592ab79..0000000000
--- a/library/python/par_apply/ya.make
+++ /dev/null
@@ -1,11 +0,0 @@
-PY23_LIBRARY()
-
-PEERDIR(
-    contrib/python/six
-)
-
-PY_SRCS(
-    __init__.py
-)
-
-END()
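par_apply() above fans func out over thr_num worker threads while yielding results strictly in input order, keeping roughly thr_num * 2 items in flight; worker exceptions are re-raised in the consuming thread via six.reraise(). A minimal usage sketch (zlib.compress and the generated chunks are illustrative stand-ins for real per-item work):

    import zlib

    from library.python.par_apply import par_apply

    chunks = (b"block-%d" % i for i in range(100))  # any iterable of work items
    for blob in par_apply(chunks, zlib.compress, 8):
        pass  # results arrive in the same order as the corresponding inputs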