author    | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300
commit    | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch)
tree      | 291d72dbd7e9865399f668c84d11ed86fb190bbf /library/python
parent    | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff)
download  | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz
YQ Connector: use docker-compose in integration tests
Diffstat (limited to 'library/python')
40 files changed, 3285 insertions, 0 deletions
diff --git a/library/python/archive/__init__.py b/library/python/archive/__init__.py new file mode 100644 index 0000000000..a6e032ff4c --- /dev/null +++ b/library/python/archive/__init__.py @@ -0,0 +1,266 @@ +import errno +import logging +import os +import random +import shutil +import stat +import string +import sys + +import six + +import libarchive +import libarchive._libarchive as _libarchive + +from pathlib2 import PurePath + +logger = logging.getLogger(__name__) + +GZIP = "gzip" +ZSTD = "zstd" + +ENCODING = "utf-8" + + +class ConfigureError(Exception): + pass + + +class Level(object): + def __init__(self, level): + self.level = level + + +class Compression(object): + Fast = Level(1) + Default = Level(2) + Best = Level(3) + + +def get_compression_level(filter_name, level): + if level is None or not filter_name: + return None + elif isinstance(level, Level): + level = { + GZIP: { + Compression.Fast: 1, + Compression.Default: 6, + Compression.Best: 9, + }, + ZSTD: { + Compression.Fast: 1, + Compression.Default: 3, + Compression.Best: 22, + }, + }[filter_name][level] + return level + + +def encode(value, encoding): + return value.encode(encoding) + + +def extract_tar(tar_file_path, output_dir, strip_components=None, fail_on_duplicates=True): + output_dir = encode(output_dir, ENCODING) + _make_dirs(output_dir) + with libarchive.Archive(tar_file_path, mode="rb") as tarfile: + for e in tarfile: + p = _strip_prefix(e.pathname, strip_components) + if not p: + continue + dest = os.path.join(output_dir, encode(p, ENCODING)) + if e.pathname.endswith("/"): + _make_dirs(dest) + continue + + if strip_components and fail_on_duplicates: + if os.path.exists(dest): + raise Exception( + "The file {} is duplicated because of strip_components={}".format(dest, strip_components) + ) + + _make_dirs(os.path.dirname(dest)) + + if e.ishardlink(): + src = os.path.join(output_dir, _strip_prefix(e.hardlink, strip_components)) + _hardlink(src, dest) + continue + if e.issym(): + src = _strip_prefix(e.linkname, strip_components) + _symlink(src, dest) + continue + + with open(dest, "wb") as f: + if hasattr(os, "fchmod"): + os.fchmod(f.fileno(), e.mode & 0o7777) + libarchive.call_and_check( + _libarchive.archive_read_data_into_fd, + tarfile._a, + tarfile._a, + f.fileno(), + ) + + +def _strip_prefix(path, strip_components): + if not strip_components: + return path + p = PurePath(path) + stripped = str(p.relative_to(*p.parts[:strip_components])) + return '' if stripped == '.' else stripped + + +def tar( + paths, + output, + compression_filter=None, + compression_level=None, + fixed_mtime=None, + onerror=None, + postprocess=None, + dereference=False, +): + if isinstance(paths, six.string_types): + paths = [paths] + + if isinstance(output, six.string_types): + temp_tar_path, stream = ( + output + "." 
+ "".join(random.sample(string.ascii_lowercase, 8)), + None, + ) + else: + temp_tar_path, stream = None, output + + compression_level = get_compression_level(compression_filter, compression_level) + + try: + if compression_filter: + filter_name = compression_filter + if compression_level is not None: + filter_opts = {"compression-level": str(compression_level)} + else: + filter_opts = {} + # force gzip don't store mtime of the original file being compressed (http://www.gzip.org/zlib/rfc-gzip.html#file-format) + if fixed_mtime is not None and compression_filter == GZIP: + filter_opts["timestamp"] = "" + else: + filter_name = filter_opts = None + + with libarchive.Archive( + stream or temp_tar_path, + mode="wb", + format="gnu", + filter=filter_name, + filter_opts=filter_opts, + fixed_mtime=fixed_mtime, + ) as tarfile: + # determine order if fixed_mtime is specified to produce stable archive + paths = paths if fixed_mtime is None else sorted(paths) + + for p in paths: + if type(p) == tuple: + path, arcname = p + else: + path, arcname = p, os.path.basename(p) + + if os.path.isdir(path): + for root, dirs, files in os.walk(path, followlinks=dereference): + if fixed_mtime is None: + entries = dirs + files + else: + entries = sorted(dirs) + sorted(files) + + reldir = os.path.relpath(root, path) + for f in entries: + _writepath( + tarfile, + os.path.join(root, f), + os.path.normpath(os.path.join(arcname, reldir, f)), + onerror, + postprocess, + dereference, + ) + else: + if not os.path.exists(path): + raise OSError("Specified path doesn't exist: {}".format(path)) + _writepath(tarfile, path, arcname, onerror, postprocess, dereference) + + if temp_tar_path: + os.rename(temp_tar_path, output) + except Exception: + if temp_tar_path and os.path.exists(temp_tar_path): + os.remove(temp_tar_path) + raise + + +def _writepath(tarfile, src, dst, onerror, postprocess, dereference): + def tar_writepath(src, dst): + st = os.lstat(src) + if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode) or stat.S_ISLNK(st.st_mode): + if dereference and stat.S_ISLNK(st.st_mode): + src = os.path.realpath(src) + + tarfile.writepath(src, dst) + + if postprocess: + postprocess(src, dst, st.st_mode) + else: + logger.debug("Skipping non-regular file '%s' (stat: %s)", src, st) + + try: + return tar_writepath(src, dst) + except Exception as e: + if isinstance(e, OSError) and e.errno == errno.ENOENT: + logger.debug( + "Skipping missing file '%s' - looks like directory content has changed during archiving", + src, + ) + return + + if onerror: + if onerror(src, dst, sys.exc_info()): + return tar_writepath(src, dst) + else: + raise + + +def check_tar(tar_file_path): + if os.path.isfile(tar_file_path) or os.path.islink(tar_file_path): + return libarchive.is_archive(tar_file_path) + return False + + +def _make_dirs(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST or not os.path.isdir(path): + raise + + +def _hardlink(src, dst): + if hasattr(os, "link"): + os.link(src, dst) + else: + shutil.copyfile(src, dst) + + +def _symlink(src, dst): + if hasattr(os, "symlink"): + os.symlink(src, dst) + else: + # Windows specific case - we cannot copy file right now, + # because it doesn't exist yet (and would be met later in the archive) or symlink is broken. 
+ # Act like tar and tarfile - skip such symlinks + if os.path.exists(src): + shutil.copytree(src, dst) + + +def get_archive_filter_name(filename): + filters = libarchive.get_archive_filter_names(filename) + # https://a.yandex-team.ru/arc/trunk/arcadia/contrib/libs/libarchive/libarchive/archive_read.c?rev=5800047#L522 + assert filters[-1] == "none", filters + if len(filters) == 1: + return None + if len(filters) == 2: + return filters[0] + raise Exception("Archive has chain of filter: {}".format(filters)) diff --git a/library/python/archive/ya.make b/library/python/archive/ya.make new file mode 100644 index 0000000000..5b86a45a42 --- /dev/null +++ b/library/python/archive/ya.make @@ -0,0 +1,19 @@ +PY23_LIBRARY() + +STYLE_PYTHON() + +PY_SRCS( + __init__.py +) + +PEERDIR( + contrib/python/pathlib2 + contrib/python/python-libarchive +) + +END() + +RECURSE_FOR_TESTS( + benchmark + test +) diff --git a/library/python/cityhash/cityhash.pyx b/library/python/cityhash/cityhash.pyx new file mode 100644 index 0000000000..6f0046f0d7 --- /dev/null +++ b/library/python/cityhash/cityhash.pyx @@ -0,0 +1,75 @@ +from libcpp.pair cimport pair + +cdef extern from "util/system/types.h": + ctypedef unsigned long ui64 + + +cdef extern from "util/digest/city.h": + ui64 CityHash64(const char* buf, size_t len) nogil + pair[ui64, ui64] CityHash128(const char* buf, size_t len) nogil + ui64 CityHash64WithSeed(const char* buf, size_t len, ui64 seed) nogil + + +cdef extern from "library/python/cityhash/hash.h": + ui64 FileCityHash128WithSeedHigh64(const char* fpath) nogil except+ + ui64 FileCityHash64(const char* fpath) nogil except+ + + +def hash64(content): + cdef const char* s = content + cdef size_t size = len(content) + cdef ui64 res = 0 + + if size > 128: + with nogil: + res = CityHash64(s, size) + else: + res = CityHash64(s, size) + + return res + +def hash128(content): + cdef const char* s = content + cdef size_t size = len(content) + cdef pair[ui64, ui64] res = pair[ui64, ui64](0, 0) + + if size > 128: + with nogil: + res = CityHash128(s, size) + else: + res = CityHash128(s, size) + return res + + +def hash64seed(content, seed): + cdef const char* s = content + cdef size_t size = len(content) + cdef ui64 _seed = seed; + + if size > 128: + with nogil: + res = CityHash64WithSeed(s, size, _seed) + else: + res = CityHash64WithSeed(s, size, _seed) + + return res + + +def filehash64(path): + cdef const char* p = path + cdef ui64 res = 0 + + with nogil: + res = FileCityHash64(p) + + return res + + +def filehash128high64(path): + cdef const char* p = path + cdef ui64 res = 0 + + with nogil: + res = FileCityHash128WithSeedHigh64(p) + + return res diff --git a/library/python/cityhash/hash.cpp b/library/python/cityhash/hash.cpp new file mode 100644 index 0000000000..17bd3a75f3 --- /dev/null +++ b/library/python/cityhash/hash.cpp @@ -0,0 +1,32 @@ +#include "hash.h" + +#include <util/digest/city.h> +#include <util/generic/string.h> +#include <util/memory/blob.h> +#include <util/system/file.h> +#include <util/system/fstat.h> + +void ReadFile(const char* fpath, TBlob& blob) { + TFile f(TString{fpath}, RdOnly | Seq); + const TFileStat fs(f); + auto size = fs.Size; + + if (size < (64 << 10)) { + blob = TBlob::FromFileContent(f, 0, size); + } else { + blob = TBlob::FromFile(f); + } +} + +ui64 FileCityHash128WithSeedHigh64(const char* fpath) { + TBlob blob; + ReadFile(fpath, blob); + const uint128 hash = CityHash128WithSeed((const char*)blob.Data(), blob.Size(), uint128(0, blob.Size())); + return Uint128High64(hash); +} + +ui64 
FileCityHash64(const char* fpath) { + TBlob blob; + ReadFile(fpath, blob); + return CityHash64(static_cast<const char*>(blob.Data()), blob.Size()); +} diff --git a/library/python/cityhash/hash.h b/library/python/cityhash/hash.h new file mode 100644 index 0000000000..64b22ba74b --- /dev/null +++ b/library/python/cityhash/hash.h @@ -0,0 +1,6 @@ +#pragma once + +#include <util/system/defaults.h> + +ui64 FileCityHash128WithSeedHigh64(const char* fpath); +ui64 FileCityHash64(const char* fpath); diff --git a/library/python/cityhash/ya.make b/library/python/cityhash/ya.make new file mode 100644 index 0000000000..7948e19389 --- /dev/null +++ b/library/python/cityhash/ya.make @@ -0,0 +1,16 @@ +PY23_LIBRARY() + +SRCS( + hash.cpp +) + +PY_SRCS( + TOP_LEVEL + cityhash.pyx +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/library/python/codecs/__codecs.pyx b/library/python/codecs/__codecs.pyx new file mode 100644 index 0000000000..42ec37fe88 --- /dev/null +++ b/library/python/codecs/__codecs.pyx @@ -0,0 +1,61 @@ +import six + +from libcpp cimport bool + +from util.generic.string cimport TString, TStringBuf + + +def to_bytes(s): + try: + return s.encode('utf-8') + except AttributeError: + pass + + return s + + +def from_bytes(s): + if six.PY3: + return s.decode('utf-8') + + return s + + +cdef extern from "library/cpp/blockcodecs/codecs.h" namespace "NBlockCodecs": + cdef cppclass ICodec: + void Encode(TStringBuf data, TString& res) nogil + void Decode(TStringBuf data, TString& res) nogil + + cdef const ICodec* Codec(const TStringBuf& name) except + + cdef TString ListAllCodecsAsString() except + + + +def dumps(name, data): + name = to_bytes(name) + + cdef const ICodec* codec = Codec(TStringBuf(name, len(name))) + cdef TString res + cdef TStringBuf cdata = TStringBuf(data, len(data)) + + with nogil: + codec.Encode(cdata, res) + + return res.c_str()[:res.length()] + + +def loads(name, data): + name = to_bytes(name) + + cdef const ICodec* codec = Codec(TStringBuf(name, len(name))) + cdef TString res + cdef TStringBuf cdata = TStringBuf(data, len(data)) + + with nogil: + codec.Decode(cdata, res) + + return res.c_str()[:res.length()] + +def list_all_codecs(): + cdef TString res = ListAllCodecsAsString() + + return from_bytes(res.c_str()[:res.length()]).split(',') diff --git a/library/python/codecs/__init__.py b/library/python/codecs/__init__.py new file mode 100644 index 0000000000..b9fb00deb0 --- /dev/null +++ b/library/python/codecs/__init__.py @@ -0,0 +1 @@ +from __codecs import loads, dumps, list_all_codecs # noqa diff --git a/library/python/codecs/ya.make b/library/python/codecs/ya.make new file mode 100644 index 0000000000..f42d115d5d --- /dev/null +++ b/library/python/codecs/ya.make @@ -0,0 +1,16 @@ +PY23_LIBRARY() + +PEERDIR( + library/cpp/blockcodecs + contrib/python/six +) + +PY_SRCS( + __init__.py +) + +BUILDWITH_CYTHON_CPP(__codecs.pyx) + +PY_REGISTER(__codecs) + +END() diff --git a/library/python/color/README.md b/library/python/color/README.md new file mode 100644 index 0000000000..9deae40092 --- /dev/null +++ b/library/python/color/README.md @@ -0,0 +1,9 @@ +Форк ((termcolor https://github.com/termcolor/termcolor/)) для PY23 с дополнительным функционалом. + +Может быть использован для конвертации текстовых спецификаций цвета (например, из markup) в esc-последовательности для корректного отображения в терминале. 
+ +Пример использования: +```python +from library.python.color import tcolor +tcolor("some text", "green-bold-on_red") -> '\x1b[32m\x1b[41m\x1b[1msome text\x1b[0m' +``` diff --git a/library/python/color/__init__.py b/library/python/color/__init__.py new file mode 100644 index 0000000000..a70234945e --- /dev/null +++ b/library/python/color/__init__.py @@ -0,0 +1,92 @@ +from __future__ import print_function + +import copy +import os + +from termcolor import ATTRIBUTES, COLORS, HIGHLIGHTS, RESET + +__all__ = [ + "ATTRIBUTES", + "COLORS", + "HIGHLIGHTS", + "RESET", + "colored", + "cprint", + "tcolor", + "get_color_by_spec" +] + +ATTRIBUTES = copy.deepcopy(ATTRIBUTES) +ATTRIBUTES["light"] = ATTRIBUTES['bold'] + +COLORS = copy.deepcopy(COLORS) +COLORS['gray'] = COLORS['grey'] +COLORS['purple'] = COLORS['magenta'] +COLORS["reset"] = 0 + + +def get_code(code): + if os.getenv("ANSI_COLORS_DISABLED") is None: + return "\033[{}m".format(code) + return "" + + +def get_color_by_spec(color_spec): + color, on_color, attrs = get_spec(color_spec) + return get_color(color, on_color, attrs) + + +def get_color(color, on_color, attrs): + res = "" + + if color is not None: + res += get_code(COLORS[color]) + + if on_color is not None: + res += get_code(HIGHLIGHTS[on_color]) + + if attrs is not None: + for attr in attrs: + res += get_code(ATTRIBUTES[attr]) + + return res + + +def get_spec(color_spec): + """Parses string text color formatting specification. + + Arguments: + color_spec -- string spec for text color formatting, csv string with + `color` / `bg_color` / `attr` spec items having "-" as a delimiter. + + Returns a tuple: (color, bg-color, attributes list) + + Example: + get_spec("green-bold-on_red") -> (32, 41, [1]) + """ + parts = color_spec.split("-") + color = None + on_color = None + attrs = [] + for part in parts: + part = part.lower() + if part in COLORS: + color = part + if part in HIGHLIGHTS: + on_color = part + if part in ATTRIBUTES: + attrs.append(part) + return color, on_color, attrs + + +def tcolor(text, color_spec): + color, on_color, attrs = get_spec(color_spec) + return colored(text, color=color, on_color=on_color, attrs=attrs) + + +def colored(text, color=None, on_color=None, attrs=None): + return get_color(color, on_color, attrs) + text + get_code(COLORS["reset"]) + + +def cprint(text, color=None, on_color=None, attrs=None, **kwargs): + print((colored(text, color, on_color, attrs)), **kwargs) diff --git a/library/python/color/ya.make b/library/python/color/ya.make new file mode 100644 index 0000000000..ff6740b1d4 --- /dev/null +++ b/library/python/color/ya.make @@ -0,0 +1,13 @@ +PY23_LIBRARY() + +LICENSE(MIT) + +PY_SRCS( + __init__.py +) + +PEERDIR( + contrib/python/termcolor +) + +END() diff --git a/library/python/compress/__init__.py b/library/python/compress/__init__.py new file mode 100644 index 0000000000..380ec47dca --- /dev/null +++ b/library/python/compress/__init__.py @@ -0,0 +1,147 @@ +from io import open + +import struct +import json +import os +import logging + +import library.python.par_apply as lpp +import library.python.codecs as lpc + + +logger = logging.getLogger('compress') + + +def list_all_codecs(): + return sorted(frozenset(lpc.list_all_codecs())) + + +def find_codec(ext): + def ext_compress(x): + return lpc.dumps(ext, x) + + def ext_decompress(x): + return lpc.loads(ext, x) + + ext_decompress(ext_compress(b'')) + + return {'c': ext_compress, 'd': ext_decompress, 'n': ext} + + +def codec_for(path): + for ext in reversed(path.split('.')): + try: + return 
find_codec(ext) + except Exception as e: + logger.debug('in codec_for(): %s', e) + + raise Exception('unsupported file %s' % path) + + +def compress(fr, to, codec=None, fopen=open, threads=1): + if codec: + codec = find_codec(codec) + else: + codec = codec_for(to) + + func = codec['c'] + + def iter_blocks(): + with fopen(fr, 'rb') as f: + while True: + chunk = f.read(16 * 1024 * 1024) + + if chunk: + yield chunk + else: + yield b'' + + return + + def iter_results(): + info = { + 'codec': codec['n'], + } + + if fr: + info['size'] = os.path.getsize(fr) + + yield json.dumps(info, sort_keys=True) + '\n' + + for c in lpp.par_apply(iter_blocks(), func, threads): + yield c + + with fopen(to, 'wb') as f: + for c in iter_results(): + logger.debug('complete %s', len(c)) + f.write(struct.pack('<I', len(c))) + + try: + f.write(c) + except TypeError: + f.write(c.encode('utf-8')) + + +def decompress(fr, to, codec=None, fopen=open, threads=1): + def iter_chunks(): + with fopen(fr, 'rb') as f: + cnt = 0 + + while True: + ll = f.read(4) + + if ll: + ll = struct.unpack('<I', ll)[0] + + if ll: + if ll > 100000000: + raise Exception('broken stream') + + yield f.read(ll) + + cnt += ll + else: + if not cnt: + raise Exception('empty stream') + + return + + it = iter_chunks() + extra = [] + + for chunk in it: + hdr = {} + + try: + hdr = json.loads(chunk) + except Exception as e: + logger.info('can not parse header, suspect old format: %s', e) + extra.append(chunk) + + break + + def resolve_codec(): + if 'codec' in hdr: + return find_codec(hdr['codec']) + + if codec: + return find_codec(codec) + + return codec_for(fr) + + dc = resolve_codec()['d'] + + def iter_all_chunks(): + for x in extra: + yield x + + for x in it: + yield x + + with fopen(to, 'wb') as f: + for c in lpp.par_apply(iter_all_chunks(), dc, threads): + if c: + logger.debug('complete %s', len(c)) + f.write(c) + else: + break diff --git a/library/python/compress/ya.make b/library/python/compress/ya.make new file mode 100644 index 0000000000..bbf2a784e2 --- /dev/null +++ b/library/python/compress/ya.make @@ -0,0 +1,16 @@ +PY23_LIBRARY() + +PEERDIR( + library/python/codecs + library/python/par_apply +) + +PY_SRCS( + __init__.py +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/library/python/coredump_filter/README.md b/library/python/coredump_filter/README.md new file mode 100644 index 0000000000..87b02e7985 --- /dev/null +++ b/library/python/coredump_filter/README.md @@ -0,0 +1,19 @@ +# Coredump Filter + +- ABC: https://abc.yandex-team.ru/services/cores/ + +Библиотека для разбора (парсинга) трейсов отладчика gdb/lldb, python traceback-ов и minidump-ов. + +На вход принимает текст трейса, на выходе - распаршенный текст + возможность преобразовать +его в html-формат для удобства чтения. + + +## Основные клиенты +- [Агрегатор стектрейсов](https://wiki.yandex-team.ru/cores-aggregation) + + +## Правила разработки + +Библиотека написана таким образом, чтобы файл `__init__.py` мог работать +без внешних зависимостей. Это позволяет использовать библиотеку даже в Ter1-окружениях. +На данный момент этот инвариант не обложен тестами (и это следует исправить). 
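
The coredump_filter README above (in Russian) says the library parses gdb/lldb debugger traces, Python tracebacks and minidumps, and can convert the parsed result into HTML for easier reading. The public entry point added in `library/python/coredump_filter/__init__.py` below is `filter_stack_dump`, which takes the raw trace text and, optionally, an output stream. The snippet below is only a usage sketch: the input file name and the standalone import form are assumptions, while the function names and keyword arguments come from the diff that follows.

```python
# Hedged usage sketch for library.python.coredump_filter; paths and input data
# are hypothetical, the function names are taken from this commit's diff.
import sys

import library.python.coredump_filter as coredump_filter

with open("gdb_trace.txt") as f:  # hypothetical file with a raw gdb backtrace
    core_text = f.read()

# With an output stream: writes an HTML report with stacks grouped by fingerprint.
with open("report.html", "w") as out:
    coredump_filter.filter_stack_dump(core_text=core_text, output_stream=out)

# Without an output stream: returns grouped stacks, their raw text and the signal.
stack_groups, raw_stacks, signal = coredump_filter.filter_stack_dump(core_text=core_text)
print("terminated with signal:", signal, file=sys.stderr)
print(coredump_filter.serialize_stacks(stack_groups))
```
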
diff --git a/library/python/coredump_filter/__init__.py b/library/python/coredump_filter/__init__.py new file mode 100644 index 0000000000..de0830cd43 --- /dev/null +++ b/library/python/coredump_filter/__init__.py @@ -0,0 +1,1500 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import print_function + +import six +import enum +import datetime +import os +import re +import pkgutil +import sys +import hashlib +import json +import logging + +logger = logging.getLogger(__name__) + + +class CoredumpMode(enum.Enum): + GDB = "gdb" + LLDB = "lldb" + SDC_ASSERT = "sdc_assert" + + +ARCADIA_ROOT_LINK = "https://a.yandex-team.ru/arc/trunk/arcadia/" + +ARCADIA_ROOT_DIRS = [ + # hottest paths + "/util/", + "/contrib/", + "/library/", + "/kernel/", + "/build/", + "/search/", + + # "/gcc-4.8.2/", + + # system paths + # "/lib/x86_64-linux-gnu/", + + # all other stuff + "/aapi/", + "/addappter/", + "/adfox/", + "/admins/", + "/ads/", + "/adv/", + "/advq/", + "/afisha/", + "/afro/", + "/alet/", + "/alice/", + "/analytics/", + "/antiadblock/", + "/antirobot/", + "/apphost/", + "/april/", + "/arc/", + "/arcanum/", + "/augur/", + "/aurora/", + "/autocheck/", + "/balancer/", + "/bass/", + "/billing/", + "/bindings/", + "/browser/", + "/build/", + "/bunker/", + "/caas/", + "/canvas/", + "/captcha/", + "/catboost/", + "/certs/", + "/ci/", + "/clickhouse/", + "/client_analytics/", + "/cloud/", + "/cmicot/", + "/cmnt/", + "/comdep_analytics/", + "/commerce/", + "/contrib/", + "/crm/", + "/crowdsourcing/", + "/crypta/", + "/cv/", + "/datacloud/", + "/datalens/", + "/data-ui/", + "/devtools/", + "/dict/", + "/direct/", + "/disk/", + "/distribution/", + "/distribution_interface/", + "/district/", + "/dj/", + "/docs/", + "/douber/", + "/drive/", + "/edadeal/", + "/education/", + "/entity/", + "/ether/", + "/extdata/", + "/extsearch/", + "/FactExtract/", + "/fintech/", + "/frontend/", + "/fuzzing/", + "/games/", + "/gencfg/", + "/geobase/", + "/geoproduct/", + "/geosuggest/", + "/geotargeting/", + "/glycine/", + "/groups/", + "/haas/", + "/health/", + "/helpdesk/", + "/hitman/", + "/home/", + "/htf/", + "/hw_watcher/", + "/hypercube/", + "/iaas/", + "/iceberg/", + "/infra/", + "/intranet/", + "/inventori/", + "/ipreg/", + "/irt/", + "/it-office/", + "/jdk/", + "/juggler/", + "/junk/", + "/jupytercloud/", + "/kernel/", + "/keyboard/", + "/kikimr/", + "/kinopoisk/", + "/kinopoisk-ott/", + "/laas/", + "/lbs/", + "/library/", + "/load/", + "/locdoc/", + "/logbroker/", + "/logfeller/", + "/mail/", + "/mapreduce/", + "/maps/", + "/maps_adv/", + "/market/", + "/mb/", + "/mds/", + "/media/", + "/media-billing/", + "/media-crm/", + "/mediapers/", + "/mediaplanner/", + "/mediastat/", + "/media-stories/", + "/metrika/", + "/milab/", + "/ml/", + "/mlp/", + "/mlportal/", + "/mobile/", + "/modadvert/", + "/ms/", + "/mssngr/", + "/music/", + "/musickit/", + "/netsys/", + "/nginx/", + "/nirvana/", + "/noc/", + "/ofd/", + "/offline_data/", + "/opensource/", + "/orgvisits/", + "/ott/", + "/packages/", + "/partner/", + "/passport/", + "/payplatform/", + "/paysys/", + "/plus/", + "/portal/", + "/portalytics/", + "/pythia/", + "/quality/", + "/quasar/", + "/razladki/", + "/regulargeo/", + "/release_machine/", + "/rem/", + "/repo/", + "/rnd_toolbox/", + "/robot/", + "/rtc/", + "/rtline/", + "/rtmapreduce/", + "/rt-research/", + "/saas/", + "/samogon/", + "/samsara/", + "/sandbox/", + "/scarab/", + "/sdc/", + "/search/", + "/security/", + "/semantic-web/", + "/serp/", + "/sitesearch/", + "/skynet/", + "/smart_devices/", + 
"/smarttv/", + "/smm/", + "/solomon/", + "/specsearches/", + "/speechkit/", + "/sport/", + "/sprav/", + "/statbox/", + "/strm/", + "/suburban-trains/", + "/sup/", + "/switch/", + "/talents/", + "/tasklet/", + "/taxi/", + "/taxi_efficiency/", + "/testenv/", + "/testpalm/", + "/testpers/", + "/toloka/", + "/toolbox/", + "/tools/", + "/tracker/", + "/traffic/", + "/transfer_manager/", + "/travel/", + "/trust/", + "/urfu/", + "/vcs/", + "/velocity/", + "/vendor/", + "/vh/", + "/voicetech/", + "/weather/", + "/web/", + "/wmconsole/", + "/xmlsearch/", + "/yabs/", + "/yadoc/", + "/yandex_io/", + "/yaphone/", + "/ydf/", + "/ydo/", + "/yp/", + "/yql/", + "/ysite/", + "/yt/", + "/yweb/", + "/zen/", + "/zapravki/", + "/zen/", + "/zootopia/", + "/zora/", +] + +MY_PATH = os.path.dirname(os.path.abspath(__file__)) + +# 0.2.x uses stable hashing +CORE_PROC_VERSION = "0.2.1" + +ARCADIA_ROOT_SIGN = "$S/" +SIGNAL_NOT_FOUND = "signal not found" + + +class SourceRoot(object): + def __init__(self): + self.root = None + + def detect(self, source): + if not source: + # For example, regexp_4 + return + + if source.startswith("/-S/"): + return source[4:] + + if source.startswith("../"): + return source + + """ + if self.root is not None: + return self.root + """ + + min_pos = 100000 + for root_dir in ARCADIA_ROOT_DIRS: + pos = source.find(root_dir) + if pos < 0: + continue + + if pos < min_pos: + min_pos = pos + + if min_pos < len(source): + self.root = source[:min_pos + 1] + + def crop(self, source): + if not source: + return "" + + # detection attempt + self.detect(source) + + if self.root is not None: + return source.replace(self.root, ARCADIA_ROOT_SIGN, 1) + + # when traceback contains only ??, source root cannot be detected + return source + + +def highlight_func(s): + return ( + s + .replace("=", '<span class="symbol">=</span>') + .replace("(", '<span class="symbol">(</span>') + .replace(")", '<span class="symbol">)</span>') + ) + + +class FrameBase(object): + def __init__( + self, + frame_no=None, + addr="", + func="", + source="", + source_no="", + func_name="", + ): + self.frame_no = frame_no + self.addr = addr + self.func = func + self.source = source + self.source_no = source_no + self.func_name = func_name + + def __str__(self): + return "{}\t{}\t{}".format( + self.frame_no, + self.func, + self.source, + ) + + def to_json(self): + return { + "frame_no": self.frame_no, + "addr": self.addr, + "func": self.func, + "func_name": self.func_name, + "source": self.source, + "source_no": self.source_no, + } + + def fingerprint(self): + return self.func_name + + def cropped_source(self): + return self.source + + def raw(self): + return "{frame} {func} {source}".format( + frame=self.frame_no, + func=self.func, + source=self.source, + ) + + def html(self): + source, source_fmt = self.find_source() + return ( + '<span class="frame">{frame}</span>' + '<span class="func">{func}</span> ' + '<span class="source">{source}</span>{source_fmt}\n'.format( + frame=self.frame_no, + func=highlight_func(self.func.replace("&", "&").replace("<", "<")), + source=source, + source_fmt=source_fmt, + ) + ) + + +class LLDBFrame(FrameBase): + SOURCE_NO_RE = re.compile(r"(.*?[^\d]):(\d+)") + FUNC_RE = re.compile(r"(\w+\s)?(\w+[\w,:,_,<,>,\s,*]+).*$") + + def __init__( + self, + frame_no=None, + addr="", + func="", + source="", + source_no="", + func_name="", + ): + super(LLDBFrame, self).__init__( + frame_no=frame_no, + addr=addr, + func=func, + source=source, + source_no=source_no, + func_name=func_name, + ) + # .source calculation + + 
func = func.replace("(anonymous namespace)::", "") + m = self.FUNC_RE.match(func) + if m: + self.func_name = m.group(2) # overwrite func_name if name is in func + + if source_no: + self.source_no = source_no + self.source = source + else: + m = self.SOURCE_NO_RE.match(source) + if m: + self.source = m.group(1) + self.source_no = m.group(2) + + def find_source(self): + """ + :return: pair (source, source_fmt) + """ + source_fmt = "" + + if self.source_no: + source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no) + + return self.source, source_fmt + + +class GDBFrame(FrameBase): + SOURCE_NO_RE = re.compile(r"(.*):(\d+)") + # #7 0x00007f105f3a221d in NAppHost::NTransport::TCoroutineExecutor::Poll (this=0x7f08416a5d00, + # tasks=empty TVector (capacity=32)) at /-S/apphost/lib/executors/executors.cpp:373 + # We match with non-greedy regex a function name that cannot contain equal sign + FUNC_RE = re.compile(r"(.*?) \(([a-zA-Z0-9_]+=.*|)\)$") # function with kwarg-params or zero params + + def __init__( + self, + frame_no=None, + addr="", + func="", + source="", + source_no="", + func_name="", + ): + super(GDBFrame, self).__init__( + frame_no=frame_no, + addr=addr, + func=func, + source=source, + source_no=source_no, + func_name=func_name, + ) + if not source_no: + m = self.SOURCE_NO_RE.match(source) + if m: + self.source = m.group(1) + self.source_no = m.group(2) + if not func_name: + m = self.FUNC_RE.match(self.func) + if m: + self.func_name = m.group(1) + + def find_source(self): + """ + Returns link to arcadia if source is path in arcadia, else just string with path + :return: pair (source, source_fmt) + """ + source_fmt = "" + source = "" + link = "" + dirs = self.source.split("/") + if len(dirs) > 1 and "/{dir}/".format(dir=dirs[1]) in ARCADIA_ROOT_DIRS: + link = self.source.replace(ARCADIA_ROOT_SIGN, ARCADIA_ROOT_LINK) + else: + source = self.source + if self.source_no: + source_fmt = ' +<span class="source-no">{}</span>'.format(self.source_no) + if link: + link += "?#L{line}".format(line=self.source_no) + + if link: + source = '<a href="{link}">{source}</a>'.format( + link=link, + source=self.source, + ) + return source, source_fmt + + +class SDCAssertFrame(LLDBFrame): + + def __init__( + self, + frame_no=None, + addr="", + func="", + source="", + source_no="", + func_name="", + ): + super(SDCAssertFrame, self).__init__( + frame_no=frame_no, + addr=addr, + func=func, + source=source, + source_no=source_no, + func_name=func_name, + ) + # .source calculation + + self.source = source or "" + if isinstance(source_no, str) and len(source_no) > 0: + source_no = int(source_no, 16) + self.source_no = source_no or "" + + m = self.FUNC_RE.match(func) + if m: + self.func_name = m.group(2) + + +class Stack(object): + # priority classes + LOW_IMPORTANT = 25 + DEFAULT_IMPORTANT = 50 + SUSPICIOUS_IMPORTANT = 75 + MAX_IMPORTANT = 100 + + # default coredump's type + mode = CoredumpMode.GDB + + max_depth = None + + fingerprint_blacklist = [ + # bottom frames + "raise", + "abort", + "__gnu_cxx::__verbose_terminate_handler", + "_cxxabiv1::__terminate", + "std::terminate", + "__cxxabiv1::__cxa_throw", + # top frames + "start_thread", + "clone", + "??", + "__clone", + "__libc_start_main", + "_start", + "__nanosleep", + ] + + fingerprint_blacklist_prefix = () + + suspicious_functions = [ + "CheckedDelete", + "NPrivate::Panic", + "abort", + "close_all_fds", + "__cxa_throw", + ] + + low_important_functions_eq = [ + "poll ()", + "recvfrom ()", + "pthread_join ()", + ] + + 
low_important_functions_match = [ + "TCommonSockOps::SendV", + "WaitD (", + "SleepT (", + "Join (", + "epoll_wait", + "nanosleep", + "pthread_cond_wait", + "pthread_cond_timedwait", + "gsignal", + "std::detail::_", + "std::type_info", + "ros::NodeHandle", + ] + + def __init__( + self, + lines=None, + source_root=None, + thread_ptr=0, + thread_id=None, + frames=None, + important=None, + stack_fp=None, + fingerprint_hash=None, + stream=None, + mode=None, # type: CoredumpMode + ignore_bad_frames=True, + ): + self.lines = lines + self.source_root = source_root + self.thread_ptr = thread_ptr + self.thread_id = thread_id + if mode is not None: + self.mode = mode + + self.frames = frames or [] + if self.frames and isinstance(frames[0], dict): + self.frames = [self.frame_factory(f) for f in self.frames] + self.important = important or self.DEFAULT_IMPORTANT + if thread_id == 1: + self.important = self.MAX_IMPORTANT + self.fingerprint_hash = fingerprint_hash + self.stack_fp = stack_fp + self.stream = stream + self.ignore_bad_frames = ignore_bad_frames + + def to_json(self): + """Should be symmetric with `from_json`.""" + return { + "mode": self.mode.value, + "frames": [frame.to_json() for frame in self.frames], + "important": self.important, + } + + @staticmethod + def from_json(stack): + """Should be symmetric with `to_json`.""" + mode = CoredumpMode(stack.get("mode", CoredumpMode.GDB.value)) + # old serialization format support, should be dropped + lldb_mode = stack.get("lldb_mode", False) + if lldb_mode: + mode = CoredumpMode.LLDB + + unpacked_stack = { + "mode": mode, + "frames": stack["frames"], + "important": stack.get("important", Stack.DEFAULT_IMPORTANT), + } + return mode, unpacked_stack + + def frame_factory(self, args): + frames = { + CoredumpMode.GDB: GDBFrame, + CoredumpMode.LLDB: LLDBFrame, + CoredumpMode.SDC_ASSERT: SDCAssertFrame, + } + + class_object = frames.get(self.mode) + if not class_object: + raise Exception("Invalid mode: {}".format(self.mode.value)) + + return class_object(**args) + + def low_important(self): + return self.important <= self.LOW_IMPORTANT + + def check_importance(self, frame): + # raised priority cannot be lowered + if self.important > self.DEFAULT_IMPORTANT: + return + + # detect suspicious stacks + for name in self.suspicious_functions: + if name in frame.func: + self.important = self.SUSPICIOUS_IMPORTANT + return + + for name in self.low_important_functions_eq: + if name == frame.func: + self.important = self.LOW_IMPORTANT + + for name in self.low_important_functions_match: + if name in frame.func: + self.important = self.LOW_IMPORTANT + + def push_frame(self, frame): + self.check_importance(frame) + # ignore duplicated frames + if len(self.frames) and self.frames[-1].frame_no == frame.frame_no: + return + self.frames.append(frame) + + def parse(self): + """ + Parse one stack + """ + assert self.lines is not None + assert self.source_root is not None + + for line in self.lines: + match_found = False + for regexp in self.REGEXPS: + m = regexp.match(line) + if m: + frame_args = m.groupdict() + if "source" in frame_args: + frame_args["source"] = self.source_root.crop(frame_args["source"]) + + self.push_frame(self.frame_factory(frame_args)) + match_found = True + break + + if not match_found: + self.bad_frame(line) + + def bad_frame(self, line): + if self.ignore_bad_frames: + logger.warning("Bad frame: %s", line) + return + + raise Exception("Bad frame: `{}`, frame `{}`".format( + line, + self.debug(return_result=True), + )) + + def debug(self, 
return_result=False): + if self.low_important(): + return "" + + res = "\n".join([str(f) for f in self.frames]) + res += "----------------------------- DEBUG END\n" + if return_result: + return res + + self.stream.write(res) + + def raw(self): + return "\n".join([frame.raw() for frame in self.frames]) + + def html(self, same_hash=False, same_count=1, return_result=False): + ans = "" + pre_class = "important-" + str(self.important) + if same_hash: + pre_class += " same-hash" + + ans += '<pre class="{0}">'.format(pre_class) + if not same_hash: + ans += '<a name="stack{0}"></a>'.format(self.hash()) + + ans += '<span class="hash"><a href="#stack{0}">#{0}</a>, {1} stack(s) with same hash</span>\n'.format( + self.hash(), same_count, + ) + + for f in self.frames: + ans += f.html() + ans += "</pre>\n" + + if return_result: + return ans + + self.stream.write(ans) + + def fingerprint(self, max_num=None): + """ + Stack fingerprint: concatenation of non-common stack frames + FIXME: wipe away `max_num` + """ + stack_fp = list() + len_frames = min((max_num or len(self.frames)), len(self.frames)) + + for f in self.frames[:len_frames]: + fp = f.fingerprint() + if not fp: + continue + + if fp in self.fingerprint_blacklist: + continue + + if fp.startswith(self.fingerprint_blacklist_prefix): + continue + + if fp in stack_fp: + # FIXME: optimize duplicate remover: check only previous frame + # see also `push_frame` + continue + + stack_fp.append(fp.strip()) + + if self.max_depth is not None and len(stack_fp) >= self.max_depth: + break + + return "\n".join(stack_fp) + + def simple_html(self, num_frames=None): + if not num_frames: + num_frames = len(self.frames) + pre_class = "important-0" + ans = '<pre class="{0}">'.format(pre_class) + for i in range(min(len(self.frames), num_frames)): + ans += self.frames[i].html() + ans += "</pre>\n" + return ans + + def __str__(self): + return "\n".join(map(str, self.frames)) + + def hash(self, max_num=None): + """ + Entire stack hash for merging same stacks + """ + if self.fingerprint_hash is None: + self.fingerprint_hash = int(hashlib.md5(self.fingerprint(max_num).encode("utf-8")).hexdigest()[0:15], 16) + + return self.fingerprint_hash + + +class GDBStack(Stack): + + mode = CoredumpMode.GDB + + REGEXPS = [ + # #6 0x0000000001d9203e in NAsio::TIOService::TImpl::Run (this=0x137b1ec00) at /place/ + # sandbox-data/srcdir/arcadia_cache/library/neh/asio/io_service_impl.cpp:77 + + re.compile( + r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) at (?P<source>.*)" + ), + + # #5 TCondVar::WaitD (this=this@entry=0x10196b2b8, mutex=..., deadLine=..., deadLine@entry=...) + # at /place/sandbox-data/srcdir/arcadia_cache/util/system/condvar.cpp:150 + re.compile( + r"#(?P<frame_no>\d+)[ \t]+(?P<func>.*) at (?P<source>/.*)" + ), + + # #0 0x00007faf8eb31d84 in pthread_cond_wait@@GLIBC_2.3.2 () + # from /lib/x86_64-linux-gnu/libpthread.so.0 + re.compile( + r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*) from (?P<source>.*)" + ), + + # #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 + re.compile( + r"#(?P<frame_no>\d+)[ \t]+ (?P<func>.*) at (?P<source>.*)" + ), + + # #10 0x0000000000000000 in ?? 
() + re.compile( + r"#(?P<frame_no>\d+)[ \t]+(?P<addr>0x[0-9a-f]+) in (?P<func>.*)" + ), + ] + + +class LLDBStack(Stack): + + mode = CoredumpMode.LLDB + + REGEXPS = [ + # 0x00007fd7b300a886 libthird_Uparty_Sros_Sros_Ucomm_Sclients_Sroscpp_Sliblibroscpp.so` + # std::thread::_State_impl<std::thread::_Invoker<std::tuple<ros::PollManager::PollManager()::$_1> > >::_M_run() + # [inlined] ros::PollManager::threadFunc(this=0x00007fd7b30dab20) at poll_manager.cpp:75:16 # noqa + re.compile( + r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+inlined]\s(?P<func>.+)\sat\s(?P<source>.+)" + ), + + re.compile( + r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+).+?`(?P<func>.+)\sat\s(?P<source>.+)" + ), + + # * frame #0: 0x00007fd7aee51f47 libc.so.6`gsignal + 199 + re.compile( + r"[ *]*frame #(?P<frame_no>\d+): (?P<addr>0x[0-9a-f]+)\s(?P<source>.+)`(?P<func>.+)\s\+\s(?P<source_no>\d+)" + ), + ] + + # Take not more than `max_depth` non-filtered frames into fingerprint + # See CORES-180 + max_depth = 10 + + fingerprint_blacklist = Stack.fingerprint_blacklist + [ + "ros::ros_wallsleep", + ] + + fingerprint_blacklist_prefix = Stack.fingerprint_blacklist_prefix + ( + "___lldb_unnamed_symbol", + "__gnu_cxx", + "__gthread", + "__pthread", + "decltype", + "myriapoda::BuildersRunner", + "non", + "std::_Function_handler", + "std::_Sp_counted_ptr_inplace", + "std::__invoke_impl", + "std::__invoke_result", + "std::__shared_ptr", + "std::conditional", + "std::shared_ptr", + "std::thread::_Invoker", + "std::thread::_State_impl", + "yandex::sdc::assert_details_", + ) + + suspicious_functions = Stack.suspicious_functions + [ + "Xml", + "boost", + "ros", + "supernode", + "tensorflow", + "yandex::sdc", + ] + + +class PythonStack(Stack): + + REGEXPS = [ + re.compile( + r'File "(?P<source>.*)", line (?P<source_no>\d+), in (?P<func_name>.*)' + ), + ] + + +class SDCAssertStack(LLDBStack): + + mode = CoredumpMode.SDC_ASSERT + + REGEXPS = [ + # 0: ./modules/_shared/libcore_Stools_Slibassert.so(yandex::sdc::assert_details_::PanicV(char const*, + # long, char const*, char const*, bool, char const*, __va_list_tag*) + # +0x2aa)[0x7fb83268feaa] + re.compile( + r"(?P<frame_no>\d+):\s(?P<source>.+.so)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)" + ), + + re.compile( + r"(?P<frame_no>\d+):\s(?P<source>\w+)\((?P<func>.+)\+(?P<source_no>.+).+\[(?P<addr>0x[0-9a-f]+)" + ) + ] + + +def parse_python_traceback(trace): + trace = trace.replace("/home/zomb-sandbox/client/", "/") + trace = trace.replace("/home/zomb-sandbox/tasks/", "/sandbox/") + trace = trace.split("\n") + exception = trace[-1] # noqa: F841 + trace = trace[1: -1] + pairs = zip(trace[::2], trace[1::2]) + stack = Stack(lines=[]) + for frame_no, (path, row) in enumerate(pairs): + # FIXME: wrap into generic tracer + m = PythonStack.REGEXPS[0].match(path.strip()) + if m: + frame_args = m.groupdict() + if not frame_args["source"].startswith("/"): + frame_args["source"] = "/" + frame_args["source"] + frame_args["frame_no"] = str(frame_no) + frame_args["func"] = row.strip() + stack.push_frame(GDBFrame(**frame_args)) + return [[stack]], [[stack.raw()]], 6 + + +def stack_factory(stack): + mode, unpacked_stack = Stack.from_json(stack) + + if mode == CoredumpMode.GDB: + return GDBStack(**unpacked_stack) + elif mode == CoredumpMode.LLDB: + return LLDBStack(**unpacked_stack) + elif mode == CoredumpMode.SDC_ASSERT: + return SDCAssertStack(**unpacked_stack) + + raise Exception("Invalid stack mode: {}. 
".format(mode)) + + +def _read_file(file_name): + with open(file_name) as f: + return f.read() + + +def _file_contents(file_name): + """Return file (or resource) contents as unicode string.""" + if getattr(sys, "is_standalone_binary", False): + try: + contents = pkgutil.get_data(__package__, file_name) + except Exception: + raise IOError("Failed to find resource: " + file_name) + else: + if not os.path.exists(file_name): + file_name = os.path.join(MY_PATH, file_name) + contents = _read_file(file_name) + # py23 compatibility + if not isinstance(contents, six.text_type): + contents = contents.decode("utf-8") + return contents + + +def html_prolog(stream, timestamp): + prolog = _file_contents("prolog.html") + assert isinstance(prolog, six.string_types) + stream.write(prolog.format( + style=_file_contents("styles.css"), + coredump_js=_file_contents("core_proc.js"), + version=CORE_PROC_VERSION, + timestamp=timestamp, + )) + + +def html_epilog(stream): + stream.write(_file_contents("epilog.html")) + + +def detect_coredump_mode(core_text): + if len(core_text) == 0: + raise Exception("Text stacktrace is blank") + + if "Panic at unixtime" in core_text: + return CoredumpMode.SDC_ASSERT + + if "(lldb)" in core_text: + return CoredumpMode.LLDB + + return CoredumpMode.GDB + + +def filter_stack_dump( + core_text=None, + stack_file_name=None, + use_fingerprint=False, + sandbox_failed_task_id=None, + output_stream=None, + timestamp=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + ignore_bad_frames=True, +): + """New interface for stacktrace filtering. Preferred to use.""" + if not core_text and not stack_file_name: + raise ValueError("Either `core_text` or `stack_file_name` should be passed to `filter_stack_dump`. ") + + if core_text is not None and stack_file_name: + raise ValueError("Only one of `core_text` and `stack_file_name` cannot be specified for `filter_stack_dump`. ") + + if stack_file_name: + core_text = _read_file(stack_file_name) + # further processing uses `core_text` only + + mode = detect_coredump_mode(core_text) + core_lines = core_text.split("\n") + + return filter_stackdump( + file_lines=core_lines, + ignore_bad_frames=ignore_bad_frames, + mode=mode, + sandbox_failed_task_id=sandbox_failed_task_id, + stream=output_stream, + timestamp=timestamp, + use_fingerprint=use_fingerprint, + use_stream=output_stream is not None, + ) + + +class StackDumperBase(object): + + SANDBOX_TASK_RE = re.compile(r".*/[0-9a-f]/[0-9a-f]/([0-9]+)/.*") + MAX_SAME_STACKS = 30 + + def __init__( + self, + use_fingerprint, + sandbox_failed_task_id, + stream, + use_stream, + file_lines, + timestamp, + mode, + file_name=None, + ignore_bad_frames=True, + ): + self.source_root = SourceRoot() + self.use_fingerprint = use_fingerprint + self.sandbox_task_id = None + self.sandbox_failed_task_id = sandbox_failed_task_id + self.stream = stream or sys.stdout + self.use_stream = use_stream + self.file_name = file_name + self.file_lines = file_lines + self.timestamp = timestamp + self.ignore_bad_frames = ignore_bad_frames + self.stack_class = self.get_stack_class(mode) + + self.signal = SIGNAL_NOT_FOUND + self.stacks = [] + self._main_info = [] + + @staticmethod + def is_ignored_line(line): + raise NotImplementedError("Not implemented static method `is_ignored_line`. ") + + @staticmethod + def get_stack_class(mode): + exist_modes = {} + for cls in [GDBStack, LLDBStack, SDCAssertStack]: + current_mode = cls.mode + if current_mode in exist_modes: + raise Exception("Duplicate modes are disallowed. 
Repeated mode: `{}`".format(current_mode.value)) + exist_modes[current_mode] = cls + + if mode not in exist_modes: + raise Exception("Unexpected coredump processing mode: `{}`".format(mode.value)) + + return exist_modes[mode] + + def check_signal(self, line): + raise NotImplementedError("Not implemented `check_signal`.") + + def set_sandbox_task_id(self, task_id): + self.sandbox_task_id = task_id + + def add_main_line(self, line): + self._main_info.append(line) + + def add_stack(self, stack_lines, thread_id): + if not stack_lines: + return + + stack = self.stack_class( + lines=stack_lines, + source_root=self.source_root, + thread_id=thread_id, + stream=self.stream, + ignore_bad_frames=self.ignore_bad_frames, + ) + self.stacks.append(stack) + + def dump(self): + if self.file_lines is None: + # FIXME(mvel): LLDB is not handled here + self.file_lines = get_parsable_gdb_text(_read_file(self.file_name)) + + self._collect_stacks() + + for stack in self.stacks: + stack.parse() + # stack.debug() + + if self.use_stream: + if self.use_fingerprint: + for stack in self.stacks: + self.stream.write(stack.fingerprint() + "\n") + self.stream.write("--------------------------------------\n") + return + else: + html_prolog(self.stream, self.timestamp) + + if self.sandbox_task_id is not None: + self.stream.write( + '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">' + 'Coredumped binary build task: ' + '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format( + self.sandbox_task_id + ) + ) + + if self.sandbox_failed_task_id is not None: + self.stream.write( + '<div style="padding-top: 6px; font-size: 18px; font-weight: bold;">' + 'Sandbox failed task: ' + '<a href="https://sandbox.yandex-team.ru/task/{0}">{0}</a></div>\n'.format( + self.sandbox_failed_task_id + ) + ) + + pre_class = "" + self.stream.write('<pre class="{0}">\n'.format(pre_class)) + for line in self._main_info: + self.stream.write(line.replace("&", "&").replace("<", "<") + "\n") + self.stream.write("</pre>\n") + + sorted_stacks = sorted(self.stacks, key=lambda x: (x.important, x.fingerprint()), reverse=True) + + prev_hash = None + all_hash_stacks = [] + cur_hash_stacks = [] + for stack in sorted_stacks: + if stack.hash() == 0: + continue + + if stack.hash() == prev_hash: + if len(cur_hash_stacks) < self.MAX_SAME_STACKS: + # do not collect too much + cur_hash_stacks.append(stack) + continue + + # hash changed + if cur_hash_stacks: + all_hash_stacks.append(cur_hash_stacks) + + prev_hash = stack.hash() + cur_hash_stacks = [stack, ] + + # push last + if cur_hash_stacks: + all_hash_stacks.append(cur_hash_stacks) + + if self.use_stream: + for cur_hash_stacks in all_hash_stacks: + same_hash = False + for stack in cur_hash_stacks: + stack.html(same_hash=same_hash, same_count=len(cur_hash_stacks)) + same_hash = True + + html_epilog(self.stream) + else: + raw_hash_stacks = [ + [stack.raw() for stack in common_hash_stacks] + for common_hash_stacks in all_hash_stacks + ] + return all_hash_stacks, raw_hash_stacks, self.signal + + def _collect_stacks(self): + stack_lines = [] + stack_detected = False + thread_id = None + + for line in self.file_lines: + line = line.strip() + if self.is_ignored_line(line): + continue + + if "Core was generated" in line: + m = self.SANDBOX_TASK_RE.match(line) + if m: + self.set_sandbox_task_id(int(m.group(1))) + + self.check_signal(line) + + # [Switching to thread 55 (Thread 0x7f100a94c700 (LWP 21034))] + # Thread 584 (Thread 0x7ff363c03700 (LWP 2124)): + + # see test2 and test3 + tm = 
self.THREAD_RE.match(line) + if tm: + stack_detected = True + self.add_stack( + stack_lines=stack_lines, + thread_id=thread_id, + ) + stack_lines = [] + thread_id = int(tm.group(1)) + continue + + if stack_detected: + stack_lines.append(line) + else: + self.add_main_line(line) + + # parse last stack + self.add_stack( + stack_lines=stack_lines, + thread_id=thread_id, + ) + + +class StackDumperGDB(StackDumperBase): + + SIGNAL_FLAG = "Program terminated with signal" + THREAD_RE = re.compile(r".*[Tt]hread (\d+) .*") + LINE_IN = re.compile(r"\d+\tin ") + + def is_ignored_line(self, line): + if not line: + return True + + if line.startswith("[New "): + # LWP, Thread, process + return True + + if line.startswith("[Thread "): + return True + + if line.startswith("Using "): + return True + + if line.startswith("warning:"): + return True + + if line.startswith("Python Exception"): + # TODO: handle this more carefully + return True + + if line[0] != "#" and "No such file or directory" in line: + return True + + if self.LINE_IN.match(line): + # see test1.txt for example + # 641 in /place/sandbox-data/srcdir/arcadia/library/coroutine/engine/impl.h + return True + + return False + + def check_signal(self, line): + if self.SIGNAL_FLAG in line: + self.signal = line[line.find(self.SIGNAL_FLAG) + len(self.SIGNAL_FLAG):].split(",")[0] + + +class StackDumperLLDB(StackDumperBase): + + SIGNAL_FLAG = "stop reason = signal" + + THREAD_RE = re.compile(r".*thread #(\d+), .*") + + SKIP_LINES = { + "(lldb) bt all", + "(lldb) script import sys", + "(lldb) target create", + "Core file", + + # Drop signal interceptor call + # * frame #0: 0x00007efd49042fb7 libc.so.6`__GI___libc_sigaction at sigaction.c:54 + # TODO(epsilond1): Set MAX_IMPORTANT for some thread + "__GI___libc_sigaction", + + # Drop unnamed symbols at lines like + # frame #4: 0x00007fd8054156df libstdc++.so.6`___lldb_unnamed_symbol440$$libstdc++.so.6 + 15 + "$$", + } + + @staticmethod + def is_ignored_line(line): + if not line: + return True + + for skip_line in StackDumperLLDB.SKIP_LINES: + if skip_line in line: + return True + return False + + def check_signal(self, line): + if self.SIGNAL_FLAG in line and self.signal == SIGNAL_NOT_FOUND: + self.signal = line.split()[-1] + + +class StackDumperSDCAssert(StackDumperBase): + + THREAD_RE = re.compile( + r"(\d+)(:\s)" + ) + + def is_ignored_line(self, line): + if not line: + return True + + return not re.match(self.THREAD_RE, line) + + def check_signal(self, line): + self.signal = SIGNAL_NOT_FOUND + + def _collect_stacks(self): + stack_lines = [] + for line in self.file_lines: + line = line.strip() + if self.is_ignored_line(line): + continue + stack_lines.append(line) + self.check_signal(line) + + self.add_stack( + stack_lines=stack_lines, + thread_id=0, + ) + + +def filter_stackdump( + file_name=None, + use_fingerprint=False, + sandbox_failed_task_id=None, + stream=None, + file_lines=None, + use_stream=True, + timestamp=None, + ignore_bad_frames=True, + mode=None, +): + if mode is None and file_name is not None: + mode = detect_coredump_mode(_read_file(file_name)) + if mode == CoredumpMode.GDB: + stack_dumper_cls = StackDumperGDB + elif mode == CoredumpMode.LLDB: + stack_dumper_cls = StackDumperLLDB + elif mode == CoredumpMode.SDC_ASSERT: + stack_dumper_cls = StackDumperSDCAssert + else: + raise Exception("Invalid mode: {}".format(mode.value)) + + dumper = stack_dumper_cls( + file_name=file_name, + use_fingerprint=use_fingerprint, + sandbox_failed_task_id=sandbox_failed_task_id, + stream=stream, + 
use_stream=use_stream, + file_lines=file_lines, + timestamp=timestamp, + ignore_bad_frames=ignore_bad_frames, + mode=mode, + ) + + return dumper.dump() + + +def get_parsable_gdb_text(core_text): + # FIXME(mvel): Check encoding? + # core_text = core_text.encode("ascii", "ignore").decode("ascii") + core_text = ( + core_text + # .replace("#", "\n#") # bug here + .replace("No core", "\nNo core") + .replace("[New", "\n[New") + .replace("\n\n", "\n") + ) + + return core_text.split("\n") + + +if __name__ == "__main__": + if len(sys.argv) < 2: + sys.stderr.write( + """Traceback filter "Tri Korochki" +https://wiki.yandex-team.ru/cores-aggregation/ +Usage: + core_proc.py <traceback.txt> [-f|--fingerprint] + core_proc.py -v|--version +""" + ) + sys.exit(1) + + if sys.argv[1] == "--version" or sys.argv[1] == "-v": + if os.system("svn info 2>/dev/null | grep '^Revision'") != 0: + print(CORE_PROC_VERSION) + sys.exit(0) + + sandbox_failed_task_id = None + + use_fingerprint = False + if len(sys.argv) >= 3: + if sys.argv[2] == "-f" or sys.argv[2] == "--fingerprint": + use_fingerprint = True + sandbox_failed_task_id = sys.argv[2] + + filter_stack_dump( + core_text=_read_file(sys.argv[1]), + use_fingerprint=use_fingerprint, + sandbox_failed_task_id=sandbox_failed_task_id, + output_stream=sys.stdout, + ) + + +""" +Stack group is a `Stack` objects list with the same hash (fingerprint). +""" + + +class StackEncoder(json.JSONEncoder): + """Stack JSON serializer.""" + + def default(self, obj): + if isinstance(obj, Stack): + return obj.to_json() + + return json.JSONEncoder.default(obj) + + +def serialize_stacks(stack_groups): + """ + Serialize list of stack groups to string (using JSON format). + + :param stack_groups: list of stack groups. + :return: JSON serialized to string + """ + return json.dumps(stack_groups, cls=StackEncoder) + + +def deserialize_stacks(stack_groups_str): + """ + Restore JSON-serialized stack data into stack groups. + + :param stack_groups_str: JSON-serialized data. + :return: list of stack groups + """ + stack_groups_json = json.loads(stack_groups_str) + # please do not use `map` hell here, it's impossible to debug + all_stacks = [ + [stack_factory(stack) for stack in stacks] + for stacks in stack_groups_json + ] + return all_stacks diff --git a/library/python/coredump_filter/core_proc.js b/library/python/coredump_filter/core_proc.js new file mode 100644 index 0000000000..15413adeae --- /dev/null +++ b/library/python/coredump_filter/core_proc.js @@ -0,0 +1,21 @@ + + +$(document).ready(function() { + $('#show-same-stacks').click(function() { + var stacks = $('.same-hash'); + for (var i = 0; i < stacks.length; ++i) + $(stacks[i]).show(); + $('#show-same-stacks').hide(); + $('#hide-same-stacks').show(); + return false; + }); + + $('#hide-same-stacks').click(function() { + var stacks = $('.same-hash'); + for (var i = 0; i < stacks.length; ++i) + $(stacks[i]).hide(); + $('#hide-same-stacks').hide(); + $('#show-same-stacks').show(); + return false; + }); +}); diff --git a/library/python/coredump_filter/epilog.html b/library/python/coredump_filter/epilog.html new file mode 100644 index 0000000000..b317cc2a91 --- /dev/null +++ b/library/python/coredump_filter/epilog.html @@ -0,0 +1,2 @@ + </body> +</html>
\ No newline at end of file diff --git a/library/python/coredump_filter/prolog.html b/library/python/coredump_filter/prolog.html new file mode 100644 index 0000000000..f102a7210d --- /dev/null +++ b/library/python/coredump_filter/prolog.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html> + <head> + <style>{style}</style> + <script src="https://yastatic.net/jquery/1.7.1/jquery.min.js"></script> + <script>{coredump_js}</script> + </head> + <body> + <h1>Coredump report generated on {timestamp} by Coredump/traceback filter + <i><a href="http://mvel.at.yandex-team.ru/2373">Tri Korochki</a></i> + ({version}) + </h1> + <h3>Author: <a href="https://staff.yandex-team.ru/mvel">mvel@</a> aka Mikhail Veltishchev</h3> + <h3>© Yandex LLC. All rights reversed</h3> + <div class="legend"> + <ul style="line-height: 22px"> + <li><span class="important-100">Problem</span> stacks</li> + <li><span class="important-75">Suspicious</span> stacks</li> + <li><span class="important-50">Active</span> stacks</li> + <li><span class="important-25">Non-active</span> stacks</li> + </ul> + <a class="show-same-hash" id="show-same-stacks" href="#">Show same stacks</a> + <a class="show-same-hash" id="hide-same-stacks" href="#" style="display: none">Hide same stacks</a> + </div> diff --git a/library/python/coredump_filter/styles.css b/library/python/coredump_filter/styles.css new file mode 100644 index 0000000000..fdd09ce09e --- /dev/null +++ b/library/python/coredump_filter/styles.css @@ -0,0 +1,116 @@ +body { + font-family: sans-serif; + font-size: 12px; +} + +a { + text-decoration: none; + color: #486DEC; +} + +.frame { + color: #7f7f7f; + display: inline-block; + width: 30px; +} + +.addr { + color: #999999; + padding-right: 10px; +} + +.func { + color: #7f0000; + word-wrap: normal; +} + +.source { + color: #007f00; +} + +.symbol { + color: #0000ff; +} + +h1 { + font-size: 1.5em; + margin-top: .1em; + margin-bottom: .2em; +} + +h3 { + font-size: 1em; + margin-top: .1em; + margin-bottom: .2em; +} + +pre { + overflow-x: auto; + margin: 6px 0px 6px 0px; + padding: 0px 12px 6px 12px; + position: relative; +} + +pre.important-25 { + background-color: #eeeeee; +} + +span.important-25 { + background-color: #eeeeee; + padding: 3px; +} + +pre.important-50 { + background-color: #e7ffe7; +} + +span.important-50 { + background-color: #e7ffe7; + padding: 3px; +} + +pre.important-75 { + background-color: #ffffcc; +} + +span.important-75 { + background-color: #ffffcc; + padding: 3px; +} + +pre.important-100 { + background-color: #ffdddd; +} + +span.important-100 { + background-color: #ffdddd; + padding: 3px; +} + +a.show-same-hash { + padding: 0px 20px 0px 20px; +} + +/* hidden by default */ +.same-hash { + display: none; +} + +span.hash { + position: absolute; + top: 3px; right: 3px; +} + +div.legend { + position: absolute; + z-index: 1; + top: 5px; + right: 8px; + border: 1px solid #7f7f7f; + border-radius: 3px; + padding: 0px 20px 3px 0px; +} + +div.legend ul { + margin: 3px 0px 3px 0px; +} diff --git a/library/python/coredump_filter/ya.make b/library/python/coredump_filter/ya.make new file mode 100644 index 0000000000..fc8ec1a45f --- /dev/null +++ b/library/python/coredump_filter/ya.make @@ -0,0 +1,23 @@ +PY23_LIBRARY() + +PY_SRCS( + __init__.py +) + +RESOURCE_FILES( + PREFIX library/python/coredump_filter/ + core_proc.js + epilog.html + prolog.html + styles.css +) + +IF(PYTHON2) + PEERDIR(contrib/deprecated/python/enum34) +ENDIF() + +END() + +RECURSE( + tests +) diff --git a/library/python/json/__init__.py b/library/python/json/__init__.py 
new file mode 100644 index 0000000000..c6420d5e6d --- /dev/null +++ b/library/python/json/__init__.py @@ -0,0 +1,44 @@ +from library.python.json.loads import loads as _loads +from simplejson import loads as _sj_loads + + +def loads(*args, **kwargs): + try: + return _loads(*args, **kwargs) + except Exception as e: + if 'invalid syntax at token' in str(e): + kwargs.pop('intern_keys', None) + kwargs.pop('intern_vals', None) + kwargs.pop('may_unicode', None) + return _sj_loads(*args, **kwargs) + + raise + + +from simplejson import load, dump, dumps # noqa + + +def read_file(file_name, **kwargs): + """ + Read file and return its parsed json contents. + + All kwargs will be proxied to `json.load` method as is. + + :param file_name: file with json contents + :return: parsed json contents + """ + with open(file_name) as f: + return load(f, **kwargs) + + +def write_file(file_name, contents, **kwargs): + """ + Dump json data to file. + + All kwargs will be proxied to `json.dump` method as is. + + :param file_name: file to dump to + :param contents: JSON-serializable object + """ + with open(file_name, "w") as f: + dump(contents, f, **kwargs) diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp new file mode 100644 index 0000000000..19cdb096ae --- /dev/null +++ b/library/python/json/loads.cpp @@ -0,0 +1,246 @@ +#include "loads.h" + +#include <Python.h> + +#include <library/cpp/json/fast_sax/parser.h> + +#include <util/generic/algorithm.h> +#include <util/generic/stack.h> +#include <util/generic/vector.h> +#include <util/generic/ylimits.h> +#include <util/string/ascii.h> + +using namespace NJson; + +namespace { + enum EKind { + Undefined, + Array, + Dict, + Value, + Key, + }; + + static inline TStringBuf ToStr(EKind kind) noexcept { + switch (kind) { + case Undefined: + return TStringBuf("Undefined"); + + case Array: + return TStringBuf("Array"); + + case Dict: + return TStringBuf("Dict"); + + case Value: + return TStringBuf("Value"); + + case Key: + return TStringBuf("Key"); + } + + Y_UNREACHABLE(); + } + + struct TUnref { + static inline void Destroy(PyObject* o) noexcept { + Py_XDECREF(o); + } + }; + + using TObjectPtr = TAutoPtr<PyObject, TUnref>; + + static inline TObjectPtr BuildBool(bool val) noexcept { + if (val) { + Py_RETURN_TRUE; + } + + Py_RETURN_FALSE; + } + + // Translate python exceptions from object-creating functions into c++ exceptions + // Such errors are reported by returning nullptr + // When a python error is set and C++ exception is caught by Cython wrapper, + // Python exception is propagated, while C++ exception is discarded. + PyObject* CheckNewObject(PyObject* obj) { + Y_ENSURE(obj != nullptr, "got python exception"); + return obj; + } + + void CheckRetcode(int retcode) { + Y_ENSURE(retcode == 0, "got python exception"); + } + + static inline TObjectPtr BuildSmall(long val) { +#if PY_VERSION_HEX >= 0x03000000 + return CheckNewObject(PyLong_FromLong(val)); +#else + return CheckNewObject(PyInt_FromLong(val)); +#endif + } + + PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) { +#if PY_VERSION_HEX >= 0x03000000 + Y_UNUSED(mayUnicode); + PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size()); + if (intern) { + PyUnicode_InternInPlace(&pyStr); + } +#else + const bool needUnicode = mayUnicode && !AllOf(str, IsAscii); + PyObject* pyStr = needUnicode ? 
PyUnicode_FromStringAndSize(str.data(), str.size()) + : PyString_FromStringAndSize(str.data(), str.size()); + if (intern && !needUnicode) { + PyString_InternInPlace(&pyStr); + } +#endif + return pyStr; + } + + struct TVal { + EKind Kind = Undefined; + TObjectPtr Val; + + inline TVal() noexcept + : Kind(Undefined) + { + } + + inline TVal(EKind kind, TObjectPtr val) noexcept + : Kind(kind) + , Val(val) + { + } + }; + + static inline TObjectPtr NoneRef() noexcept { + Py_RETURN_NONE; + } + + struct TContext: public TJsonCallbacks { + const bool InternKeys; + const bool InternVals; + const bool MayUnicode; + TStack<TVal, TVector<TVal>> S; + + inline TContext(bool internKeys, bool internVals, bool mayUnicode) + : TJsonCallbacks(true) + , InternKeys(internKeys) + , InternVals(internVals) + , MayUnicode(mayUnicode) + { + S.emplace(); + } + + inline bool Consume(TObjectPtr o) { + auto& t = S.top(); + + if (t.Kind == Array) { + CheckRetcode(PyList_Append(t.Val.Get(), o.Get())); + } else if (t.Kind == Key) { + auto key = S.top().Val; + + S.pop(); + + CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get())); + } else { + t = TVal(Value, o); + } + + return true; + } + + inline TObjectPtr Pop(EKind expect) { + auto res = S.top(); + + S.pop(); + + if (res.Kind != expect) { + ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")"; + } + + return res.Val; + } + + inline void Push(EKind kind, TObjectPtr object) { + S.push(TVal(kind, object)); + } + + virtual bool OnNull() { + return Consume(NoneRef()); + } + + virtual bool OnBoolean(bool v) { + return Consume(BuildBool(v)); + } + + virtual bool OnInteger(long long v) { + if (v >= (long long)Min<long>()) { + return Consume(BuildSmall((long)v)); + } + + return Consume(CheckNewObject(PyLong_FromLongLong(v))); + } + + virtual bool OnUInteger(unsigned long long v) { + if (v <= (unsigned long long)Max<long>()) { + return Consume(BuildSmall((long)v)); + } + + return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v))); + } + + virtual bool OnDouble(double v) { + return Consume(CheckNewObject(PyFloat_FromDouble(v))); + } + + virtual bool OnString(const TStringBuf& v) { + return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode))); + } + + virtual bool OnOpenMap() { + Push(Dict, CheckNewObject(PyDict_New())); + + return true; + } + + virtual bool OnCloseMap() { + return Consume(Pop(Dict)); + } + + virtual bool OnMapKey(const TStringBuf& k) { + Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode))); + return true; + } + + virtual bool OnOpenArray() { + Push(Array, CheckNewObject(PyList_New(0))); + + return true; + } + + virtual bool OnCloseArray() { + return Consume(Pop(Array)); + } + }; +} + +PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) { + TContext ctx(internKeys, internVals, mayUnicode); + + if (!len) { + ythrow yexception() << "parse error: zero length input string"; + } + + if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) { + ythrow yexception() << "parse error"; + } + + auto& s = ctx.S; + + if (!s || s.top().Kind != Value) { + ythrow yexception() << "shit happen"; + } + + return s.top().Val.Release(); +} diff --git a/library/python/json/loads.h b/library/python/json/loads.h new file mode 100644 index 0000000000..62dcdf6f21 --- /dev/null +++ b/library/python/json/loads.h @@ -0,0 +1,5 @@ +#pragma once + +#include <Python.h> + +PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys = 
false, bool internVals = false, bool mayUnicode = false); diff --git a/library/python/json/loads.pyx b/library/python/json/loads.pyx new file mode 100644 index 0000000000..82e5c6dce7 --- /dev/null +++ b/library/python/json/loads.pyx @@ -0,0 +1,14 @@ +from libcpp cimport bool + +cdef extern from "library/python/json/loads.h": + object LoadJsonFromString(const char*, size_t, bool internKeys, bool internVals, bool mayUnicode) except + + + +def loads(s, intern_keys = False, intern_vals = False, may_unicode = False): + if isinstance(s, unicode): + s = s.encode('utf-8') + + try: + return LoadJsonFromString(s, len(s), intern_keys, intern_vals, may_unicode) + except Exception as e: + raise ValueError(str(e)) diff --git a/library/python/json/ya.make b/library/python/json/ya.make new file mode 100644 index 0000000000..74a82de9d8 --- /dev/null +++ b/library/python/json/ya.make @@ -0,0 +1,17 @@ +PY23_LIBRARY() + +PEERDIR( + contrib/python/simplejson + library/cpp/json/fast_sax +) + +PY_SRCS( + __init__.py + loads.pyx +) + +SRCS( + loads.cpp +) + +END() diff --git a/library/python/mlockall/__init__.py b/library/python/mlockall/__init__.py new file mode 100644 index 0000000000..4d867d692e --- /dev/null +++ b/library/python/mlockall/__init__.py @@ -0,0 +1,10 @@ +import sys + + +def mlockall_current(): + if not sys.platform.startswith('linux'): + return -1 + + import library.python.mlockall.mlockall as ml + + return ml.mlockall_current() diff --git a/library/python/mlockall/mlockall.pyx b/library/python/mlockall/mlockall.pyx new file mode 100644 index 0000000000..b35d661a42 --- /dev/null +++ b/library/python/mlockall/mlockall.pyx @@ -0,0 +1,19 @@ +cdef extern from "<util/system/error.h>": + int LastSystemError() + +cdef extern from "<util/system/mlock.h>": + cdef enum ELockAllMemoryFlag: + LockCurrentMemory + LockFutureMemory + cppclass ELockAllMemoryFlags: + operator=(ELockAllMemoryFlag) + void LockAllMemory(ELockAllMemoryFlags flags) except+ + +def mlockall_current(): + cdef ELockAllMemoryFlags flags + try: + flags = LockCurrentMemory + LockAllMemory(flags) + return 0 + except Exception: + return LastSystemError() diff --git a/library/python/mlockall/ya.make b/library/python/mlockall/ya.make new file mode 100644 index 0000000000..96eba051ab --- /dev/null +++ b/library/python/mlockall/ya.make @@ -0,0 +1,14 @@ +PY23_LIBRARY() + +PY_SRCS( + __init__.py +) + +IF (OS_LINUX) + PY_SRCS( + mlockall.pyx + ) +ENDIF() + +END() + diff --git a/library/python/nstools/__init__.py b/library/python/nstools/__init__.py new file mode 100644 index 0000000000..34cc0f9574 --- /dev/null +++ b/library/python/nstools/__init__.py @@ -0,0 +1,6 @@ +from .nstools import unshare_ns, move_to_ns + +__all__ = [ + 'unshare_ns', + 'move_to_ns' +] diff --git a/library/python/nstools/nstools.pyx b/library/python/nstools/nstools.pyx new file mode 100644 index 0000000000..5ef30373ff --- /dev/null +++ b/library/python/nstools/nstools.pyx @@ -0,0 +1,28 @@ +from cpython.exc cimport PyErr_SetFromErrno + +cdef extern from "<sched.h>" nogil: + int setns(int fd, int mode) + int unshare(int flags) + + cpdef enum: + Fs "CLONE_FS" + # Cgroup "CLONE_NEWCGROUP" + Ipc "CLONE_NEWIPC" + Network "CLONE_NEWNET" + Mount "CLONE_NEWNS" + Pid "CLONE_NEWPID" + User "CLONE_NEWUSER" + Uts "CLONE_NEWUTS" + +def unshare_ns(int flags): + cdef int ret = unshare(flags) + if ret != 0: + PyErr_SetFromErrno(OSError) + +def move_to_ns(object fileobject, int mode): + if not isinstance(fileobject, int): + fileobject = fileobject.fileno() + + cdef int ret = setns(fileobject, 
mode) + if ret != 0: + PyErr_SetFromErrno(OSError) diff --git a/library/python/nstools/ya.make b/library/python/nstools/ya.make new file mode 100644 index 0000000000..49eddeb919 --- /dev/null +++ b/library/python/nstools/ya.make @@ -0,0 +1,14 @@ +PY23_LIBRARY() + +IF(OS_LINUX) +PY_SRCS( + __init__.py + nstools.pyx +) +ELSE() +PY_SRCS( + nstools.py +) +ENDIF() + +END() diff --git a/library/python/par_apply/__init__.py b/library/python/par_apply/__init__.py new file mode 100644 index 0000000000..19b89ae843 --- /dev/null +++ b/library/python/par_apply/__init__.py @@ -0,0 +1,114 @@ +import sys +import threading +import six + +from six.moves import queue + + +def par_apply(seq, func, thr_num, join_polling=None): + if thr_num < 2: + for x in seq: + yield func(x) + + return + + in_q = queue.Queue() + out_q = queue.Queue() + + def enumerate_blocks(): + n = 0 + + for b in seq: + yield n, [b] + n += 1 + + yield n, None + + def iter_out(): + n = 0 + d = {} + + while True: + if n in d: + r = d[n] + del d[n] + n += 1 + + yield r + else: + res = out_q.get() + + d[res[0]] = res + + out_iter = iter_out() + + def wait_block(): + for x in out_iter: + return x + + def iter_compressed(): + p = 0 + + for n, b in enumerate_blocks(): + in_q.put((n, b)) + + while n > p + (thr_num * 2): + p, b, c = wait_block() + + if not b: + return + + yield p, c + + while True: + p, b, c = wait_block() + + if not b: + return + + yield p, c + + def proc(): + while True: + data = in_q.get() + + if data is None: + return + + n, b = data + + if b: + try: + res = (func(b[0]), None) + except Exception: + res = (None, sys.exc_info()) + else: + res = (None, None) + + out_q.put((n, b, res)) + + thrs = [threading.Thread(target=proc) for i in range(0, thr_num)] + + for t in thrs: + t.start() + + try: + for p, c in iter_compressed(): + res, err = c + + if err: + six.reraise(*err) + + yield res + finally: + for t in thrs: + in_q.put(None) + + for t in thrs: + if join_polling is not None: + while True: + t.join(join_polling) + if not t.is_alive(): + break + else: + t.join() diff --git a/library/python/par_apply/ya.make b/library/python/par_apply/ya.make new file mode 100644 index 0000000000..b14592ab79 --- /dev/null +++ b/library/python/par_apply/ya.make @@ -0,0 +1,11 @@ +PY23_LIBRARY() + +PEERDIR( + contrib/python/six +) + +PY_SRCS( + __init__.py +) + +END() diff --git a/library/python/symbols/libmagic/syms.cpp b/library/python/symbols/libmagic/syms.cpp new file mode 100644 index 0000000000..839441ae14 --- /dev/null +++ b/library/python/symbols/libmagic/syms.cpp @@ -0,0 +1,19 @@ +#include <contrib/libs/libmagic/src/magic.h> + +#include <library/python/symbols/registry/syms.h> + +BEGIN_SYMS("magic") +SYM(magic_open) +SYM(magic_close) +SYM(magic_error) +SYM(magic_errno) +SYM(magic_file) +SYM(magic_buffer) +SYM(magic_load) +SYM(magic_setflags) +SYM(magic_check) +SYM(magic_compile) +SYM(magic_descriptor) +SYM(magic_list) +SYM(magic_version) +END_SYMS() diff --git a/library/python/symbols/libmagic/ya.make b/library/python/symbols/libmagic/ya.make new file mode 100644 index 0000000000..a248603a41 --- /dev/null +++ b/library/python/symbols/libmagic/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + contrib/libs/libmagic + library/python/symbols/registry +) + +SRCS( + GLOBAL syms.cpp +) + +END() diff --git a/library/python/testing/coverage_utils/__init__.py b/library/python/testing/coverage_utils/__init__.py new file mode 100644 index 0000000000..3313eee7b5 --- /dev/null +++ b/library/python/testing/coverage_utils/__init__.py @@ -0,0 +1,14 @@ 
+import re + + +def make_filter(prefix_filter, exclude_regexp): + filters = [] + if prefix_filter: + filters.append(lambda x: x.startswith(prefix_filter)) + if exclude_regexp: + regexp = re.compile(exclude_regexp) + filters.append(lambda x: not regexp.search(x)) + + if filters: + return lambda x: all(pred(x) for pred in filters) + return lambda x: True diff --git a/library/python/testing/coverage_utils/ya.make b/library/python/testing/coverage_utils/ya.make new file mode 100644 index 0000000000..3582136180 --- /dev/null +++ b/library/python/testing/coverage_utils/ya.make @@ -0,0 +1,5 @@ +PY23_LIBRARY() + +PY_SRCS(__init__.py) + +END() diff --git a/library/python/testing/system_info/__init__.py b/library/python/testing/system_info/__init__.py new file mode 100644 index 0000000000..8bad854d97 --- /dev/null +++ b/library/python/testing/system_info/__init__.py @@ -0,0 +1,204 @@ +import collections +import psutil +from functools import wraps + + +def safe(name): + def decorator_safe(func): + """ + Decorator for try-catch on string assembly + """ + + @wraps(func) + def wrap_safe(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + return "Failed to get {}: {}".format(name, e) + + return wrap_safe + + return decorator_safe + + +def get_proc_attrib(attr): + if callable(attr): + try: + return attr() + except psutil.Error: + return None + else: + return attr + + +@safe("cpu/mem info") +def _cpu_mem_str(): + vm = psutil.virtual_memory() + cpu_tp = psutil.cpu_times_percent(0.1) + + str_items = [] + str_items.append( + "CPU: Idle: {}% User: {}% System: {}% IOwait: {}%\n".format( + cpu_tp.idle, cpu_tp.user, cpu_tp.system, cpu_tp.iowait + ) + ) + + str_items.append( + "MEM: total {} Gb available: {} Gb used: {} Gb free: {} Gb active: {} Gb inactive: {} Gb shared: {} Gb\n".format( + round(vm.total / 1e9, 2), + round(vm.available / 1e9, 2), + round(vm.used / 1e9, 2), + round(vm.free / 1e9, 2), + round(vm.active / 1e9, 2), + round(vm.inactive / 1e9, 2), + round(vm.shared / 1e9, 2), + ) + ) + + str_items.append("Used swap: {}%\n".format(psutil.swap_memory().percent)) + + return "".join(str_items) + + +@safe("processes tree") +def _proc_tree_str(): + tree = collections.defaultdict(list) + for p in psutil.process_iter(): + try: + tree[p.ppid()].append(p.pid) + except (psutil.NoSuchProcess, psutil.ZombieProcess): + pass + # on systems supporting PID 0, PID 0's parent is usually 0 + if 0 in tree and 0 in tree[0]: + tree[0].remove(0) + + return _print_proc_tree(min(tree), tree) + + +def _print_proc_tree(parent_root, tree, indent_root=''): + stack = [(parent_root, indent_root, "")] + str_items = list() + + while len(stack) > 0: + try: + parent, indent, prefix = stack.pop() + p = psutil.Process(parent) + name = get_proc_attrib(p.name) + str_items.append("{}({}, '{}'".format(prefix, parent, name if name else '?')) + + exe = get_proc_attrib(p.exe) + if exe: + str_items.append(" [{}]".format(exe)) + + str_items.append(") ") + str_items.append(" st: {}".format(p.status())) + str_items.append(" mem: {}%".format(round(p.memory_percent(), 2))) + + ndfs = get_proc_attrib(p.num_fds) + if ndfs and ndfs > 0: + str_items.append(" fds: {}".format(ndfs)) + + conns = get_proc_attrib(p.connections) + if conns and len(conns) > 1: + str_items.append(" num con: {}".format(len(conns))) + + ths = get_proc_attrib(p.num_threads) + if ths and ths > 1: + str_items.append(" threads: {}".format(ths)) + + str_items.append("\n") + except psutil.Error: + name = "?" 
+ str_items.append("({}, '{}')\n".format(parent, name)) + + if parent not in tree: + continue + + child = tree[parent][-1] + stack.append((child, indent + " ", indent + "`_ ")) + + children = tree[parent][:-1] + children.reverse() + for child in children: + stack.append((child, indent + "| ", indent + "|- ")) + + return "".join(str_items) + + +@safe("network info") +def _network_conn_str(): + str_items = list() + + counters = psutil.net_io_counters() + str_items.append( + "\nPackSent: {} PackRecv: {} ErrIn: {} ErrOut: {} DropIn: {} DropOut: {}\n\n".format( + counters.packets_sent, + counters.packets_recv, + counters.errin, + counters.errout, + counters.dropin, + counters.dropout, + ) + ) + + ifaces = psutil.net_if_addrs() + conns = psutil.net_connections() + list_ip = collections.defaultdict(list) + for con in conns: + list_ip[con.laddr.ip].append(con) + + for name, addrs in ifaces.iteritems(): + str_items.append("{}:\n".format(name)) + + for ip in addrs: + str_items.append(" {}".format(ip.address)) + if ip.netmask: + str_items.append(" mask={}".format(ip.netmask)) + if ip.broadcast: + str_items.append(" bc={}".format(ip.broadcast)) + str_items.append("\n") + + for con in list_ip[ip.address]: + str_items.append(" {}".format(con.laddr.port)) + if con.raddr: + str_items.append(" <--> {} : {}".format(con.raddr.ip, con.raddr.port)) + str_items.append(" (stat: {}".format(con.status)) + if con.pid: + str_items.append(" proc: {} (pid={})".format(psutil.Process(con.pid).exe(), con.pid)) + str_items.append(")\n") + + del list_ip[ip.address] + + str_items.append("***\n") + for ip, conns in list_ip.iteritems(): + str_items.append(" {}\n".format(ip)) + + for con in conns: + str_items.append(" {}".format(con.laddr.port)) + if con.raddr: + str_items.append(" <--> {} : {}".format(con.raddr.ip, con.raddr.port)) + str_items.append(" (stat: {}".format(con.status)) + if con.pid: + str_items.append(" proc: {} (pid={})".format(psutil.Process(con.pid).exe(), con.pid)) + str_items.append(")\n") + + return "".join(str_items) + + +@safe("info") +def get_system_info(): + str_items = list() + + str_items.append("\n --- CPU MEM --- \n") + str_items.append(_cpu_mem_str()) + str_items.append("\n") + + str_items.append("\n --- PROCESSES TREE --- \n") + str_items.append(_proc_tree_str()) + str_items.append("\n") + + str_items.append("\n --- NETWORK INFO --- \n") + str_items.append(_network_conn_str()) + str_items.append("\n") + + return "".join(str_items) diff --git a/library/python/testing/system_info/ya.make b/library/python/testing/system_info/ya.make new file mode 100644 index 0000000000..f655db8ebe --- /dev/null +++ b/library/python/testing/system_info/ya.make @@ -0,0 +1,15 @@ +PY23_LIBRARY() + +PY_SRCS(__init__.py) + +PEERDIR( + contrib/python/psutil +) + +STYLE_PYTHON() + +END() + +RECURSE_FOR_TESTS( + test +) |
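A few usage sketches for the helpers introduced above follow; every call site, file name and value in them is hypothetical and not part of the diff itself.

The loads() wrapper in library/python/json first calls the fast C++ SAX parser and falls back to simplejson when that parser rejects the input (the "invalid syntax at token" path), dropping the intern/unicode kwargs that simplejson does not understand. A minimal sketch, assuming the package is built and importable as library.python.json:

# Hedged usage sketch for the loads()/read_file()/write_file() helpers above;
# "out.json" is a made-up path.
import library.python.json as lpj

data = lpj.loads('{"a": 1, "b": [true, null]}', intern_keys=True)
assert data == {"a": 1, "b": [True, None]}

lpj.write_file("out.json", data, indent=2)   # extra kwargs are proxied to simplejson.dump
assert lpj.read_file("out.json") == data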
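par_apply() fans an iterable out to thr_num worker threads and yields func(x) results in the original input order; with thr_num < 2 it simply maps func over the sequence in the calling thread. A small sketch with an illustrative worker function:

# Illustrative only: the order of results matches the input even though
# slow_square() runs concurrently on 4 threads.
from library.python.par_apply import par_apply

def slow_square(x):
    return x * x

print(list(par_apply(range(10), slow_square, thr_num=4)))
# -> [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]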
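make_filter() from testing/coverage_utils combines an optional path prefix and an optional exclusion regexp into a single predicate; with neither argument it accepts everything. For example (paths below are invented):

# Hypothetical paths; keep() is True only for names under the prefix that do
# not match the exclusion regexp.
from library.python.testing.coverage_utils import make_filter

keep = make_filter("library/python/", r"/tests?/")
assert keep("library/python/json/__init__.py")
assert not keep("library/python/json/tests/test_loads.py")  # hits the exclude regexp
assert not keep("contrib/python/six/six.py")                # wrong prefix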
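mlockall_current() returns 0 on success, -1 on non-Linux platforms where the Cython module is not built at all, and the value of LastSystemError() when LockAllMemory() fails. A hedged sketch of a call site:

# Sketch: lock all currently mapped pages into RAM; a non-zero return code is
# either -1 (platform not supported) or an errno value from the failed lock attempt.
from library.python.mlockall import mlockall_current

rc = mlockall_current()
if rc != 0:
    print("mlockall not applied, code {}".format(rc))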
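The safe(name) decorator in testing/system_info turns any exception raised by a collector into a "Failed to get ..." line, so one broken psutil call degrades a single section of the report instead of aborting get_system_info(). The same pattern in isolation, with a made-up decorated function:

# Standalone illustration of the safe() decorator pattern; disk_usage_str() is hypothetical.
from functools import wraps

def safe(name):
    def decorator_safe(func):
        @wraps(func)
        def wrap_safe(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                return "Failed to get {}: {}".format(name, e)
        return wrap_safe
    return decorator_safe

@safe("disk usage")
def disk_usage_str():
    raise RuntimeError("boom")

print(disk_usage_str())  # prints: Failed to get disk usage: boom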