intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/python/cores
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
2 files changed, 208 insertions, 0 deletions
diff --git a/library/python/cores/__init__.py b/library/python/cores/__init__.py
new file mode 100644
index 0000000000..fdb1f82a46
--- /dev/null
+++ b/library/python/cores/__init__.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+
+import os
+import re
+import glob
+import socket
+import logging
+import platform
+import subprocess
+
+import six
+
+from library.python.reservoir_sampling import reservoir_sampling
+
+
+logger = logging.getLogger(__name__)
+
+
+def _read_file(filename):
+    with open(filename) as afile:
+        return afile.read().strip("\n")
+
+
+def recover_core_dump_file(binary_path, cwd, pid):
+
+    class CoreFilePattern(object):
+        def __init__(self, path, mask):
+            self.path = path
+            self.mask = mask
+
+    cwd = cwd or os.getcwd()
+    system = platform.system().lower()
+    if system.startswith("linux"):
+        import stat
+        import resource
+
+        logger.debug("hostname = '%s'", socket.gethostname())
+        logger.debug("rlimit_core = '%s'", str(resource.getrlimit(resource.RLIMIT_CORE)))
+        core_pattern = _read_file("/proc/sys/kernel/core_pattern")
+        logger.debug("core_pattern = '%s'", core_pattern)
+        if core_pattern.startswith("/"):
+            default_pattern = CoreFilePattern(os.path.dirname(core_pattern), '*')
+        else:
+            default_pattern = CoreFilePattern(cwd, '*')
+
+        def resolve_core_mask(core_mask):
+            def resolve(text):
+                if text == "%p":
+                    return str(pid)
+                elif text == "%e":
+                    # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/include/linux/sched.h#L847
+                    # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/fs/coredump.c#L278
+                    return os.path.basename(binary_path)[:15]
+                elif text == "%E":
+                    return binary_path.replace("/", "!")
+                elif text == "%%":
+                    return "%"
+                elif text.startswith("%"):
+                    return "*"
+                return text
+
+            parts = filter(None, re.split(r"(%.)", core_mask))
+            return "".join([resolve(p) for p in parts])
+
+        # don't interpret a program for piping core dumps as a pattern
+        if core_pattern and not core_pattern.startswith("|"):
+            default_pattern.mask = os.path.basename(core_pattern)
+        else:
+            core_uses_pid = int(_read_file("/proc/sys/kernel/core_uses_pid"))
+            logger.debug("core_uses_pid = '%d'", core_uses_pid)
+            if core_uses_pid == 0:
+                default_pattern.mask = "core"
+            else:
+                default_pattern.mask = "core.%p"
+
+        # widely distributed core dump dir and mask (see DEVTOOLS-4408)
+        yandex_pattern = CoreFilePattern('/coredumps', '%e.%p.%s')
+        yandex_market_pattern = CoreFilePattern('/var/tmp/cores', 'core.%..%e.%s.%p.*')
+
+        for pattern in [default_pattern, yandex_pattern, yandex_market_pattern]:
+            pattern.mask = resolve_core_mask(pattern.mask)
+
+            if not os.path.exists(pattern.path):
+                logger.warning("Core dump dir doesn't exist: %s", pattern.path)
+                continue
+
+            logger.debug(
+                "Core dump dir (%s) permission mask: %s (expected: %s (%s-dir, %s-sticky bit))",
+                pattern.path,
+                oct(os.stat(pattern.path)[stat.ST_MODE]),
+                oct(stat.S_IFDIR | stat.S_ISVTX | 0o777),
+                oct(stat.S_IFDIR),
+                oct(stat.S_ISVTX),
+            )
+            logger.debug("Search for core dump files match pattern '%s' in '%s'", pattern.mask, pattern.path)
+            cores = glob.glob(os.path.join(pattern.path, pattern.mask))
+            files = os.listdir(pattern.path)
+            logger.debug(
+                "Matched core dump files (%d/%d): [%s] (mismatched samples: %s)",
+                len(cores),
+                len(files),
+                ", ".join(cores),
+                ", ".join(reservoir_sampling(files, 5)),
+            )
+
+            if len(cores) == 1:
+                return cores[0]
+            elif len(cores) > 1:
+                stat = [(filename, os.stat(filename).st_mtime) for filename in cores]
+                entry = sorted(stat, key=lambda x: x[1])[-1]
+                logger.debug("Latest core dump file: '%s' with %d mtime", entry[0], entry[1])
+                return entry[0]
+    else:
+        logger.debug("Core dump file recovering is not supported on '%s'", system)
+    return None
+
+
+def get_gdb_full_backtrace(binary, core, gdb_path):
+    cmd = [
+        gdb_path, binary, core,
+        "--eval-command", "set print thread-events off",
+        "--eval-command", "thread apply all backtrace full",
+        "--batch",
+        "--quiet",
+    ]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, stderr = proc.communicate()
+    output = six.ensure_str(output)
+    if stderr:
+        output += "\nstderr >>\n" + six.ensure_str(stderr)
+    return output
+
+
+def get_problem_stack(backtrace):
+    stack = []
+    found_thread1 = False
+    regex = re.compile(r'[Tt]hread (\d+)')
+
+    for line in backtrace.split("\n"):
+        match = regex.search(line)
+        if match:
+            if found_thread1:
+                break
+            if int(match.group(1)) == 1:
+                found_thread1 = True
+        if found_thread1:
+            stack.append(line)
+
+    if not stack:
+        return backtrace
+    return "\n".join(stack)
+
+
+# XXX
+def colorize_backtrace(text):
+    filters = [
+        # Function names and the class they belong to
+        (re.compile(r"^(#[0-9]+ .*?)([a-zA-Z0-9_:\.@]+)(\s?\()", flags=re.MULTILINE), r"\1[[c:cyan]]\2[[rst]]\3"),
+        # Function argument names
+        (re.compile(r"([a-zA-Z0-9_#]*)(\s?=\s?)"), r"[[c:green]]\1[[rst]]\2"),
+        # Stack frame number
+        (re.compile(r"^(#[0-9]+)", flags=re.MULTILINE), r"[[c:red]]\1[[rst]]"),
+        # Thread id colorization
+        (re.compile(r"^([ \*]) ([0-9]+)", flags=re.MULTILINE), r"[[c:light-cyan]]\1 [[c:red]]\2[[rst]]"),
+        # File path and line number
+        (re.compile(r"(\.*[/A-Za-z0-9\+_\.\-]*):(([0-9]+)(:[0-9]+)?)$", flags=re.MULTILINE), r"[[c:light-grey]]\1[[rst]]:[[c:magenta]]\2[[rst]]"),
+        # Addresses
+        (re.compile(r"\b(0x[a-f0-9]{6,})\b"), r"[[c:light-grey]]\1[[rst]]"),
+    ]
+
+    for regex, substitution in filters:
+        text = regex.sub(substitution, text)
+    return text
+
+
+def resolve_addresses(addresses, symbolizer, binary):
+    addresses = list(set(addresses))
+    cmd = [
+        symbolizer,
+        "-demangle",
+        "-obj",
+        binary,
+    ]
+    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, err = proc.communicate(input="\n".join(addresses))
+    if proc.returncode:
+        raise Exception("Symbolizer failed with rc:{}\nstderr: {}".format(proc.returncode, err))
+
+    resolved = filter(None, out.split("\n\n"))
+    if len(addresses) != len(resolved):
+        raise Exception("llvm-symbolizer can not extract lines from addresses (count mismatch: {}-{})".format(len(addresses), len(resolved)))
+
+    return {k: v.strip(" \n") for k, v in zip(addresses, resolved)}
diff --git a/library/python/cores/ya.make b/library/python/cores/ya.make
new file mode 100644
index 0000000000..76264e9cce
--- /dev/null
+++ b/library/python/cores/ya.make
@@ -0,0 +1,15 @@
+OWNER(
+    prettyboy
+    g:yatest
+)
+
+PY23_LIBRARY()
+
+PY_SRCS(__init__.py)
+
+PEERDIR(
+    contrib/python/six
+    library/python/reservoir_sampling
+)
+
+END()
author	Devtools Arcadia <arcadia-devtools@yandex-team.ru>	2022-02-07 18:08:42 +0300
committer	Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>	2022-02-07 18:08:42 +0300
commit	1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree	e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/python/cores
download	ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz