aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/cores
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/python/cores
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/python/cores')
-rw-r--r--library/python/cores/__init__.py193
-rw-r--r--library/python/cores/ya.make15
2 files changed, 208 insertions, 0 deletions
diff --git a/library/python/cores/__init__.py b/library/python/cores/__init__.py
new file mode 100644
index 0000000000..fdb1f82a46
--- /dev/null
+++ b/library/python/cores/__init__.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+
+import os
+import re
+import glob
+import socket
+import logging
+import platform
+import subprocess
+
+import six
+
+from library.python.reservoir_sampling import reservoir_sampling
+
+
+logger = logging.getLogger(__name__)
+
+
+def _read_file(filename):
+ with open(filename) as afile:
+ return afile.read().strip("\n")
+
+
+def recover_core_dump_file(binary_path, cwd, pid):
+
+ class CoreFilePattern(object):
+ def __init__(self, path, mask):
+ self.path = path
+ self.mask = mask
+
+ cwd = cwd or os.getcwd()
+ system = platform.system().lower()
+ if system.startswith("linux"):
+ import stat
+ import resource
+
+ logger.debug("hostname = '%s'", socket.gethostname())
+ logger.debug("rlimit_core = '%s'", str(resource.getrlimit(resource.RLIMIT_CORE)))
+ core_pattern = _read_file("/proc/sys/kernel/core_pattern")
+ logger.debug("core_pattern = '%s'", core_pattern)
+ if core_pattern.startswith("/"):
+ default_pattern = CoreFilePattern(os.path.dirname(core_pattern), '*')
+ else:
+ default_pattern = CoreFilePattern(cwd, '*')
+
+ def resolve_core_mask(core_mask):
+ def resolve(text):
+ if text == "%p":
+ return str(pid)
+ elif text == "%e":
+ # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/include/linux/sched.h#L847
+ # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/fs/coredump.c#L278
+ return os.path.basename(binary_path)[:15]
+ elif text == "%E":
+ return binary_path.replace("/", "!")
+ elif text == "%%":
+ return "%"
+ elif text.startswith("%"):
+ return "*"
+ return text
+
+ parts = filter(None, re.split(r"(%.)", core_mask))
+ return "".join([resolve(p) for p in parts])
+
+ # don't interpret a program for piping core dumps as a pattern
+ if core_pattern and not core_pattern.startswith("|"):
+ default_pattern.mask = os.path.basename(core_pattern)
+ else:
+ core_uses_pid = int(_read_file("/proc/sys/kernel/core_uses_pid"))
+ logger.debug("core_uses_pid = '%d'", core_uses_pid)
+ if core_uses_pid == 0:
+ default_pattern.mask = "core"
+ else:
+ default_pattern.mask = "core.%p"
+
+ # widely distributed core dump dir and mask (see DEVTOOLS-4408)
+ yandex_pattern = CoreFilePattern('/coredumps', '%e.%p.%s')
+ yandex_market_pattern = CoreFilePattern('/var/tmp/cores', 'core.%..%e.%s.%p.*')
+
+ for pattern in [default_pattern, yandex_pattern, yandex_market_pattern]:
+ pattern.mask = resolve_core_mask(pattern.mask)
+
+ if not os.path.exists(pattern.path):
+ logger.warning("Core dump dir doesn't exist: %s", pattern.path)
+ continue
+
+ logger.debug(
+ "Core dump dir (%s) permission mask: %s (expected: %s (%s-dir, %s-sticky bit))",
+ pattern.path,
+ oct(os.stat(pattern.path)[stat.ST_MODE]),
+ oct(stat.S_IFDIR | stat.S_ISVTX | 0o777),
+ oct(stat.S_IFDIR),
+ oct(stat.S_ISVTX),
+ )
+ logger.debug("Search for core dump files match pattern '%s' in '%s'", pattern.mask, pattern.path)
+ cores = glob.glob(os.path.join(pattern.path, pattern.mask))
+ files = os.listdir(pattern.path)
+ logger.debug(
+ "Matched core dump files (%d/%d): [%s] (mismatched samples: %s)",
+ len(cores),
+ len(files),
+ ", ".join(cores),
+ ", ".join(reservoir_sampling(files, 5)),
+ )
+
+ if len(cores) == 1:
+ return cores[0]
+ elif len(cores) > 1:
+ stat = [(filename, os.stat(filename).st_mtime) for filename in cores]
+ entry = sorted(stat, key=lambda x: x[1])[-1]
+ logger.debug("Latest core dump file: '%s' with %d mtime", entry[0], entry[1])
+ return entry[0]
+ else:
+ logger.debug("Core dump file recovering is not supported on '%s'", system)
+ return None
+
+
+def get_gdb_full_backtrace(binary, core, gdb_path):
+ cmd = [
+ gdb_path, binary, core,
+ "--eval-command", "set print thread-events off",
+ "--eval-command", "thread apply all backtrace full",
+ "--batch",
+ "--quiet",
+ ]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ output, stderr = proc.communicate()
+ output = six.ensure_str(output)
+ if stderr:
+ output += "\nstderr >>\n" + six.ensure_str(stderr)
+ return output
+
+
+def get_problem_stack(backtrace):
+ stack = []
+ found_thread1 = False
+ regex = re.compile(r'[Tt]hread (\d+)')
+
+ for line in backtrace.split("\n"):
+ match = regex.search(line)
+ if match:
+ if found_thread1:
+ break
+ if int(match.group(1)) == 1:
+ found_thread1 = True
+ if found_thread1:
+ stack.append(line)
+
+ if not stack:
+ return backtrace
+ return "\n".join(stack)
+
+
+# XXX
+def colorize_backtrace(text):
+ filters = [
+ # Function names and the class they belong to
+ (re.compile(r"^(#[0-9]+ .*?)([a-zA-Z0-9_:\.@]+)(\s?\()", flags=re.MULTILINE), r"\1[[c:cyan]]\2[[rst]]\3"),
+ # Function argument names
+ (re.compile(r"([a-zA-Z0-9_#]*)(\s?=\s?)"), r"[[c:green]]\1[[rst]]\2"),
+ # Stack frame number
+ (re.compile(r"^(#[0-9]+)", flags=re.MULTILINE), r"[[c:red]]\1[[rst]]"),
+ # Thread id colorization
+ (re.compile(r"^([ \*]) ([0-9]+)", flags=re.MULTILINE), r"[[c:light-cyan]]\1 [[c:red]]\2[[rst]]"),
+ # File path and line number
+ (re.compile(r"(\.*[/A-Za-z0-9\+_\.\-]*):(([0-9]+)(:[0-9]+)?)$", flags=re.MULTILINE), r"[[c:light-grey]]\1[[rst]]:[[c:magenta]]\2[[rst]]"),
+ # Addresses
+ (re.compile(r"\b(0x[a-f0-9]{6,})\b"), r"[[c:light-grey]]\1[[rst]]"),
+ ]
+
+ for regex, substitution in filters:
+ text = regex.sub(substitution, text)
+ return text
+
+
+def resolve_addresses(addresses, symbolizer, binary):
+ addresses = list(set(addresses))
+ cmd = [
+ symbolizer,
+ "-demangle",
+ "-obj",
+ binary,
+ ]
+ proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = proc.communicate(input="\n".join(addresses))
+ if proc.returncode:
+ raise Exception("Symbolizer failed with rc:{}\nstderr: {}".format(proc.returncode, err))
+
+ resolved = filter(None, out.split("\n\n"))
+ if len(addresses) != len(resolved):
+ raise Exception("llvm-symbolizer can not extract lines from addresses (count mismatch: {}-{})".format(len(addresses), len(resolved)))
+
+ return {k: v.strip(" \n") for k, v in zip(addresses, resolved)}
diff --git a/library/python/cores/ya.make b/library/python/cores/ya.make
new file mode 100644
index 0000000000..76264e9cce
--- /dev/null
+++ b/library/python/cores/ya.make
@@ -0,0 +1,15 @@
+OWNER(
+ prettyboy
+ g:yatest
+)
+
+PY23_LIBRARY()
+
+PY_SRCS(__init__.py)
+
+PEERDIR(
+ contrib/python/six
+ library/python/reservoir_sampling
+)
+
+END()