diff options
author | zverevgeny <[email protected]> | 2025-05-18 19:39:10 +0300 |
---|---|---|
committer | GitHub <[email protected]> | 2025-05-18 19:39:10 +0300 |
commit | cda6dc1646e590def4d9beee8fccb76843ca1151 (patch) | |
tree | f75cd8d8b9eda58690aed7f51e33433574a9676b | |
parent | dee22ec17f9818abef2a8bfb2a2ad405c97eabb9 (diff) |
ya.make for visualize_portions (#18336)
-rw-r--r-- | ydb/core/tx/columnshard/tools/visualize_portions/__main__.py | 66 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/tools/visualize_portions/ya.make | 13 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/ya.make | 1 |
3 files changed, 56 insertions, 24 deletions
diff --git a/ydb/core/tx/columnshard/tools/visualize_portions/__main__.py b/ydb/core/tx/columnshard/tools/visualize_portions/__main__.py index 0beeee8e397..662f014fd6a 100644 --- a/ydb/core/tx/columnshard/tools/visualize_portions/__main__.py +++ b/ydb/core/tx/columnshard/tools/visualize_portions/__main__.py @@ -1,11 +1,12 @@ import matplotlib.pyplot as plt import enum import sys -import matplotlib.dates as mdates import argparse from datetime import datetime, timezone -from matplotlib.patches import Rectangle -from matplotlib.collections import PatchCollection +from matplotlib import dates, patches, collections +from importlib import metadata +from packaging import version + import json @@ -53,18 +54,18 @@ class Portion: def __repr__(self): return f"pk:[{self.PkMin}..{self.PkMax}], plan_step:[{self.PlanStepMin}..{self.PlanStepMax}]" - def ToRectangle(self, levelColors) -> Rectangle: + def ToRectangle(self, levelColors) -> patches.Rectangle: if self.Pk0Type == FirstPkColumnType.Integer: x0 = self.PkMin dx = self.PkMax - self.PkMin elif self.Pk0Type == FirstPkColumnType.Timestamp: - x0 = mdates.date2num(self.PkMin) - dx = mdates.date2num(self.PkMax) - mdates.date2num(self.PkMin) + x0 = dates.date2num(self.PkMin) + dx = dates.date2num(self.PkMax) - dates.date2num(self.PkMin) else: raise Exception("Unsupported PK column type") - y0 = mdates.date2num(self.PlanStepMin) - dy = mdates.date2num(self.PlanStepMax) - mdates.date2num(self.PlanStepMin) - return Rectangle( + y0 = dates.date2num(self.PlanStepMin) + dy = dates.date2num(self.PlanStepMax) - dates.date2num(self.PlanStepMin) + return patches.Rectangle( (x0, y0), dx, dy, @@ -107,11 +108,12 @@ def ParsePortionStatFile(path: str, pk0type: FirstPkColumnType) -> Portions: def GetLevelColours(maxLevel): - levelColors = {l: 'blue' for l in range(portions.MaxCompactionLevel + 1)} + levelColors = {level: 'blue' for level in range(portions.MaxCompactionLevel + 1)} levelColors[maxLevel] = 'green' levelColors[0] = 'red' return levelColors + def GetIntersections(portions): points = [] for p in portions: @@ -119,7 +121,7 @@ def GetIntersections(portions): points.append((p.PkMax, -1)) points.sort(key=lambda p: p[0]) - + intersections = [] prevPk = points[0][0] cur = 0 @@ -127,7 +129,7 @@ def GetIntersections(portions): for p in points[1:]: cur += p[1] maxIntersections = max(maxIntersections, cur) - r = Rectangle( + r = patches.Rectangle( (prevPk, 0), p[0]-prevPk, cur, linestyle="dashed", @@ -138,6 +140,16 @@ def GetIntersections(portions): return intersections, maxIntersections +def get_interactive_backends(): + print(f"matplotlib version: {metadata.version("matplotlib")}") + if version.Version(metadata.version("matplotlib")) < version.Version("3.9"): + from matplotlib import rcsetup + return rcsetup.interactive_bk + else: + from matplotlib import backends + return backends.backend_registry.list_builtin(backends.BackendFilter.INTERACTIVE) + + if __name__ == '__main__': parser = argparse.ArgumentParser("""Visualize portions from YDB Column table. To get portion info for a table, use ydb cli: @@ -149,6 +161,14 @@ To get portion info for a table, use ydb cli: args = parser.parse_args() inputFile = args.input_file pk0Type = FirstPkColumnType[args.type] + + if args.output_file is None: + if plt.get_backend() not in get_interactive_backends(): + print("""No interactive rendering backend is available. Only output to file mode can be used(--output-file). +Or you can run this script in some environment with installed interactive backend, i.e venv +""") + sys.exit(1) + print(f"Loading file: {inputFile}...") portions = ParsePortionStatFile(inputFile, pk0Type) print(f"Loading file: {inputFile}... completed, {len(portions.Portions)} portions") @@ -160,10 +180,10 @@ To get portion info for a table, use ydb cli: for i in [0, 1]: ax[i].set_xlabel("pk") if pk0Type == FirstPkColumnType.Timestamp: - ax[i].xaxis.set_major_locator(mdates.AutoDateLocator()) - ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) - xMin = mdates.date2num(portions.PkMin) - xMax = mdates.date2num(portions.PkMax) + ax[i].xaxis.set_major_locator(dates.AutoDateLocator()) + ax[i].xaxis.set_major_formatter(dates.DateFormatter('%Y-%m-%d')) + xMin = dates.date2num(portions.PkMin) + xMax = dates.date2num(portions.PkMax) elif pk0Type == FirstPkColumnType.Integer: xMin = portions.PkMin xMax = portions.PkMax @@ -172,22 +192,20 @@ To get portion info for a table, use ydb cli: dx = xMax - xMin ax[i].set_xlim(xMin - 0.05 * dx, xMax + 0.05 * dx) - ax[0].set_title("Column table portions") - ax[0].add_collection(PatchCollection(rectangles, match_original=True)) - + ax[0].add_collection(collections.PatchCollection(rectangles, match_original=True)) ax[0].set_ylabel("plan_step") - ax[0].yaxis.set_major_locator(mdates.AutoDateLocator()) - ax[0].yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S\n')) - yMin = mdates.date2num(portions.PlanStepMin) - yMax = mdates.date2num(portions.PlanStepMax) + ax[0].yaxis.set_major_locator(dates.AutoDateLocator()) + ax[0].yaxis.set_major_formatter(dates.DateFormatter('%Y-%m-%d %H:%M:%S\n')) + yMin = dates.date2num(portions.PlanStepMin) + yMax = dates.date2num(portions.PlanStepMax) dy = yMax - yMin ax[0].set_ylim(yMin - 0.05 * dy, yMax + 0.05 * dy) intersections, maxIntersections = GetIntersections(portions.Portions) ax[1].set_title("Portion intersections") - ax[1].add_collection(PatchCollection(intersections, match_original=True)) + ax[1].add_collection(collections.PatchCollection(intersections, match_original=True)) ax[1].set_ylabel("intersection") ax[1].set_ylim(0, maxIntersections * 1.1 + 1) diff --git a/ydb/core/tx/columnshard/tools/visualize_portions/ya.make b/ydb/core/tx/columnshard/tools/visualize_portions/ya.make new file mode 100644 index 00000000000..70a26ecf3f2 --- /dev/null +++ b/ydb/core/tx/columnshard/tools/visualize_portions/ya.make @@ -0,0 +1,13 @@ +PY3_PROGRAM(visualize_portions) + +PY_SRCS( + __main__.py +) + +PEERDIR( + contrib/python/matplotlib +) + +END() + + diff --git a/ydb/core/tx/columnshard/ya.make b/ydb/core/tx/columnshard/ya.make index f945d8d4fa9..32e9059f7d9 100644 --- a/ydb/core/tx/columnshard/ya.make +++ b/ydb/core/tx/columnshard/ya.make @@ -88,6 +88,7 @@ END() RECURSE( engines splitter + tools/visualize_portions ) RECURSE_FOR_TESTS( |