diff options
author | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2025-02-11 09:08:56 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-11 12:08:56 +0300 |
commit | a5fc6d865de01f57efb0f42655c5e8032c3b5e5e (patch) | |
tree | c8313eb3f9ae2468a42d6e2103570a9887b73f8c | |
parent | b829f5309adceb8416226d33a87297f0b9f4d7ef (diff) | |
download | ydb-a5fc6d865de01f57efb0f42655c5e8032c3b5e5e.tar.gz |
Remove local_stats_drawing (#14418)
-rw-r--r-- | ydb/public/tools/task_stats_drawing/__main__.py | 231 | ||||
-rw-r--r-- | ydb/public/tools/task_stats_drawing/ya.make | 10 | ||||
-rw-r--r-- | ydb/public/tools/ya.make | 1 |
3 files changed, 0 insertions, 242 deletions
diff --git a/ydb/public/tools/task_stats_drawing/__main__.py b/ydb/public/tools/task_stats_drawing/__main__.py deleted file mode 100644 index 4d1430ea81..0000000000 --- a/ydb/public/tools/task_stats_drawing/__main__.py +++ /dev/null @@ -1,231 +0,0 @@ -import argparse -import json -import matplotlib.pyplot as plt -import pandas as pd - - -def take_sort_key(t): - return t.finish - - -def take_node_sort_key(n): - return n.max_task_finished() - - -class PlanNode: - plan_node_idx = 0 - - def __init__(self, name): - self.nodes = {} - self.nodes_list = [] - self.name = name - self.sub_nodes = [] - - def build_sub_node(self, name): - node = PlanNode(name) - self.sub_nodes.append(node) - return node - - def prepare(self): - for i in self.sub_nodes: - i.prepare() - for n_id, n in self.nodes.items(): - n.tasks.sort(key=take_sort_key) - self.nodes_list.sort(key=take_node_sort_key) - - def get_plans_with_tasks(self): - result = [] - if (len(self.nodes) > 0): - result.append(self) - for i in self.sub_nodes: - sub_result = i.get_plans_with_tasks() - for t in sub_result: - result.append(t) - return result - - def min_task_start(self): - start = 0 - first = True - for id, n in self.nodes.items(): - for t in n.tasks: - if (first or start > t.start): - start = t.start - first = False - - for i in self.sub_nodes: - sub_start = i.min_task_start() - if (sub_start == 0): - continue - if (first or sub_start < start): - start = sub_start - first = False - return start - - -class Node: - node_id = 0 - shards_count = 0 - - def __init__(self, node_id, host): - self.tasks = [] - self.node_id = node_id - self.host = host - self.finished = 0 - - def max_task_finished(self): - if (self.finished == 0): - for t in self.tasks: - if (self.finished < t.finish): - self.finished = t.finish - return self.finished - - -class Task: - task_id = '' - node_id = 0 - full_executor_id = '' - executor_id = '' - executor = '' - start = 0 - finish = 0 - pending = 0 - compute = 0 - waiting = 0 - stage = 0 - host = '' - - def parse(self, t, stage): - self.task_id = t["TaskId"] - self.node_id = t["NodeId"] - self.host = t["Host"] - self.executor_id = t["Host"].split('.')[0] + "::" + str(t["NodeId"]) - self.full_executor_id = str(stage) + "::" + t["Host"].split('.')[0] + \ - "::" + str(t["NodeId"]) - self.executor = self.executor_id + "::" + str(t["TaskId"]) - self.start = t["StartTimeMs"] - self.finish = t["FinishTimeMs"] - self.pending = t["PendingInputTimeUs"]/1000 - self.compute = t["ComputeTimeUs"]/1000 - if ("WaitTimeUs" in t): - self.waiting = t["WaitTimeUs"]/1000 - else: - self.waiting = 0 - self.stage = stage - - -def rgb_to_hex(rgb): - return '#%02x%02x%02x' % rgb - - -def scan_json(plan_node, json, current_stack): - current_stack.append(json["Node Type"]) - plan_node.name = "::".join(current_stack) - if ("Plans" in json): - for i in json["Plans"]: - sub_stage = plan_node.build_sub_node("") - scan_json(sub_stage, i, current_stack) - if ("Stats" in json): - if ("ComputeNodes" in json["Stats"]): - for n in json["Stats"]["ComputeNodes"]: - if ("Tasks" not in n): - continue - json_tasks = n["Tasks"] - for j_task in json_tasks: - task = Task() - task.parse(j_task, plan_node.plan_node_idx) - if (task.node_id not in plan_node.nodes): - node = Node(task.node_id, task.host) - plan_node.nodes[task.node_id] = node - plan_node.nodes_list.append(node) - plan_node.nodes[task.node_id].tasks.append(task) - if ("NodesScanShards" in json["Stats"] and - not json["Stats"]["NodesScanShards"] is None): - for n in json["Stats"]["NodesScanShards"]: - if (n["node_id"] in plan_node.nodes): - plan_node.nodes[n["node_id"]].shards_count = \ - n["shards_count"] - current_stack = current_stack.pop() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='arguments for profiler') - parser.add_argument('stats_path', type=str, help='path for profile') - args = parser.parse_args() - - stats_path = args.stats_path - json_started = False - json_string = "" - with open(stats_path, "r", encoding="utf-8") as f: - json_string += f.readline() - json_profile = json.loads(json_string) - - plan_node = PlanNode("") - scan_json(plan_node, json_profile["Plans"][0], []) - plan_node.prepare() - start = plan_node.min_task_start() - plans = plan_node.get_plans_with_tasks() - - tasks = {'executor_id': [], 'y': [], 'start': [], 'finish': [], - 'pending': [], 'compute': [], 'waiting': [], 'task_id': []} - - tasks_count = 0 - max_time = 0 - y_shift = 0 - legend_y = {'pos': [], 'label': []} - tasks_count_by_node = {} - s_pred_name = "" - for s in plans: - stage = s.name - for n in s.nodes_list: - legend_y['pos'].append(y_shift - 0.5 * len(n.tasks)) - label = "" - if (s.name != s_pred_name): - label = s.name + "\n" - label = label + n.host + "\n" + str(n.node_id) + \ - "/t=" + str(len(n.tasks)) - if (n.shards_count): - label = label + "/sh=" + str(n.shards_count) - legend_y['label'].append(label) - count_in_executor = 1 - for t in n.tasks: - tasks['task_id'].append(t.task_id) - executor_id = t.executor_id - if (not (stage in tasks_count_by_node)): - tasks_count_by_node[stage] = {} - tasks_count_by_stage = tasks_count_by_node[stage] - if (executor_id not in tasks_count_by_stage): - tasks_count_by_node[stage][executor_id] = 0 - tasks_count_by_node[stage][executor_id] = \ - tasks_count_by_node[stage][executor_id] + 1 - - y_shift = y_shift - 1 - - tasks['executor_id'].append(t.executor_id) - tasks['start'].append(t.start - start) - tasks['finish'].append(t.finish - start) - tasks['pending'].append(t.pending) - tasks['compute'].append(t.compute) - tasks['waiting'].append(t.waiting) - tasks['y'].append(y_shift) - tasks_count = tasks_count + 1 - max_time = max(max_time, t.finish - start) - y_shift -= 5 - s_pred_name = s.name - y_shift -= 5 - - df = pd.DataFrame(tasks) - print(tasks_count_by_node) - print(df) - - fig, ax = plt.subplots(1, figsize=(20, 6)) - ax.set_yticks(legend_y['pos']) - ax.set_yticklabels(legend_y['label']) - ax.barh(y=df.y, left=df.start, width=df.finish-df.start, - color="#0e0c0c", alpha=1, label="other") - ax.barh(y=df.y, left=df.start, width=df.waiting, - color="#001eff", alpha=1, label="waiting") - ax.barh(y=df.y, left=df.start+df.waiting, width=df.compute, - color="#ff0000", alpha=1, label="compute") - ax.legend() - - plt.savefig('image.png', bbox_inches='tight') diff --git a/ydb/public/tools/task_stats_drawing/ya.make b/ydb/public/tools/task_stats_drawing/ya.make deleted file mode 100644 index 4af2b6f91a..0000000000 --- a/ydb/public/tools/task_stats_drawing/ya.make +++ /dev/null @@ -1,10 +0,0 @@ -PY3_PROGRAM(task_stats_drawing) - -PY_SRCS(__main__.py) - -PEERDIR( - contrib/python/matplotlib - contrib/python/pandas -) - -END() diff --git a/ydb/public/tools/ya.make b/ydb/public/tools/ya.make index 1ff35d50f3..3242a012dc 100644 --- a/ydb/public/tools/ya.make +++ b/ydb/public/tools/ya.make @@ -1,6 +1,5 @@ RECURSE( lib local_ydb - task_stats_drawing ydb_recipe ) |