summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMaxim Yurchuk <[email protected]>2024-08-08 16:07:17 +0300
committerGitHub <[email protected]>2024-08-08 16:07:17 +0300
commit7bc66166cceacef1433024d3125d45597b267d1c (patch)
tree7815f1a7500a13148702fac0ac841067b65864ea
parent423d2b79061b3b9a0728809e62dea5b4a3584ba3 (diff)
Use common tool in treemap view (#7471)
-rwxr-xr-xydb/ci/build_bloat/main.py208
-rwxr-xr-xydb/ci/build_bloat/template_bloat.py116
-rwxr-xr-xydb/ci/build_bloat/tree_map.py81
3 files changed, 224 insertions, 181 deletions
diff --git a/ydb/ci/build_bloat/main.py b/ydb/ci/build_bloat/main.py
index a7134bf2efd..58183ea5ecf 100755
--- a/ydb/ci/build_bloat/main.py
+++ b/ydb/ci/build_bloat/main.py
@@ -1,12 +1,13 @@
#!/usr/bin/env python3
import argparse
+import copy
import json
from functools import partial
import os
from concurrent.futures import ProcessPoolExecutor
-from jinja2 import Environment, FileSystemLoader, StrictUndefined
+import tree_map
HEADER_COMPILE_TIME_TO_SHOW = 0.5 # sec
@@ -57,43 +58,6 @@ def get_compile_duration_and_cpp_path(time_trace_path: str) -> tuple[float, str,
return duration_us / 1e6, cpp_file, time_trace_path
-def add_to_tree(chunks: list[tuple[str, str]], value: int, tree: dict) -> None:
- tree["name"] = chunks[0][0]
- tree["type"] = chunks[0][1]
- if len(chunks) == 1:
- tree["size"] = value
- else:
- if "children" not in tree:
- tree["children"] = []
- for child_ in tree["children"]:
- if child_["name"] == chunks[1][0]:
- child = child_
- break
-
- else:
- child = {"name": chunks[1][0]}
- tree["children"].append(child)
- add_to_tree(chunks[1:], value, child)
-
-
-def propogate_area(tree):
- area = 0
- for child_ in tree.get("children", []):
- propogate_area(child_)
- area += child_["size"]
-
- if "size" not in tree:
- tree["size"] = area
-
-
-def enrich_names_with_sec(tree):
- area = 0
- for child_ in tree.get("children", []):
- enrich_names_with_sec(child_)
-
- tree["name"] = tree["name"] + " " + "{:_} ms".format(tree["size"])
-
-
def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> list:
with open(path) as f:
obj = json.load(f)
@@ -110,26 +74,74 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l
include_events.sort(key=lambda event: (event[0], -event[1]))
- path_to_time = {}
- current_includes_stack = [] # stack
- last_time_stamp = None
-
- result = []
+ tree_path_to_sum_duration = {}
+ current_includes_stack = []
for time_stamp, ev, path, duration in include_events:
- if current_includes_stack:
- last_path = current_includes_stack[-1]
- prev = path_to_time.get(last_path, 0)
- path_to_time[last_path] = prev + (time_stamp - last_time_stamp) / 1000 / 1000
-
if ev == 1:
current_includes_stack.append(sanitize_path(path, base_src_dir))
- if duration > HEADER_COMPILE_TIME_TO_SHOW * 1000 * 1000:
- result.append((current_includes_stack[:], duration))
+ tree_path = tuple(current_includes_stack)
+ prev = tree_path_to_sum_duration.get(tree_path, 0)
+ tree_path_to_sum_duration[tree_path] = prev + duration
else:
assert current_includes_stack[-1] == sanitize_path(path, base_src_dir)
current_includes_stack.pop()
- last_time_stamp = time_stamp
+
+ # filter small entities
+ tree_paths_to_include = set()
+ result = []
+ for tree_path, duration in tree_path_to_sum_duration.items():
+ if duration > HEADER_COMPILE_TIME_TO_SHOW * 1000 * 1000:
+ for i in range(1, len(tree_path) + 1):
+ tree_paths_to_include.add(tree_path[:i])
+
+ def add_to_tree(tree, tree_path, duration):
+ if len(tree_path) == 0:
+ tree["duration"] += duration
+ else:
+ if tree_path[0] not in tree["children"]:
+ tree["children"][tree_path[0]] = {
+ "duration": 0,
+ "children": {},
+ }
+ add_to_tree(tree["children"][tree_path[0]], tree_path[1:], duration)
+
+ tree = {"children": {}, "duration": 0}
+ for tree_path in tree_paths_to_include:
+ add_to_tree(tree, tree_path, tree_path_to_sum_duration[tree_path])
+
+ def print_tree(tree, padding):
+ for child, child_tree in tree["children"].items():
+ print(padding + child, child_tree["duration"])
+ print_tree(child_tree, padding + " ")
+
+ # handy for debug
+ # print_tree(tree,"")
+
+ # subtract children
+ def subtract_duration(tree):
+ if len(tree["children"]) == 0:
+ return tree["duration"]
+ else:
+ children_duration = 0
+ for child, child_tree in tree["children"].items():
+ children_duration += subtract_duration(child_tree)
+
+ tree["duration"] -= children_duration
+ return tree["duration"] + children_duration
+
+ subtract_duration(tree)
+
+ # collect result
+ result = []
+
+ def collect(tree, current_tree_path):
+ if current_tree_path:
+ result.append((current_tree_path[:], tree["duration"]))
+ for child, child_tree in tree["children"].items():
+ collect(child_tree, current_tree_path + [child])
+
+ collect(tree, [])
return result
@@ -163,14 +175,27 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
cpp_compilation_times = []
total_compilation_time = 0.0
+ tree_paths = []
+
for duration, path, time_trace_path in result:
splitted = path.split(os.sep)
chunks = list(zip(splitted, (len(splitted) - 1) * ["dir"] + ["cpp"]))
- add_to_tree(chunks, int(duration * 1000), tree)
+ chunks = ["/"] + chunks
+ cpp_tree_path = [[chunk, "dir", 0] for chunk in splitted]
+ cpp_tree_path[-1][1] = "cpp"
+
+ cpp_tree_path_fixed_duration = copy.deepcopy(cpp_tree_path)
+ cpp_tree_path_fixed_duration[-1][2] = duration * 1000
+
include_tree = build_include_tree(time_trace_path, build_output_dir, base_src_dir)
+
for inc_path, inc_duration in include_tree:
- additional_chunks = list(zip(inc_path, "h" * len(inc_path)))
- add_to_tree(chunks + additional_chunks, inc_duration / 1000, tree)
+ include_tree_path = [[chunk, "h", 0] for chunk in inc_path]
+ include_tree_path[-1][2] = inc_duration / 1000
+ cpp_tree_path_fixed_duration[-1][2] -= include_tree_path[-1][2]
+ tree_paths.append(cpp_tree_path + include_tree_path)
+
+ tree_paths.append(cpp_tree_path_fixed_duration)
print("{} -> {:.2f}s".format(path, duration))
cpp_compilation_times.append(
{
@@ -179,6 +204,12 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
}
)
total_compilation_time += duration
+ types = [
+ ("h", "Header", "#66C2A5"),
+ ("cpp", "Cpp", "#FC8D62"),
+ ("dir", "Dir", "#8DA0CB"),
+ ]
+ tree_map.generate_tree_map_html(result_dir, tree_paths, unit_name="ms", factor=1, types=types)
os.makedirs(result_dir, exist_ok=True)
@@ -190,11 +221,6 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
with open(os.path.join(result_dir, "output.json"), "w") as f:
json.dump(human_readable_output, f, indent=4)
- propogate_area(tree)
- enrich_names_with_sec(tree)
-
- return tree
-
def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int, str]], dict]:
print("Processing includes in {}".format(trace_path))
@@ -310,14 +336,16 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
tree = {}
headers_compile_duration = []
-
+ tree_paths = []
for duration, cnt, path in result:
path_chunks = path.split(os.sep)
path_chunks[-1] = path_chunks[-1] + " (total {} times)".format(cnt)
- path_chunks_count = len(path_chunks)
- chunks = list(zip(path_chunks, (path_chunks_count - 1) * ["dir"] + ["h"]))
- add_to_tree(chunks, int(duration * 1000), tree)
+ tree_path = [[chunk, "dir", 0] for chunk in path_chunks]
+ tree_path[-1][1] = "h"
+ tree_path[-1][2] = duration * 1000
print("{} -> {:.2f}s (aggregated {} times)".format(path, duration, cnt))
+ if duration > HEADER_COMPILE_TIME_TO_SHOW:
+ tree_paths.append(tree_path)
headers_compile_duration.append(
{
"path": path,
@@ -326,6 +354,13 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
}
)
+ types = [
+ ("h", "Header", "#66C2A5"),
+ ("cpp", "Cpp", "#FC8D62"),
+ ("dir", "Dir", "#8DA0CB"),
+ ]
+ tree_map.generate_tree_map_html(result_dir, tree_paths, unit_name="ms", factor=1, types=types)
+
time_breakdown = {}
for path in total_time_breakdown:
@@ -352,10 +387,6 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
with open(os.path.join(result_dir, "output.json"), "w") as f:
json.dump(human_readable_output, f, indent=4)
- propogate_area(tree)
- enrich_names_with_sec(tree)
-
- return tree
def parse_args():
@@ -391,44 +422,19 @@ will be generated in output_dir"""
def main():
args = parse_args()
- actions = []
-
- if args.html_dir_cpp:
- actions.append(("cpp build time impact", generate_cpp_bloat, args.html_dir_cpp))
-
- if args.html_dir_cpp:
- actions.append(("header build time impact", generate_header_bloat, args.html_dir_headers))
-
current_script_dir = os.path.dirname(os.path.realpath(__file__))
base_src_dir = os.path.normpath(os.path.join(current_script_dir, "../../.."))
# check we a in root of source tree
assert os.path.isfile(os.path.join(base_src_dir, "AUTHORS"))
- html_dir = os.path.join(current_script_dir, "html")
-
- for description, fn, output_path in actions:
- print("Performing '{}'".format(description))
- tree = fn(args.build_dir, output_path, base_src_dir)
-
- env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined)
- types = [
- ("h", "Header", "#66C2A5"),
- ("cpp", "Cpp", "#FC8D62"),
- ("dir", "Dir", "#8DA0CB"),
- ]
- file_names = os.listdir(html_dir)
- os.makedirs(output_path, exist_ok=True)
- for file_name in file_names:
- data = env.get_template(file_name).render(types=types)
-
- dst_path = os.path.join(output_path, file_name)
- with open(dst_path, "w") as f:
- f.write(data)
-
- with open(os.path.join(output_path, "bloat.json"), "w") as f:
- f.write("var kTree = ")
- json.dump(tree, f, indent=4)
-
- print("Done '{}'".format(description))
+
+
+ if args.html_dir_cpp:
+ generate_cpp_bloat(args.build_dir, args.html_dir_cpp, base_src_dir)
+ print("Done '{}'".format("cpp build time impact"))
+ if args.html_dir_headers:
+ generate_header_bloat(args.build_dir, args.html_dir_headers, base_src_dir)
+ print("Done '{}'".format("header build time impact"))
+
if __name__ == "__main__":
diff --git a/ydb/ci/build_bloat/template_bloat.py b/ydb/ci/build_bloat/template_bloat.py
index 19892a07395..eb47e8583e7 100755
--- a/ydb/ci/build_bloat/template_bloat.py
+++ b/ydb/ci/build_bloat/template_bloat.py
@@ -1,30 +1,46 @@
#!/usr/bin/env python3
import argparse
import json
-import os
import sys
-from jinja2 import Environment, FileSystemLoader, StrictUndefined
+import tree_map
THRESHHOLD_TO_SHOW_ON_TREE_VIEW = 1024*10
def remove_brackets(name, b1, b2):
+    """Return ``name`` with the contents of every outermost b1...b2 pair removed.
+
+    The outermost brackets themselves are kept, so with ``b1="<"`` and
+    ``b2=">"`` the name ``"a<b<c>>::d<e>f"`` becomes ``"a<>::d<>f"``.
+    Text outside of any bracket pair is copied unchanged.
+    """
     inside_template = 0
-    final_name = ""
-    for c in name:
+    final_name_builder = []
+    pos = 0
+    while pos != len(name):
+        pos_next_b1 = name.find(b1, pos)
+        pos_next_b2 = name.find(b2, pos)
+
+        # nearest bracket of either kind, -1 when none is left
+        pos_next = pos_next_b1
+        if pos_next == -1:
+            pos_next = pos_next_b2
+        elif pos_next_b2 != -1 and pos_next_b2 < pos_next:
+            pos_next = pos_next_b2
+
+        if pos_next == -1:
+            # No bracket left. Indexing name[-1] here would read the last
+            # character and slicing name[pos:-1] would drop it, so the tail
+            # is handled explicitly: it is kept only outside of brackets.
+            if inside_template == 0:
+                final_name_builder.append(name[pos:])
+            break
+
+        # plain text in front of the bracket survives only outside of brackets
+        if inside_template == 0:
+            final_name_builder.append(name[pos:pos_next])
+
+        c = name[pos_next]
+
         if c == b1:
             inside_template += 1
             if inside_template == 1:
-                final_name += c
+                # keep the outermost opening bracket itself
+                final_name_builder.append(c)
+
         elif c == b2:
             inside_template -= 1
             if inside_template == 0:
-                final_name += c
+                final_name_builder.append(c)
         else:
-            if inside_template:
-                continue
-            final_name += c
-    return final_name
+            # Only reachable when b1/b2 are not single characters: treat the
+            # character found as ordinary text.
+            if inside_template == 0:
+                final_name_builder.append(c)
+
+        pos = pos_next + 1
+
+    return "".join(final_name_builder)
def get_aggregation_key(name):
final_name = name
@@ -89,51 +105,9 @@ def print_stat(f, d):
for s in sorted(p[2]):
print(" " + s, file=f)
-
-def add_to_tree(tree, path, value, count):
- tree["name"] = path[0]
- if "children" not in tree:
- tree["children"] = {}
- if len(path) == 1:
- # paths can be the same, but return value differs
- # assert "size" not in tree
- if "size" not in tree:
- tree["size"] = 0
- tree["size"] += value
- tree["type"] = "function"
- tree["count"] = count
- else:
- tree["type"] = "namespace"
- if path[1] not in tree["children"]:
- tree["children"][path[1]] = {}
- add_to_tree(tree["children"][path[1]], path[1:], value, count)
-
-def children_to_list(tree):
- if "children" not in tree:
- return
- tree["children"] = list(tree["children"].values())
- for child in tree["children"]:
- children_to_list(child)
-
-def propogate_size(tree):
- if "size" not in tree:
- tree["size"] = 0
- for child in tree.get("children", []):
- tree["size"] += propogate_size(child)
- return tree["size"]
-
-def enrich_names_with_sec(tree):
- area = 0
- for child_ in tree.get("children", []):
- enrich_names_with_sec(child_)
-
- tree["name"] = tree["name"] + " " + "{:_} KiB".format(int(tree["size"]/1024))
- if "count" in tree:
- tree["name"] += ", {} times".format(tree["count"])
-
-def build_tree(items):
- tree = {}
+def get_tree_paths(items):
total_size = 0
+ paths_to_add = []
for name, (size, count, obj_files, avg, min, max) in items:
# we skip small entities to order to make html view usable
if size < THRESHHOLD_TO_SHOW_ON_TREE_VIEW:
@@ -161,13 +135,12 @@ def build_tree(items):
root_name = "root (all function less than {} KiB are ommited)".format(THRESHHOLD_TO_SHOW_ON_TREE_VIEW // 1024)
path = [root_name] + path
-
- add_to_tree(tree, path, size, count)
- children_to_list(tree)
- propogate_size(tree)
- enrich_names_with_sec(tree)
- print("Total size =", total_size)
- return tree
+ path_with_info = [[chunk, "namespace", 0] for chunk in path]
+ path_with_info[-1][1] = "function"
+ path_with_info[-1][2] = size
+ path_with_info[-1][0] += ", {} times".format(count)
+ paths_to_add.append(path_with_info)
+ return paths_to_add
def parse_args():
@@ -194,7 +167,6 @@ def parse_args():
)
return parser.parse_args()
-
def main():
options = parse_args()
json_path = options.bloat_json
@@ -214,28 +186,12 @@ def main():
if options.html_template_bloat:
output_dir = options.html_template_bloat
- current_script_dir = os.path.dirname(os.path.realpath(__file__))
- html_dir = os.path.join(current_script_dir, "html")
-
- tree = build_tree(items)
-
- env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined)
+ tree_paths = get_tree_paths(items)
types = [
("namespace", "Namespace", "#66C2A5"),
("function", "Function", "#FC8D62"),
]
- file_names = os.listdir(html_dir)
- os.makedirs(output_dir, exist_ok=True)
- for file_name in file_names:
- data = env.get_template(file_name).render(types=types)
-
- dst_path = os.path.join(output_dir, file_name)
- with open(dst_path, "w") as f:
- f.write(data)
-
- with open(os.path.join(output_dir, "bloat.json"), "w") as f:
- f.write("kTree = ")
- json.dump(tree, f, indent=4)
+ tree_map.generate_tree_map_html(output_dir, tree_paths, unit_name="KiB", factor=1.0/1024, types=types)
return 0
diff --git a/ydb/ci/build_bloat/tree_map.py b/ydb/ci/build_bloat/tree_map.py
new file mode 100755
index 00000000000..fc02b1ca6d9
--- /dev/null
+++ b/ydb/ci/build_bloat/tree_map.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import json
+import os
+
+from jinja2 import Environment, FileSystemLoader, StrictUndefined
+
+def _add_to_tree(tree, path):
+    """Merge one root-to-leaf path into the nested tree, in place.
+
+    ``path`` is a non-empty sequence of ``(name, type, size)`` entries. The
+    first entry describes ``tree`` itself, each following entry a child of
+    the node before it. At every level the node's ``name`` and ``type`` are
+    overwritten with the entry's values and the entry's size is added to the
+    node's ``size``. ``children`` (a dict keyed by child name) and ``size``
+    are created the first time a node is visited.
+    """
+    node = tree
+    depth = 0
+    while True:
+        entry_name, entry_type, entry_size = path[depth]
+        node["name"] = entry_name
+        node.setdefault("children", {})
+        node.setdefault("size", 0)
+        node["size"] += entry_size
+        node["type"] = entry_type
+
+        depth += 1
+        if depth == len(path):
+            # Paths can be the same, but return value differs, so reaching
+            # an already populated leaf is fine: its size just accumulates.
+            return
+        node = node["children"].setdefault(path[depth][0], {})
+
+def _children_to_list(tree):
+    """Replace every ``children`` dict in the tree by a list of its values, in place.
+
+    Nodes without a ``children`` key are left untouched.
+    """
+    pending = [tree]
+    while pending:
+        node = pending.pop()
+        if "children" not in node:
+            continue
+        node["children"] = list(node["children"].values())
+        pending.extend(node["children"])
+
+def _propogate_size(tree):
+    """Add the total size of every subtree to its root node and return it.
+
+    Each node must already carry its own ``size``. Afterwards ``size`` holds
+    the node's own value plus the propagated sizes of all its children.
+    """
+    total = tree["size"]
+    for child in tree.get("children", []):
+        # children are added one by one, in order, on top of the own size
+        total += _propogate_size(child)
+    tree["size"] = total
+    return total
+
+def _intify_size(tree):
+    """Convert the ``size`` of every node in the tree to ``int``, in place."""
+    pending = [tree]
+    while pending:
+        node = pending.pop()
+        pending.extend(node.get("children", []))
+        node["size"] = int(node["size"])
+
+def _enrich_names_with_units(tree, unit_name, factor):
+    """Append the scaled size and its unit to the name of every node, in place.
+
+    The suffix is ``", <int(size * factor)> <unit_name>"`` with ``_`` used as
+    the thousands separator.
+    """
+    pending = [tree]
+    while pending:
+        node = pending.pop()
+        pending.extend(node.get("children", []))
+        scaled_size = int(node["size"] * factor)
+        node["name"] = node["name"] + ", {:_} {}".format(scaled_size, unit_name)
+
+def _build_tree_map(paths_to_add, unit_name, factor):
+    """Build the nested tree that is dumped for the treemap view.
+
+    ``paths_to_add`` holds root-to-leaf paths, each a sequence of
+    ``(name, type, size)`` entries. The paths are merged into one tree,
+    child dicts are turned into lists, sizes are propagated from the leaves
+    to the root and truncated to ``int``, and finally every node name gets
+    its size (multiplied by ``factor``) and ``unit_name`` appended.
+
+    Returns the root node as a dict with ``name``, ``children``, ``size``
+    and ``type`` keys.
+    """
+    if not paths_to_add:
+        # Without any path the root never receives "name"/"size" and the
+        # passes below fail with KeyError. Return an explicit empty root so
+        # the report can still be generated.
+        # NOTE(review): the empty "type" is a placeholder - confirm the html
+        # templates cope with a type that is not listed in ``types``.
+        return {
+            "name": "empty, {:_} {}".format(0, unit_name),
+            "children": [],
+            "size": 0,
+            "type": "",
+        }
+
+    tree = {}
+    for path in paths_to_add:
+        _add_to_tree(tree, path)
+    _children_to_list(tree)
+    _propogate_size(tree)
+    _intify_size(tree)
+    _enrich_names_with_units(tree, unit_name, factor)
+    return tree
+
+
+def generate_tree_map_html(output_dir: str, tree_paths: list[list[list]], unit_name: str, factor: float, types: list[tuple[str, str, str]]) -> None:
+    """Render the treemap viewer and its data file into ``output_dir``.
+
+    Every entry of the ``html`` directory located next to this script is
+    rendered as a jinja2 template, with ``types`` as the only variable, and
+    written to ``output_dir`` under the same name. The tree built from
+    ``tree_paths`` is written next to them as ``bloat.json``, in the form of
+    a ``kTree = ...`` assignment.
+
+    Args:
+        output_dir: destination directory, created when missing.
+        tree_paths: root-to-leaf paths, each one a list of
+            ``[name, type, size]`` entries.
+        unit_name: unit label appended to every node name.
+        factor: multiplier applied to a size before it is shown in a name.
+        types: triples passed through to the templates unchanged; the
+            callers pass ``(type, title, colour)`` - presumably the legend.
+    """
+    current_script_dir = os.path.dirname(os.path.realpath(__file__))
+    html_dir = os.path.join(current_script_dir, "html")
+
+    tree = _build_tree_map(tree_paths, unit_name, factor)
+
+    env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined)
+    file_names = os.listdir(html_dir)
+    os.makedirs(output_dir, exist_ok=True)
+    for file_name in file_names:
+        data = env.get_template(file_name).render(types=types)
+
+        dst_path = os.path.join(output_dir, file_name)
+        # FileSystemLoader decodes templates as UTF-8 by default, so write
+        # them back as UTF-8 instead of relying on the locale encoding.
+        with open(dst_path, "w", encoding="utf-8") as f:
+            f.write(data)
+
+    with open(os.path.join(output_dir, "bloat.json"), "w", encoding="utf-8") as f:
+        f.write("kTree = ")
+        json.dump(tree, f, indent=4)