author    | Alexander Petrukhin <shmel1k@ydb.tech> | 2024-12-20 13:50:48 +0300
committer | GitHub <noreply@github.com>            | 2024-12-20 13:50:48 +0300
commit    | 4ca7f5cf75693c2f379b190f621e681917d88952 (patch)
tree      | d342fe2a4bfac6f35412a2863cf4ef4a26a76d35
parent    | f036a6c4feca5e538af0738c977b8620162051a9 (diff)
download  | ydb-4ca7f5cf75693c2f379b190f621e681917d88952.tar.gz
Feature/ydbd slice/allow slice creation using raw config yaml (#12762)
YdbWorkloadTopic.Full_Statistics_UseTx is muted now
-rw-r--r-- | ydb/tools/cfg/base.py                                             |  24
-rw-r--r-- | ydb/tools/cfg/static.py                                           |  24
-rw-r--r-- | ydb/tools/ydbd_slice/__init__.py                                  |  97
-rw-r--r-- | ydb/tools/ydbd_slice/baremetal/templates/block-4-2.yaml           | 205
-rw-r--r-- | ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml | 177
-rw-r--r-- | ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml | 199
-rw-r--r-- | ydb/tools/ydbd_slice/cluster_description.py                       |   2
-rw-r--r-- | ydb/tools/ydbd_slice/handlers.py                                  |   2
-rw-r--r-- | ydb/tools/ydbd_slice/ya.make                                      |   4
9 files changed, 710 insertions, 24 deletions
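The headline change is a new `sample-config` mode for ydbd_slice that prints a bundled baremetal cluster template. Below is a minimal sketch of the dispatch logic the diff adds, with the `library.python.resource` lookup stubbed out; the `load_template` helper and its body are illustrative, not part of the commit:

```python
# Sketch of the new sample-config dispatch. The real handler resolves the
# template via library.python.resource.find() on paths registered in ya.make.
import argparse
import sys

TEMPLATES = {
    "block-4-2-8-nodes": "/ydbd_slice/baremetal/templates/block-4-2-8-nodes.yaml",
    "mirror-3-dc-3-nodes": "/ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml",
    "mirror-3-dc-9-nodes": "/ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml",
}


def load_template(resource_path):
    # Illustrative stand-in for rs.find(resource_path).decode().
    return "# template body for %s\n" % resource_path


def main():
    parser = argparse.ArgumentParser(prog="sample-config")
    parser.add_argument("--cluster-type", required=True, choices=sorted(TEMPLATES))
    parser.add_argument("--output-file", required=False)
    args = parser.parse_args()

    body = load_template(TEMPLATES[args.cluster_type])
    if args.output_file:
        # Write the template to the requested file...
        with open(args.output_file, "w") as out:
            out.write(body)
    else:
        # ...or dump it to stdout, as the new mode does.
        sys.stdout.write(body)


if __name__ == "__main__":
    main()
```

With the real tool the invocation would be along the lines of `ydbd_slice sample-config --cluster-type mirror-3-dc-9-nodes --output-file cluster.yaml`. Note that the handler appears to look up `/ydbd_slice/baremetal/templates/block-4-2-8-nodes.yaml` for the `block-4-2-8-nodes` cluster type, while the RESOURCE section in ya.make registers the template as `block-4-2.yaml`; the resource key has to match for `rs.find` to succeed.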
diff --git a/ydb/tools/cfg/base.py b/ydb/tools/cfg/base.py
index d69726f4a8..f60e65069c 100644
--- a/ydb/tools/cfg/base.py
+++ b/ydb/tools/cfg/base.py
@@ -264,7 +264,7 @@ def normalize_domain(domain_name):
 
 
 class ClusterDetailsProvider(object):
-    def __init__(self, template, walle_provider, validator=None, database=None):
+    def __init__(self, template, walle_provider, validator=None, database=None, use_new_style_cfg=False):
         if not validator:
             validator = validation.default_validator()
 
@@ -286,7 +286,7 @@ class ClusterDetailsProvider(object):
         self.__racks = {}
         self.__bodies = {}
         self.__dcs = {}
-        self.use_new_style_kikimr_cfg = self.__cluster_description.get("use_new_style_kikimr_cfg", False)
+        self.use_new_style_kikimr_cfg = self.__cluster_description.get("use_new_style_kikimr_cfg", use_new_style_cfg)
         self.need_generate_app_config = self.__cluster_description.get("need_generate_app_config", False)
         self.need_txt_files = self.__cluster_description.get("need_txt_files", True)
         self.use_auto_config = self.__cluster_description.get("use_auto_config", False)
@@ -297,6 +297,7 @@ class ClusterDetailsProvider(object):
         self.table_profiles_config = self.__cluster_description.get("table_profiles_config")
         self.http_proxy_config = self.__cluster_description.get("http_proxy_config")
         self.blob_storage_config = self.__cluster_description.get("blob_storage_config")
+        self.channel_profile_config = self.__cluster_description.get("channel_profile_config")
         self.pdisk_key_config = self.__cluster_description.get("pdisk_key_config", {})
         if not self.need_txt_files and not self.use_new_style_kikimr_cfg:
             assert "cannot remove txt files without new style kikimr cfg!"
@@ -353,16 +354,25 @@ class ClusterDetailsProvider(object):
     def _get_datacenter(self, host_description):
         if host_description.get("datacenter") is not None:
             return str(host_description.get("datacenter"))
+        dc = host_description.get("location", {}).get("data_center", None)
+        if dc:
+            return str(dc)
         return str(self._walle.get_datacenter(host_description.get("name", host_description.get("host"))))
 
     def _get_rack(self, host_description):
         if host_description.get("rack") is not None:
             return str(host_description.get("rack"))
+        rack = host_description.get("location", {}).get("rack", None)
+        if rack:
+            return str(rack)
         return str(self._walle.get_rack(host_description.get("name", host_description.get("host"))))
 
     def _get_body(self, host_description):
         if host_description.get("body") is not None:
             return str(host_description.get("body"))
+        body = host_description.get("location", {}).get("body", None)
+        if body:
+            return str(body)
         return str(self._walle.get_body(host_description.get("name", host_description.get("host"))))
 
     def _collect_drives_info(self, host_description):
@@ -555,14 +565,16 @@ class ClusterDetailsProvider(object):
         domain_name = domain.get("domain_name")
 
         storage_pool_kinds = {
-            pool_kind.get("kind"): self.__storage_pool_kind(pool_kind) for pool_kind in domain.get("storage_pool_kinds", [])
+            pool_kind.get("kind"): self.__storage_pool_kind(pool_kind)
+            for pool_kind in domain.get("storage_pool_kinds", [])
         }
         assert len(set(storage_pool_kinds.keys())) == len(
             storage_pool_kinds.keys()
         ), "required unique kind value in storage_pool_kinds items"
 
         storage_pools = [
-            self.__storage_pool(storage_pool_kinds, pool_instance, domain_name) for pool_instance in domain.get("storage_pools", [])
+            self.__storage_pool(storage_pool_kinds, pool_instance, domain_name)
+            for pool_instance in domain.get("storage_pools", [])
         ]
 
         domains.append(
@@ -604,7 +616,9 @@ class ClusterDetailsProvider(object):
     @property
     def fail_domain_type(self):
-        return types.FailDomainType.from_string(str(self.__cluster_description.get("fail_domain_type", DEFAULT_FAIL_DOMAIN_TYPE)))
+        return types.FailDomainType.from_string(
+            str(self.__cluster_description.get("fail_domain_type", DEFAULT_FAIL_DOMAIN_TYPE))
+        )
 
     @property
     def min_fail_domains(self):
diff --git a/ydb/tools/cfg/static.py b/ydb/tools/cfg/static.py
index 923f5e29e4..4520de5ab3 100644
--- a/ydb/tools/cfg/static.py
+++ b/ydb/tools/cfg/static.py
@@ -510,10 +510,14 @@ class StaticConfigGenerator(object):
             if 'pdisk_config' in vdisk_location:
                 if 'expected_slot_count' in vdisk_location['pdisk_config']:
                     vdisk_location['pdisk_config']['expected_slot_count'] = int(vdisk_location['pdisk_config']['expected_slot_count'])
-        if 'channel_profile_config' in normalized_config:
-            for profile in normalized_config['channel_profile_config']['profile']:
-                for channel in profile['channel']:
-                    channel['pdisk_category'] = int(channel['pdisk_category'])
+        if self.__cluster_details.channel_profile_config is not None:
+            normalized_config["channel_profile_config"] = self.__cluster_details.channel_profile_config
+        else:
+            if 'channel_profile_config' in normalized_config:
+                for profile in normalized_config['channel_profile_config']['profile']:
+                    for channel in profile['channel']:
+                        print(channel)
+                        channel['pdisk_category'] = int(channel['pdisk_category'])
         if 'system_tablets' in normalized_config:
             for tablets in normalized_config['system_tablets'].values():
                 for tablet in tablets:
@@ -754,6 +758,8 @@ class StaticConfigGenerator(object):
         dc_enumeration = {}
 
         if not self.__cluster_details.get_service("static_groups"):
+            if self.__cluster_details.blob_storage_config:
+                return
             self.__proto_configs["bs.txt"] = self._read_generated_bs_config(
                 str(self.__cluster_details.static_erasure),
                 str(self.__cluster_details.min_fail_domains),
@@ -835,13 +841,17 @@ class StaticConfigGenerator(object):
         if self.__cluster_details.nw_cache_file_path is not None:
             self.__proto_configs["bs.txt"].CacheFilePath = self.__cluster_details.nw_cache_file_path
 
-    def _read_generated_bs_config(self, static_erasure, min_fail_domains, static_pdisk_type, fail_domain_type, bs_format_config):
+    def _read_generated_bs_config(
+        self, static_erasure, min_fail_domains, static_pdisk_type, fail_domain_type, bs_format_config
+    ):
         result = config_pb2.TBlobStorageConfig()
 
-        with tempfile.NamedTemporaryFile(delete=True) as t_file:
+        with tempfile.NamedTemporaryFile(delete=False) as t_file:
             utils.write_proto_to_file(t_file.name, bs_format_config)
 
-            rx_begin, rx_end, dx_begin, dx_end = types.DistinctionLevels[types.FailDomainType.from_string(fail_domain_type)]
+            rx_begin, rx_end, dx_begin, dx_end = types.DistinctionLevels[
+                types.FailDomainType.from_string(fail_domain_type)
+            ]
 
             cmd_base = [
                 self.__local_binary_path,
diff --git a/ydb/tools/ydbd_slice/__init__.py b/ydb/tools/ydbd_slice/__init__.py
index e11c1dd9e9..75a385661d 100644
--- a/ydb/tools/ydbd_slice/__init__.py
+++ b/ydb/tools/ydbd_slice/__init__.py
@@ -2,12 +2,14 @@ import os
 import sys
 import json
 import signal
-import shutil
 import tempfile
 import logging
 import argparse
 import subprocess
 import warnings
+
+import library.python.resource as rs
+
 from urllib3.exceptions import HTTPWarning
 
 from ydb.tools.cfg.walle import NopHostsInformationProvider
@@ -499,7 +501,42 @@ def ssh_args():
         metavar="SSH_USER",
         default=current_user,
         help="user for ssh interaction with slice. Default value is $USER "
-        "(which equals {user} now)".format(user=current_user),
+        "(which equals {user} now)".format(user=current_user),
     )
     return args
+
+
+def databases_config_path_args():
+    args = argparse.ArgumentParser(add_help=False)
+    args.add_argument(
+        "--databases-config",
+        metavar="DATABASES_CONFIG",
+        default="",
+        required=False,
+        help="Path to file with databases configuration",
+    )
+    return args
+
+
+def cluster_type_args():
+    args = argparse.ArgumentParser(add_help=False)
+    args.add_argument(
+        "--cluster-type",
+        metavar="CLUSTER_TYPE",
+        required=True,
+        help="Erasure type for slice",
+        choices=["block-4-2-8-nodes", "mirror-3-dc-3-nodes-in-memory", "mirror-3-dc-3-nodes", "mirror-3-dc-9-nodes"],
+    )
+    return args
+
+
+def output_file():
+    args = argparse.ArgumentParser(add_help=False)
+    args.add_argument(
+        "--output-file",
+        metavar="OUTPUT_FILE",
+        required=False,
+        help="File to save cluster configuration",
+    )
+    return args
@@ -583,7 +620,7 @@ def dispatch_run(func, args, walle_provider):
 
         if clear_tmp:
             logger.debug("remove temp dirs '%s'", temp_dir)
-            shutil.rmtree(temp_dir)
+            # shutil.rmtree(temp_dir)
 
 
 def add_install_mode(modes, walle_provider):
@@ -593,9 +630,17 @@ def add_install_mode(modes, walle_provider):
     mode = modes.add_parser(
         "install",
         conflict_handler='resolve',
-        parents=[direct_nodes_args(), cluster_description_args(), binaries_args(), component_args(), log_args(), ssh_args()],
+        parents=[
+            direct_nodes_args(),
+            cluster_description_args(),
+            binaries_args(),
+            component_args(),
+            log_args(),
+            ssh_args(),
+            # databases_config_path_args(),
+        ],
         description="Full installation of the cluster from scratch. "
-        "You can use --hosts to specify particular hosts. But it is tricky."
+        "You can use --hosts to specify particular hosts. But it is tricky.",
     )
     mode.set_defaults(handler=_run)
@@ -672,7 +717,7 @@ def add_clear_mode(modes, walle_provider):
         "clear",
         parents=[direct_nodes_args(), cluster_description_args(), binaries_args(), component_args(), ssh_args()],
         description="Stop all kikimr instances at the nodes, format all kikimr drivers, shutdown dynamic slots. "
-        "And don't start nodes afrer it. "
+        "And don't start nodes after it. "
         "Use --hosts to specify particular hosts."
     )
     mode.set_defaults(handler=_run)
@@ -686,12 +731,42 @@ def add_format_mode(modes, walle_provider):
         "format",
         parents=[direct_nodes_args(), cluster_description_args(), binaries_args(), component_args(), ssh_args()],
         description="Stop all kikimr instances at the nodes, format all kikimr drivers at the nodes, start the instances. "
-        "If you call format for all cluster, you will spoil it. "
-        "Additional dynamic configuration will required after it. "
-        "If you call format for few nodes, cluster will regenerate after it. "
+        "If you call format for the whole cluster, you will spoil it. "
+        "Additional dynamic configuration will be required after it. "
+        "If you call format for a few nodes, the cluster will regenerate after it. "
" + "Use --hosts to specify particular hosts.", + ) + mode.set_defaults(handler=_run) + +def add_sample_config_mode(modes): + def _run(args): + cluster_type = args.cluster_type + template_path = "" + if cluster_type == "block-4-2-8-nodes": + template_path = "/ydbd_slice/baremetal/templates/block-4-2-8-nodes.yaml" + elif cluster_type == "mirror-3-dc-3-nodes-in-memory": + pass + elif cluster_type == "mirror-3-dc-3-nodes": + template_path = "/ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml" + elif cluster_type == "mirror-3-dc-9-nodes": + template_path = "/ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml" + else: + raise "Unreachable code" # TODO(shmel1k@): improve error + + f = rs.find(template_path).decode() + if args.output_file is not None and args.output_file != "": + with open(args.output_file, "w+") as f1: + f1.write(f) + else: + print(f) + + mode = modes.add_parser( + "sample-config", + parents=[cluster_type_args(), output_file()], + description="Generate default mock-configuration for provided cluster-type" ) + mode.set_defaults(handler=_run) @@ -1205,6 +1280,8 @@ def main(walle_provider=None): add_clear_mode(modes, walle_provider) add_format_mode(modes, walle_provider) add_explain_mode(modes, walle_provider) + add_sample_config_mode(modes) + add_docker_build_mode(modes) add_kube_generate_mode(modes) add_kube_install_mode(modes) diff --git a/ydb/tools/ydbd_slice/baremetal/templates/block-4-2.yaml b/ydb/tools/ydbd_slice/baremetal/templates/block-4-2.yaml new file mode 100644 index 0000000000..6487d9fadb --- /dev/null +++ b/ydb/tools/ydbd_slice/baremetal/templates/block-4-2.yaml @@ -0,0 +1,205 @@ +# YDB configuration options and their values +# are described in documentaion https://ydb.tech/en/docs/deploy/configuration/config + +# Option will be removed further +use_new_style_kikimr_cfg: true + +# static erasure is the parameter that +# describes the fault tolerance mode of the +# cluster. See docs for more details https://ydb.tech/en/docs/deploy/configuration/config#domains-blob + +host_configs: # the list of available host configurations in the cluster. +- drive: + - path: /dev/disk/by-partlabel/kikimr_nvme_01 # path of the first disk in the host configration. + type: SSD # kind of the disk: available kinds are SSD, NVME or HDD + - path: /dev/disk/by-partlabel/kikimr_nvme_02 + type: SSD + # Add more disks if required. + host_config_id: 1 # the unique id of the host config +hosts: +- host: ydb-node-zone-a-1.local # storage node DNS name, change if required. + host_config_id: 1 # numeric host configuration template identifier. + walle_location: # this parameter describes where host is located. + body: 1 # string representing a host serial number. + data_center: 'DCA' # string representing the datacenter / availability zone where the host is located. + # if cluster is deployed using mirror-3-dc fault tolerance mode, all hosts must be distributed + # across 3 datacenters. + rack: '1' # string representing a rack identifier where the host is located. + # if cluster is deployed using block-4-2 erasure, all hosts should be distrubited + # across at least 8 racks. + # For testing purpose it does not really matter, where all hosts are located. + # All hosts can be located in one datacenter and even in one rack. + # Just do not change `data_center` and `rack` options. 
+  # NOTE(shmel1k@): If your host has a label like '_4ssd', its disks
+  # will be located at `kikimr_ssd_01-04`.
+  # If your host has a label like '_2hdd', its disks
+  # will be located at `/dev/disk/by-partlabel/kikimr_hdd_03-04` (enumeration starts with 03).
+- host: ydb-node-zone-a-2.local
+  host_config_id: 1
+  location:
+    body: 2
+    data_center: 'DCA'
+    rack: '2'
+- host: ydb-node-zone-a-3.local
+  host_config_id: 1
+  location:
+    body: 3
+    data_center: 'DCA'
+    rack: '3'
+
+- host: ydb-node-zone-a-4.local
+  host_config_id: 1
+  location:
+    body: 4
+    data_center: 'DCA'
+    rack: '4'
+- host: ydb-node-zone-a-5.local
+  host_config_id: 1
+  location:
+    body: 5
+    data_center: 'DCA'
+    rack: '5'
+- host: ydb-node-zone-a-6.local
+  host_config_id: 1
+  location:
+    body: 6
+    data_center: 'DCA'
+    rack: '6'
+
+- host: ydb-node-zone-a-7.local
+  host_config_id: 1
+  location:
+    body: 7
+    data_center: 'DCA'
+    rack: '7'
+- host: ydb-node-zone-a-8.local
+  host_config_id: 1
+  location:
+    body: 8
+    data_center: 'DCA'
+    rack: '8'
+
+# static_erasure is the parameter that
+# describes the fault tolerance mode of the
+# cluster. See the docs for more details: https://ydb.tech/en/docs/deploy/configuration/config#domains-blob
+static_erasure: block-4-2
+state_storage:
+  allow_incorrect: true
+  node_ids: [1, 2, 3, 4, 5, 6, 7, 8]
+fail_domain_type: rack
+
+# NOTE(shmel1k@): this template's domains_config differs from the production configuration.
+# It will be fixed soon, stay tuned.
+domains:
+  # There can be only one root domain in a cluster. The domain name prefixes all scheme object names; e.g. the full name of a table table1 in database db1
+  # in a cluster with the domains_config.domain.name parameter set to Root would be /Root/db1/table1
+  - domain_name: Root
+    dynamic_slots: 8
+    databases:
+      - name: "testdb"
+        storage_units:
+          - count: 1 # How many groups will be allocated for the database
+            kind: ssd # What storage the groups will use
+        compute_units:
+          - count: 1 # How many dynamic nodes the database will have
+            kind: slot
+            zone: any
+    storage_pools:
+      - kind: ssd
+        num_groups: 1
+    storage_pool_kinds:
+      - kind: ssd
+        # fault tolerance mode name - none, block-4-2, or mirror-3-dc.
+        # See the docs for more details: https://ydb.tech/en/docs/deploy/configuration/config#domains-blob
+        erasure: block-4-2
+        fail_domain_type: rack
+        filter_properties:
+          type: SSD # device type to match host_configs.drive.type
+
+table_service_config:
+  sql_version: 1
+actor_system_config: # the configuration of the actor system, which describes how the cores of the instance are distributed
+  executor: # across different types of workloads in the instance.
+  - name: System # the system executor of the actor system. In this executor YDB launches system workloads, like system tablets
+                 # and reads from storage.
+    threads: 2 # the number of threads allocated to the system executor.
+    type: BASIC
+  - name: User # the user executor of the actor system. In this executor YDB launches user workloads, like datashard activities,
+               # queries and rpc calls.
+    threads: 3 # the number of threads allocated to the user executor.
+    type: BASIC
+  - name: Batch # the batch executor of the actor system. In this executor YDB launches batch operations, like scan queries, table
+                # compactions and background compactions.
+    threads: 2 # the number of threads allocated to the batch executor.
+    type: BASIC
+  - name: IO # the io executor. In this executor YDB launches sync operations and writes logs.
+    threads: 1
+    time_per_mailbox_micro_secs: 100
+    type: IO
+  - name: IC # the interconnect executor, which YDB uses for network communication across different nodes of the cluster.
+    spin_threshold: 10
+    threads: 1 # the number of threads allocated to the interconnect executor.
+    time_per_mailbox_micro_secs: 100
+    type: BASIC
+  scheduler:
+    progress_threshold: 10000
+    resolution: 256
+    spin_threshold: 0
+blob_storage_config: # configuration of the static blobstorage group.
+                     # YDB uses this group to store system tablets' data, like SchemeShard
+  service_set:
+    groups:
+    - erasure_species: block-4-2 # fault tolerance mode name for the static group
+      rings: # block-4-2 must have exactly 1 ring or availability zone.
+      - fail_domains: # fail domains of the static group describe where each vdisk of the static group should be located.
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-1.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 1
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-2.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 2
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-3.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 3
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-4.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 4
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-5.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 5
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-6.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 6
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-7.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 7
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-8.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 8
+# TODO: migrate to channel_profile_config
+profiles:
+  - channels:
+    - storage_pool_kind: ssd
+    - storage_pool_kind: ssd
+    - storage_pool_kind: ssd
+
+interconnect_config:
+  start_tcp: true
+
+grpc_config:
\ No newline at end of file
diff --git a/ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml b/ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml
new file mode 100644
index 0000000000..6c4ae8c798
--- /dev/null
+++ b/ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml
@@ -0,0 +1,177 @@
+# YDB configuration options and their values
+# are described in the documentation: https://ydb.tech/en/docs/deploy/configuration/config
+
+# This option will be removed later
+use_new_style_kikimr_cfg: true
+
+
+host_configs: # the list of available host configurations in the cluster.
+# NOTE: in the mirror-3-dc-3-nodes configuration we have to use hosts with at least 3 physical disks.
+- drive:
+  - path: /dev/disk/by-partlabel/kikimr_nvme_01 # path of the first disk in the host configuration.
+    type: SSD # kind of the disk: available kinds are SSD, NVME, ROT (HDD)
+  - path: /dev/disk/by-partlabel/kikimr_nvme_02
+    type: SSD
+  - path: /dev/disk/by-partlabel/kikimr_nvme_03
+    type: SSD
+  # Add more disks if required.
+  # NOTE(shmel1k@): If your host has a nodeclaim-label like '_4ssd', its disks
+  # will be located at `kikimr_ssd_01-04`.
+  # If your host has a nodeclaim-label like '_2hdd', its disks
+  # will be located at `/dev/disk/by-partlabel/kikimr_hdd_03-04` (enumeration starts with 03).
+  host_config_id: 1
+hosts:
+- host: ydb-node-zone-a.local # storage node DNS name
+  host_config_id: 1 # numeric host configuration template identifier
+  location: # this parameter describes where the host is located.
+    body: 1 # string representing a host serial number.
+    data_center: 'DCA' # string representing the datacenter / availability zone where the host is located.
+                       # if the cluster is deployed in the mirror-3-dc fault tolerance mode, all hosts must be
+                       # distributed across 3 datacenters.
+    rack: '1' # string representing a rack identifier where the host is located.
+              # if the cluster is deployed with block-4-2 erasure, all hosts should be distributed
+              # across at least 8 racks.
+- host: ydb-node-zone-b.local
+  host_config_id: 1
+  location:
+    body: 2
+    data_center: 'DCB'
+    rack: '2'
+- host: ydb-node-zone-c.local
+  host_config_id: 1
+  location:
+    body: 3
+    data_center: 'DCC'
+    rack: '3'
+
+# static_erasure is the parameter that
+# describes the fault tolerance mode of the
+# cluster. See the docs for more details: https://ydb.tech/en/docs/deploy/configuration/config#domains-blob
+static_erasure: mirror-3-dc
+state_storage:
+  allow_incorrect: true
+  node_ids: [1, 2, 3]
+fail_domain_type: disk
+
+# NOTE(shmel1k@): this template's domains_config differs from the production configuration.
+# It will be fixed soon, stay tuned.
+domains:
+  # There can be only one root domain in a cluster. The domain name prefixes all scheme object names; e.g. the full name of a table table1 in database db1
+  # in a cluster with the domains_config.domain.name parameter set to Root would be /Root/db1/table1
+  - domain_name: Root
+    dynamic_slots: 8
+    databases:
+      - name: "testdb"
+        storage_units:
+          - count: 1 # How many groups will be allocated for the database
+            kind: ssd # What storage the groups will use
+        compute_units:
+          - count: 1 # How many dynamic nodes the database will have
+            kind: slot
+            zone: any
+    storage_pools:
+      - kind: ssd
+        num_groups: 1
+    storage_pool_kinds:
+      - kind: ssd
+        # fault tolerance mode name - none, block-4-2, or mirror-3-dc.
+        # See the docs for more details: https://ydb.tech/en/docs/deploy/configuration/config#domains-blob
+        erasure: mirror-3-dc
+        fail_domain_type: disk
+        filter_properties:
+          type: SSD # device type to match host_configs.drive.type
+
+table_service_config:
+  sql_version: 1
+actor_system_config: # the configuration of the actor system, which describes how the cores of the instance are distributed
+  executor: # across different types of workloads in the instance.
+  - name: System # the system executor of the actor system. In this executor YDB launches system workloads, like system tablets
+                 # and reads from storage.
+    threads: 2 # the number of threads allocated to the system executor.
+    type: BASIC
+  - name: User # the user executor of the actor system. In this executor YDB launches user workloads, like datashard activities,
+               # queries and rpc calls.
+    threads: 3 # the number of threads allocated to the user executor.
+    type: BASIC
+  - name: Batch # the batch executor of the actor system. In this executor YDB launches batch operations, like scan queries, table
+                # compactions and background compactions.
+    threads: 2 # the number of threads allocated to the batch executor.
+    type: BASIC
+  - name: IO # the io executor. In this executor YDB launches sync operations and writes logs.
+    threads: 1
+    time_per_mailbox_micro_secs: 100
+    type: IO
+  - name: IC # the interconnect executor, which YDB uses for network communication across different nodes of the cluster.
+    spin_threshold: 10
+    threads: 1 # the number of threads allocated to the interconnect executor.
+    time_per_mailbox_micro_secs: 100
+    type: BASIC
+  scheduler:
+    progress_threshold: 10000
+    resolution: 256
+    spin_threshold: 0
+blob_storage_config: # configuration of the static blobstorage group.
+                     # YDB uses this group to store system tablets' data, like SchemeShard
+  service_set:
+    groups:
+    - erasure_species: mirror-3-dc # fault tolerance mode name for the static group
+      rings: # mirror-3-dc must have exactly 3 rings or availability zones
+      - fail_domains: # first ring: fail domains of the static group describe where each vdisk of the static group should be located.
+        - vdisk_locations:
+          - node_id: ydb-node-zone-a.local
+            pdisk_category: 1 # 1 - SSD, 0 - HDD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 1
+        - vdisk_locations:
+          - node_id: ydb-node-zone-a.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_02
+            pdisk_guid: 2
+        - vdisk_locations:
+          - node_id: ydb-node-zone-a.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_03
+            pdisk_guid: 3
+      - fail_domains: # second ring: fail domains of the static group describe where each vdisk of the static group should be located.
+        - vdisk_locations:
+          - node_id: ydb-node-zone-b.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 4
+        - vdisk_locations:
+          - node_id: ydb-node-zone-b.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_02
+            pdisk_guid: 5
+        - vdisk_locations:
+          - node_id: ydb-node-zone-b.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_03
+            pdisk_guid: 6
+      - fail_domains: # third ring: fail domains of the static group describe where each vdisk of the static group should be located.
+        - vdisk_locations:
+          - node_id: ydb-node-zone-c.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+            pdisk_guid: 7
+        - vdisk_locations:
+          - node_id: ydb-node-zone-c.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_02
+            pdisk_guid: 8
+        - vdisk_locations:
+          - node_id: ydb-node-zone-c.local
+            pdisk_category: 1
+            path: /dev/disk/by-partlabel/kikimr_nvme_03
+            pdisk_guid: 9
+
+# TODO: migrate to channel_profile_config
+profiles:
+  - channels:
+    - storage_pool_kind: ssd
+    - storage_pool_kind: ssd
+    - storage_pool_kind: ssd
+
+interconnect_config:
+  start_tcp: true
+grpc_config:
\ No newline at end of file
diff --git a/ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml b/ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml
new file mode 100644
index 0000000000..24cbcec4cf
--- /dev/null
+++ b/ydb/tools/ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml
@@ -0,0 +1,199 @@
+# static_erasure is the parameter that
+# describes the fault tolerance mode of the
+# cluster. See the docs for more details: https://ydb.tech/en/docs/deploy/configuration/config#domains-blob
+static_erasure: mirror-3-dc
+host_configs: # the list of available host configurations in the cluster.
+- drive:
+  - path: /dev/disk/by-partlabel/kikimr_nvme_01 # path of the first disk in the host configuration.
+    type: SSD # kind of the disk: available kinds are SSD, NVME or HDD
+  - path: /dev/disk/by-partlabel/kikimr_nvme_02
+    type: SSD
+  # Add more disks if required.
+  # NOTE(shmel1k@): If your host has a nodeclaim-label like '_4ssd', its disks
+  # will be located at `kikimr_ssd_01-04`.
+  # If your host has a nodeclaim-label like '_2hdd', its disks
+  # will be located at `/dev/disk/by-partlabel/kikimr_hdd_03-04` (enumeration starts with 03).
+  host_config_id: 1 # the unique id of the host config
+hosts:
+- host: ydb-node-zone-a-1.local # storage node DNS name, change if required.
+  host_config_id: 1 # numeric host configuration template identifier.
+  walle_location: # this parameter describes where the host is located.
+    body: 1 # string representing a host serial number.
+    data_center: 'zone-a' # string representing the datacenter / availability zone where the host is located.
+                          # if the cluster is deployed in the mirror-3-dc fault tolerance mode, all hosts must be
+                          # distributed across 3 datacenters.
+    rack: '1' # string representing a rack identifier where the host is located.
+              # if the cluster is deployed with block-4-2 erasure, all hosts should be distributed
+              # across at least 8 racks.
+  # For testing purposes it does not really matter where the hosts are located.
+  # All hosts can be located in one datacenter and even in one rack.
+  # Just do not change the `data_center` and `rack` options.
+- host: ydb-node-zone-a-2.local
+  host_config_id: 1
+  walle_location:
+    body: 2
+    data_center: 'zone-a'
+    rack: '2'
+- host: ydb-node-zone-a-3.local
+  host_config_id: 1
+  walle_location:
+    body: 3
+    data_center: 'zone-a'
+    rack: '3'
+
+- host: ydb-node-zone-b-1.local
+  host_config_id: 1
+  walle_location:
+    body: 4
+    data_center: 'zone-b'
+    rack: '4'
+- host: ydb-node-zone-b-2.local
+  host_config_id: 1
+  walle_location:
+    body: 5
+    data_center: 'zone-b'
+    rack: '5'
+- host: ydb-node-zone-b-3.local
+  host_config_id: 1
+  walle_location:
+    body: 6
+    data_center: 'zone-b'
+    rack: '6'
+
+- host: ydb-node-zone-c-1.local
+  host_config_id: 1
+  walle_location:
+    body: 7
+    data_center: 'zone-c'
+    rack: '7'
+- host: ydb-node-zone-c-2.local
+  host_config_id: 1
+  walle_location:
+    body: 8
+    data_center: 'zone-c'
+    rack: '8'
+- host: ydb-node-zone-c-3.local
+  host_config_id: 1
+  walle_location:
+    body: 9
+    data_center: 'zone-c'
+    rack: '9'
+
+# NOTE(shmel1k@): this template's domains_config differs from the production configuration.
+# It will be fixed soon, stay tuned.
+domains:
+  # There can be only one root domain in a cluster. The domain name prefixes all scheme object names; e.g. the full name of a table table1 in database db1
+  # in a cluster with the domains_config.domain.name parameter set to Root would be /Root/db1/table1
+  - domain_name: Root
+    dynamic_slots: 8
+    databases:
+      - name: "testdb"
+        storage_units:
+          - count: 1 # How many groups will be allocated for the database
+            kind: ssd # What storage the groups will use
+        compute_units:
+          - count: 1 # How many dynamic nodes the database will have
+            kind: slot
+            zone: any
+    storage_pool_kinds:
+      - kind: ssd
+        # fault tolerance mode name - none, block-4-2, or mirror-3-dc.
+        # See the docs for more details: https://ydb.tech/en/docs/deploy/configuration/config#domains-blob
+        erasure: mirror-3-dc
+        filter_properties:
+          type: SSD # device type to match host_configs.drive.type
+    state_storage:
+      - ring:
+          node: [1, 2, 3, 4, 5, 6, 7, 8, 9]
+          nto_select: 9
+        ssid: 1
+table_service_config:
+  sql_version: 1
+actor_system_config: # the configuration of the actor system, which describes how the cores of the instance are distributed
+  executor: # across different types of workloads in the instance.
+  - name: System # the system executor of the actor system. In this executor YDB launches system workloads, like system tablets
+                 # and reads from storage.
+    threads: 2 # the number of threads allocated to the system executor.
+    type: BASIC
+  - name: User # the user executor of the actor system. In this executor YDB launches user workloads, like datashard activities,
+               # queries and rpc calls.
+    threads: 3 # the number of threads allocated to the user executor.
+    type: BASIC
+  - name: Batch # the batch executor of the actor system. In this executor YDB launches batch operations, like scan queries, table
+                # compactions and background compactions.
+    threads: 2 # the number of threads allocated to the batch executor.
+    type: BASIC
+  - name: IO # the io executor. In this executor YDB launches sync operations and writes logs.
+    threads: 1
+    time_per_mailbox_micro_secs: 100
+    type: IO
+  - name: IC # the interconnect executor, which YDB uses for network communication across different nodes of the cluster.
+    spin_threshold: 10
+    threads: 1 # the number of threads allocated to the interconnect executor.
+    time_per_mailbox_micro_secs: 100
+    type: BASIC
+  scheduler:
+    progress_threshold: 10000
+    resolution: 256
+    spin_threshold: 0
+blob_storage_config: # configuration of the static blobstorage group.
+                     # YDB uses this group to store system tablets' data, like SchemeShard
+  service_set:
+    groups:
+    - erasure_species: mirror-3-dc # fault tolerance mode name for the static group
+      rings: # mirror-3-dc must have exactly 3 rings: one in every availability zone.
+      - fail_domains:
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-1.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-2.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-a-3.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+      - fail_domains:
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-b-1.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-b-2.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-b-3.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+      - fail_domains:
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-c-1.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-c-2.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+        - vdisk_locations:
+          - node_id: "ydb-node-zone-c-3.local"
+            pdisk_category: SSD
+            path: /dev/disk/by-partlabel/kikimr_nvme_01
+channel_profile_config:
+  profile:
+  - channel:
+    - erasure_species: mirror-3-dc
+      pdisk_category: 1
+      storage_pool_kind: ssd
+    - erasure_species: mirror-3-dc
+      pdisk_category: 1
+      storage_pool_kind: ssd
+    - erasure_species: mirror-3-dc
+      pdisk_category: 1
+      storage_pool_kind: ssd
+    profile_id: 0
+interconnect_config:
+  start_tcp: true
+grpc_config:
\ No newline at end of file
diff --git a/ydb/tools/ydbd_slice/cluster_description.py b/ydb/tools/ydbd_slice/cluster_description.py
index 4d1bac8400..39e9973cfc 100644
--- a/ydb/tools/ydbd_slice/cluster_description.py
+++ b/ydb/tools/ydbd_slice/cluster_description.py
@@ -35,7 +35,7 @@ class ClusterDetails(ClusterDetailsProvider):
         self._cluster_description_file = cluster_description_path
         self._walle_provider = walle_provider
 
-        super(ClusterDetails, self).__init__(self.template, self._walle_provider)
+        super(ClusterDetails, self).__init__(self.template, self._walle_provider, use_new_style_cfg=True)
 
     @property
     def template(self):
diff --git a/ydb/tools/ydbd_slice/handlers.py b/ydb/tools/ydbd_slice/handlers.py
index 9d14552f05..7dbb324f30 100644
--- a/ydb/tools/ydbd_slice/handlers.py
+++ b/ydb/tools/ydbd_slice/handlers.py
@@ -16,7 +16,7 @@ class CalledProcessError(subprocess.CalledProcessError):
         return "Command '%s' returned non-zero exit status %d and output was '%s'" % (
             self.cmd,
             self.returncode,
-            self.output
+            self.output,
         )
 
 
diff --git a/ydb/tools/ydbd_slice/ya.make b/ydb/tools/ydbd_slice/ya.make
index e6d7b63cf3..b8ca7b162b 100644
--- a/ydb/tools/ydbd_slice/ya.make
+++ b/ydb/tools/ydbd_slice/ya.make
@@ -39,6 +39,10 @@ RESOURCE(
     kube/templates/8-node-block-4-2/storage.yaml /ydbd_slice/templates/8-node-block-4-2/storage.yaml
     kube/templates/legacy-cms-config-items/table-profile.txt /ydbd_slice/templates/legacy-cms-config-items/table-profile.txt
     kube/templates/legacy-cms-config-items/unified-agent.txt /ydbd_slice/templates/legacy-cms-config-items/unified-agent.txt
+
+    baremetal/templates/block-4-2.yaml /ydbd_slice/baremetal/templates/block-4-2.yaml
+    baremetal/templates/mirror-3-dc-9-nodes.yaml /ydbd_slice/baremetal/templates/mirror-3-dc-9-nodes.yaml
+    baremetal/templates/mirror-3-dc-3-nodes.yaml /ydbd_slice/baremetal/templates/mirror-3-dc-3-nodes.yaml
 )
 
 END()
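The ClusterDetailsProvider change lets a raw config YAML carry host topology inline: each getter checks the explicit top-level field first, then the new per-host `location` block, and only then asks the Wall-E host information provider. A self-contained sketch of that lookup order; the `NopWalle` class is an illustrative stand-in for the tool's NopHostsInformationProvider, not its actual API:

```python
# Lookup order added by this commit: explicit top-level field ->
# per-host `location` block -> host information provider (Wall-E).
def get_datacenter(host_description, walle):
    if host_description.get("datacenter") is not None:
        return str(host_description["datacenter"])
    dc = host_description.get("location", {}).get("data_center")
    if dc:
        return str(dc)
    # Last resort: ask the provider by hostname.
    return str(walle.get_datacenter(host_description.get("name", host_description.get("host"))))


class NopWalle:
    """Illustrative no-op provider used only for this sketch."""

    def get_datacenter(self, hostname):
        return "unknown"


host = {"host": "ydb-node-zone-a-1.local", "location": {"data_center": "DCA", "rack": "1", "body": 1}}
assert get_datacenter(host, NopWalle()) == "DCA"
assert get_datacenter({"host": "h1"}, NopWalle()) == "unknown"
```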
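Similarly, static.py now passes a template-supplied `channel_profile_config` through verbatim instead of coercing it; only when the template supplies nothing does the existing int coercion of `pdisk_category` run. A condensed sketch of that branch (the function name is mine, not the tool's):

```python
# Pass-through vs. normalization of channel profiles, as in the diff above.
def normalize_channel_profiles(normalized_config, template_channel_profile_config):
    if template_channel_profile_config is not None:
        # Raw-YAML path: trust the template's channel_profile_config as-is.
        normalized_config["channel_profile_config"] = template_channel_profile_config
        return normalized_config
    # Legacy path: coerce pdisk_category values to int.
    for profile in normalized_config.get("channel_profile_config", {}).get("profile", []):
        for channel in profile["channel"]:
            channel["pdisk_category"] = int(channel["pdisk_category"])
    return normalized_config


cfg = {"channel_profile_config": {"profile": [{"channel": [{"pdisk_category": "1"}]}]}}
normalize_channel_profiles(cfg, None)
assert cfg["channel_profile_config"]["profile"][0]["channel"][0]["pdisk_category"] == 1
```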
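A generated template's static-group shape can also be checked directly: in the mirror-3-dc templates above, the static group has 3 rings (one per availability zone) with 3 fail domains each. A quick check, assuming PyYAML is installed and a template was saved locally as cluster.yaml (e.g. via the sample-config mode):

```python
# Sanity-check the static group shape of a mirror-3-dc template.
import yaml

with open("cluster.yaml") as fh:
    cfg = yaml.safe_load(fh)

for group in cfg["blob_storage_config"]["service_set"]["groups"]:
    rings = group["rings"]
    assert len(rings) == 3, "mirror-3-dc needs one ring per availability zone"
    for ring in rings:
        assert len(ring["fail_domains"]) == 3, "expected 3 fail domains per ring"
```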