diff options
author | artem-trof <artem-trof@yandex-team.com> | 2023-02-08 22:59:57 +0300 |
---|---|---|
committer | artem-trof <artem-trof@yandex-team.com> | 2023-02-08 22:59:57 +0300 |
commit | 10cb6c3a57347bb0119750e987fba84c0d425eba (patch) | |
tree | 1f79b060bacc8c346d43fb6d87911068e9809a7b | |
parent | 108a09f8e8729a86f9ebe0dd891e5dfc36c66eb6 (diff) | |
download | ydb-10cb6c3a57347bb0119750e987fba84c0d425eba.tar.gz |
PR from branch users/artem-trof/ydbops-6676-setup-monitoring
ydbops-6676: add separate helmfile for grafana dashboards
ydbops-6676: move chart contents to right folder
ydbops-6676: rename chart
ydbops-6676: add grafana-dashboards helm chart
ydbops-6676: fix metric relabeling & chart values
ydbops-6676: rename release
ydbops-6676: disable node selector
ydbops-6676: move to ydb-testing
ydbops-6676: fix monitoring port & counters
ydbops-6676: add helmfile & values for monitoring
19 files changed, 362 insertions, 2 deletions
diff --git a/ydb/deploy/grafana_dashboards/README.md b/ydb/deploy/grafana_dashboards/README.md new file mode 100644 index 0000000000..03361dcb68 --- /dev/null +++ b/ydb/deploy/grafana_dashboards/README.md @@ -0,0 +1,3 @@ +# Grafana dashboards + +Grafana dashboards moved [here](../helm/ydb-prometheus/dashboards/) diff --git a/ydb/deploy/grafana_dashboards/local_upload_dashboards.sh b/ydb/deploy/grafana_dashboards/local_upload_dashboards.sh index 5b052e2ab2..a904250810 100755 --- a/ydb/deploy/grafana_dashboards/local_upload_dashboards.sh +++ b/ydb/deploy/grafana_dashboards/local_upload_dashboards.sh @@ -5,6 +5,5 @@ GRAFANA_API="http://admin:admin@localhost:3000/api" curl -X POST -H "Content-Type: application/json" ${GRAFANA_API}/folders --data-ascii '{ "uid": "ydb", "title": "YDB" }' for DASH in cpu dboverview dbstatus actors grpc queryengine txproxy datashard; do - cat ${DASH}.json | jq '{ folderUid: "ydb", dashboard: . }' | curl -X POST -H "Content-Type: application/json" ${GRAFANA_API}/dashboards/db -d @- + cat ../helm/ydb-prometheus/dashboards/${DASH}.json | jq '{ folderUid: "ydb", dashboard: . }' | curl -X POST -H "Content-Type: application/json" ${GRAFANA_API}/dashboards/db -d @- done - diff --git a/ydb/deploy/helm/ydb-prometheus/.helmignore b/ydb/deploy/helm/ydb-prometheus/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ydb/deploy/helm/ydb-prometheus/Chart.lock b/ydb/deploy/helm/ydb-prometheus/Chart.lock new file mode 100644 index 0000000000..580754a450 --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: kube-prometheus-stack + repository: https://prometheus-community.github.io/helm-charts + version: 44.3.1 +digest: sha256:a47a360d87953897ca25b27a7eb67767773d324b4e5c58f3cd7af4aa9da7a375 +generated: "2023-02-08T12:54:45.749905+03:00" diff --git a/ydb/deploy/helm/ydb-prometheus/Chart.yaml b/ydb/deploy/helm/ydb-prometheus/Chart.yaml new file mode 100644 index 0000000000..82e93e473e --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +description: Chart with resources for monitoring YDB clusters with Prometheus and Grafana. +icon: https://raw.githubusercontent.com/ydb-platform/ydb/main/ydb/docs/_assets/logo.svg +type: application +maintainers: + - name: YDB + email: info@ydb.tech +name: ydb-prometheus +sources: +- https://github.com/ydb-platform/ydb +version: 0.1.0 +appVersion: "0.1.0" +keywords: + - "prometheus" + - "kube-prometheus" + - "monitoring" + - "ydb" + +dependencies: + - name: kube-prometheus-stack + version: 44.3.* + repository: https://prometheus-community.github.io/helm-charts + condition: kube-prometheus-stack.enabled
\ No newline at end of file diff --git a/ydb/deploy/helm/ydb-prometheus/README.md b/ydb/deploy/helm/ydb-prometheus/README.md new file mode 100644 index 0000000000..25eccdf80f --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/README.md @@ -0,0 +1,49 @@ +# YDB Monitoring Prometheus + +Chart with resources for monitoring YDB clusters with Prometheus and Grafana. + +This chart depends on [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/8a4f9ea1fb0fe32f3169cbfbd9f0fc517f4aaa10/charts/kube-prometheus-stack). + +This chart installs following resources: + +* Prometheus [additional scrape configs](https://github.com/prometheus-community/helm-charts/blob/8a4f9ea1fb0fe32f3169cbfbd9f0fc517f4aaa10/charts/kube-prometheus-stack/values.yaml#L3031) for external cluster +* Prometheus service-monitor object for internal cluster +* Configmaps with Grafana dashboards + +## Monitoring External YDB Cluster (bare metal or virtual machines) + +1. Set following in values.yaml: + +```yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + additionalScrapeConfigsSecret: + enabled: true + name: ydb-prometheus-additional-scrape-configs + key: additional-scrape-configs.yaml +``` + +2. Secret (ydb-prometheus-additional-scrape-configs) will be generated with chart installation and referenced in prometheus CRD. + +3. Set following in values.yaml to cluster monitor: + +```yaml + +ydb: + clusters: + - cluster: <cluster-name> + type: external + ports: + static: <static nodes port> + dynamic: + - <dynamic nodes ports, one per database (tenant)> + hosts: + - <ydb host> +``` + +4. Install chart with `helm` + +## Monitoring Internal YDB Cluster (deployed with ydb-operator) + +Work in progress diff --git a/ydb/deploy/grafana_dashboards/actors.json b/ydb/deploy/helm/ydb-prometheus/dashboards/actors.json index e563e575c9..e563e575c9 100644 --- a/ydb/deploy/grafana_dashboards/actors.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/actors.json diff --git a/ydb/deploy/grafana_dashboards/cpu.json b/ydb/deploy/helm/ydb-prometheus/dashboards/cpu.json index 8848c45014..8848c45014 100644 --- a/ydb/deploy/grafana_dashboards/cpu.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/cpu.json diff --git a/ydb/deploy/grafana_dashboards/datashard.json b/ydb/deploy/helm/ydb-prometheus/dashboards/datashard.json index 65b9711bc2..65b9711bc2 100644 --- a/ydb/deploy/grafana_dashboards/datashard.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/datashard.json diff --git a/ydb/deploy/grafana_dashboards/dboverview.json b/ydb/deploy/helm/ydb-prometheus/dashboards/dboverview.json index 263942394c..263942394c 100644 --- a/ydb/deploy/grafana_dashboards/dboverview.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/dboverview.json diff --git a/ydb/deploy/grafana_dashboards/dbstatus.json b/ydb/deploy/helm/ydb-prometheus/dashboards/dbstatus.json index ea420fa151..ea420fa151 100644 --- a/ydb/deploy/grafana_dashboards/dbstatus.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/dbstatus.json diff --git a/ydb/deploy/grafana_dashboards/grpc.json b/ydb/deploy/helm/ydb-prometheus/dashboards/grpc.json index 0c9dec4434..0c9dec4434 100644 --- a/ydb/deploy/grafana_dashboards/grpc.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/grpc.json diff --git a/ydb/deploy/grafana_dashboards/queryengine.json b/ydb/deploy/helm/ydb-prometheus/dashboards/queryengine.json index ae862b7a04..ae862b7a04 100644 --- a/ydb/deploy/grafana_dashboards/queryengine.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/queryengine.json diff --git a/ydb/deploy/grafana_dashboards/txproxy.json b/ydb/deploy/helm/ydb-prometheus/dashboards/txproxy.json index 51d4666a61..51d4666a61 100644 --- a/ydb/deploy/grafana_dashboards/txproxy.json +++ b/ydb/deploy/helm/ydb-prometheus/dashboards/txproxy.json diff --git a/ydb/deploy/helm/ydb-prometheus/templates/_helpers.tpl b/ydb/deploy/helm/ydb-prometheus/templates/_helpers.tpl new file mode 100644 index 0000000000..e2b4a0424c --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/templates/_helpers.tpl @@ -0,0 +1,119 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "ydb-prometheus.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ydb-prometheus.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ydb-prometheus.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ydb-prometheus.labels" -}} +helm.sh/chart: {{ include "ydb-prometheus.chart" . }} +app.kubernetes.io/name: {{ include "ydb-prometheus.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{- define "ydb-prometheus.labels.grafana" -}} +{{- $context := . -}} +{{ $context.Values.grafanaDashboards.markerLabel.key }}: {{ $context.Values.grafanaDashboards.markerLabel.value | quote }} +{{- end -}} + +{{- define "ydb-prometheus.externalCluster.config" -}} +{{- $context := . -}} +{{- $counters := .Values.ydb.counters -}} +{{- $clusters := .Values.ydb.clusters -}} +{{- $result := list -}} +{{- range $cluster := $clusters }} + {{- if eq $cluster.type "external" }} + + {{- range $counter := $counters }} + {{- $name := "staticNode" }} + {{- $port := $cluster.ports.static }} + {{- $type := default "all" $counter.type }} + {{- if or (eq $type "static") (eq $type "all") }} + {{- $config := include "ydb-prometheus.externalCluster.targetCounter" (tuple $cluster $counter $name $port) | fromYaml }} + {{- $result = append $result $config }} + {{- end }} + {{- end }} + + {{- range $port := $cluster.ports.dynamic }} + {{- range $counter := $counters }} + {{- $name := printf "dynamicNode/%d" ($port | int) }} + {{- $type := default "all" $counter.type }} + {{- if or (eq $type "dynamic") (eq $type "all") }} + {{- $config := include "ydb-prometheus.externalCluster.targetCounter" (tuple $cluster $counter $name $port) | fromYaml }} + {{- $result = append $result $config }} + {{- end }} + {{- end }} + {{- end }} + +{{- end }} +{{- end }} + +{{- $result | toYaml }} +{{- end -}} + +{{- define "ydb-prometheus.externalCluster.targetCounter" }} +{{- $context := . }} +{{- $cluster := index $context 0 }} +{{- $counter := index $context 1 }} +{{- $name := index $context 2 }} +{{- $port := index $context 3 }} +{{- $metricsPath := (printf "/counters/counters=%s/prometheus" $counter.counter) }} +{{- if $counter.metricsPath }} +{{- $metricsPath = $counter.metricsPath }} +{{- end }} +job_name: {{ printf "ydb/%s/%s/counter/%s" $cluster.cluster $name $counter.counter | quote }} +metrics_path: {{ $metricsPath | quote }} +relabel_configs: +- source_labels: + - __address__ + target_label: instance + regex: '([^:]+)(:[0-9]+)?' + replacement: '${1}' +metric_relabel_configs: +- source_labels: + - __name__ + target_label: __name__ + regex: (.*) + replacement: {{ $counter.counter }}_$1 +static_configs: +- targets: + {{- range $host := $cluster.hosts }} + {{- printf "- %s:%d" $host ($port | int) | nindent 4 }} + {{- end }} + labels: + project: {{ $cluster.cluster }} + counter: {{ $counter.counter | quote }} + container: ydb-dynamic +{{- end }}
\ No newline at end of file diff --git a/ydb/deploy/helm/ydb-prometheus/templates/configmaps-grafana.yaml b/ydb/deploy/helm/ydb-prometheus/templates/configmaps-grafana.yaml new file mode 100644 index 0000000000..93db46b7d9 --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/templates/configmaps-grafana.yaml @@ -0,0 +1,20 @@ +{{- $namespace := .Release.Namespace -}} +{{- $files := .Files.Glob "dashboards/*.json" }} +{{- if and .Values.grafanaDashboards.enabled $files }} +apiVersion: v1 +kind: ConfigMapList +items: +{{- range $path, $fileContents := $files }} +{{- $dashboardName := regexReplaceAll "(^.*/)(.*)\\.json$" $path "${2}" }} +- apiVersion: v1 + kind: ConfigMap + metadata: + name: {{ printf "%s-%s" (include "ydb-prometheus.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }} + namespace: {{ $namespace }} + labels: +{{- include "ydb-prometheus.labels" $ | nindent 6 }} +{{- include "ydb-prometheus.labels.grafana" $ | nindent 6 }} + data: + {{ $dashboardName }}.json: {{ $.Files.Get $path | toJson }} +{{- end }} +{{- end }} diff --git a/ydb/deploy/helm/ydb-prometheus/templates/secret-prometheus-static-targets.yaml b/ydb/deploy/helm/ydb-prometheus/templates/secret-prometheus-static-targets.yaml new file mode 100644 index 0000000000..9f1081d18c --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/templates/secret-prometheus-static-targets.yaml @@ -0,0 +1,19 @@ +{{- if .Values.prometheusConfig.enabled -}} +{{- $namespace := .Release.Namespace -}} +{{- $name := .Values.prometheusConfig.additionalScrapeConfigs.name -}} + {{- if not $name -}} + {{- $name = printf "%s-additional-scrape-configs" (include "ydb-prometheus.fullname" $) -}} + {{- end -}} + {{- $data := include "ydb-prometheus.externalCluster.config" $ -}} + {{- if $data -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ $name | trunc 63 | trimSuffix "-" }} + namespace: {{ $namespace }} + labels: +{{- include "ydb-prometheus.labels" $ | nindent 4 }} +data: + {{ .Values.prometheusConfig.additionalScrapeConfigs.key }}: {{ $data | b64enc | quote }} + {{- end -}} +{{- end -}}
\ No newline at end of file diff --git a/ydb/deploy/helm/ydb-prometheus/templates/service-monitor-cluster.yaml b/ydb/deploy/helm/ydb-prometheus/templates/service-monitor-cluster.yaml new file mode 100644 index 0000000000..dc805a4865 --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/templates/service-monitor-cluster.yaml @@ -0,0 +1,2 @@ +{{- if .Values.prometheusConfig.enabled -}} +{{- end -}}
\ No newline at end of file diff --git a/ydb/deploy/helm/ydb-prometheus/values.yaml b/ydb/deploy/helm/ydb-prometheus/values.yaml new file mode 100644 index 0000000000..f7ee1d284a --- /dev/null +++ b/ydb/deploy/helm/ydb-prometheus/values.yaml @@ -0,0 +1,97 @@ +# Allows to override chart & chart resources name +nameOverride: "" +fullnameOverride: "" + +# Enable kube-prometheus-stack chart +kube-prometheus-stack: + enabled: true + + # Install prometheus crd object + prometheus: + enabled: true + + # Enable only for monitoring ydb external clusters + # prometheusSpec: + # additionalScrapeConfigsSecret: + # enabled: true + # name: ydb-prometheus-additional-scrape-configs + # key: additional-scrape-configs.yaml + +# Grafana dashboards +grafanaDashboards: + # Enable dashboards generation + enabled: true + + # Label which will be used to mark configmaps with grafana dashboards + markerLabel: + key: grafana_dashboard + value: "1" + +# Prometheus server related configuration +prometheusConfig: + # Enable service monitors & scrape configs generation + enabled: true + + # Additional scrape configs that referenced in prometheus.spec + # + additionalScrapeConfigs: + # Generated secret key + key: additional-scrape-configs.yaml + # Generated secret name, if empty chart name will be used + name: "" + +# YDB Monitoring related configuration +ydb: + # Predefined monitoring counters to scrape + counters: + # Counter name + # counter: <counter name> + # + # Counter type + # If unspecified, will be used for both targets + # type: <static|dynamic> + # + # Metrics path + # If unspecified, will be used /counters/counters=%s/prometheus + # metricsPath: <url to prometheus metrics> + # + - counter: auth + - counter: compile + - counter: dsproxy + - counter: dsproxy_percentile + - counter: dsproxy_queue + - counter: dsproxynode + - counter: grpc + - counter: interconnect + - counter: kqp + - counter: netclassifier + - counter: pdisks + type: static + - counter: pqproxy + - counter: proxy + - counter: quoter_service + - counter: tablets + - counter: vdisks + type: static + - counter: utils + - counter: ydb + metricsPath: /counters/counters=ydb/name_label=name/prometheus + + # Cluster which will be monitored + clusters: [] + # Cluster name + # cluster: <cluster name> + # + # Cluster type + # For external clusters, additional scrape config secret will be generated + # For internal clusters, service monitor with appropriate configuration will be generated + # type: <external|internal> + # + # External cluster targets, used only if type == external + # external: + # Monitoring ports for static and dynamic nodes + # ports: + # static: <static (storage) node port> + # dynamic: <dynamic (tenants) node ports> + # Monitoring hosts + # hosts: <cluster targets> |