author	arcadia-devtools <arcadia-devtools@yandex-team.ru>	2022-02-10 16:48:02 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:48:02 +0300
commit	a61e8c559b6b690a019253067224d595e3d642f1 (patch)
tree	b222e5ac2e2e98872661c51ccceee5da0d291e13
parent	659131349be7796c633c453b4d8b4fa20b8c6ee9 (diff)
download	ydb-a61e8c559b6b690a019253067224d595e3d642f1.tar.gz
Restoring authorship annotation for <arcadia-devtools@yandex-team.ru>. Commit 2 of 2.
273 files changed, 27240 insertions, 27240 deletions
diff --git a/build/external_resources/ymake/ya.make.inc b/build/external_resources/ymake/ya.make.inc
index 9c73abab34..b88f36c4db 100644
--- a/build/external_resources/ymake/ya.make.inc
+++ b/build/external_resources/ymake/ya.make.inc
@@ -1,13 +1,13 @@
 IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64 OR HOST_OS_DARWIN AND HOST_ARCH_ARM64 OR HOST_OS_LINUX AND HOST_ARCH_PPC64LE OR HOST_OS_LINUX AND HOST_ARCH_X86_64 OR HOST_OS_WINDOWS AND HOST_ARCH_X86_64)
-ELSE()
+ELSE()
     MESSAGE(FATAL_ERROR Unsupported host platform for YMAKE)
 ENDIF()
 
 DECLARE_EXTERNAL_HOST_RESOURCES_BUNDLE(
     YMAKE
-    sbr:2763560807 FOR DARWIN
-    sbr:2763561138 FOR DARWIN-ARM64
-    sbr:2763560653 FOR LINUX-PPC64LE
-    sbr:2763560979 FOR LINUX
-    sbr:2763560492 FOR WIN32
+    sbr:2763560807 FOR DARWIN
+    sbr:2763561138 FOR DARWIN-ARM64
+    sbr:2763560653 FOR LINUX-PPC64LE
+    sbr:2763560979 FOR LINUX
+    sbr:2763560492 FOR WIN32
 )
diff --git a/build/platform/test_tool/host.ya.make.inc b/build/platform/test_tool/host.ya.make.inc
index 300a4df446..c25f2b1326 100644
--- a/build/platform/test_tool/host.ya.make.inc
+++ b/build/platform/test_tool/host.ya.make.inc
@@ -1,16 +1,16 @@
-IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984950)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990673)
-ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984688)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990185)
-ELSEIF (HOST_OS_LINUX AND HOST_ARCH_PPC64LE)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984559)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990014)
-ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764985330)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990852)
-ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984404)
-    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764989842)
+IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984950)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990673)
+ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984688)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990185)
+ELSEIF (HOST_OS_LINUX AND HOST_ARCH_PPC64LE)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984559)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990014)
+ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764985330)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990852)
+ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984404)
+    DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764989842)
 ENDIF()
diff --git a/build/plugins/java.py b/build/plugins/java.py
index da751537d1..16fc126734 100644
--- a/build/plugins/java.py
+++ b/build/plugins/java.py
@@ -40,18 +40,18 @@ def on_run_jbuild_program(unit, *args):
     flat, kv = common.sort_by_keywords({'IN': -1, 'IN_DIR': -1, 'OUT': -1, 'OUT_DIR': -1, 'CWD': 1, 'CLASSPATH': -1, 'CP_USE_COMMAND_FILE': 1, 'ADD_SRCS_TO_CLASSPATH': 0}, args)
     depends = kv.get('CLASSPATH', []) + kv.get('JAR', [])
-    fake_out = None
+    fake_out = None
     if depends:
         # XXX: hack to force ymake to build dependencies
-        fake_out = "fake.out.{}".format(hash(tuple(args)))
-        unit.on_run_java(['TOOL'] + depends + ["OUT", fake_out])
+        fake_out = "fake.out.{}".format(hash(tuple(args)))
+        unit.on_run_java(['TOOL'] + depends + ["OUT", fake_out])
 
     if not kv.get('CP_USE_COMMAND_FILE'):
         args += ['CP_USE_COMMAND_FILE', unit.get(['JAVA_PROGRAM_CP_USE_COMMAND_FILE']) or 'yes']
 
-    if fake_out is not None:
-        args += ['FAKE_OUT', fake_out]
-
+    if fake_out is not None:
+        args += ['FAKE_OUT', fake_out]
+
     prev = unit.get(['RUN_JAVA_PROGRAM_VALUE']) or ''
     new_val = (prev + ' ' + base64.b64encode(json.dumps(list(args), encoding='utf-8'))).strip()
     unit.set(['RUN_JAVA_PROGRAM_VALUE', new_val])
diff --git a/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource b/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource
index 9ae9c68baa..8edf2e5d4a 100644
--- a/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource
+++ b/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource
@@ -1,9 +1,9 @@
 IF (OS_DARWIN AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 2433625017)
+    SET(SANDBOX_RESOURCE_ID 2433625017)
 ELSEIF (OS_LINUX AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 2433625425)
+    SET(SANDBOX_RESOURCE_ID 2433625425)
 ELSEIF (OS_WINDOWS AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 2433624379)
+    SET(SANDBOX_RESOURCE_ID 2433624379)
 ELSE()
     SET(SANDBOX_RESOURCE_ID)
 ENDIF()
diff --git a/build/prebuilt/contrib/tools/protoc_std/ya.make.resource b/build/prebuilt/contrib/tools/protoc_std/ya.make.resource
index 650f87a0cc..738c7da7ec 100644
--- a/build/prebuilt/contrib/tools/protoc_std/ya.make.resource
+++ b/build/prebuilt/contrib/tools/protoc_std/ya.make.resource
@@ -1,9 +1,9 @@
 IF (OS_DARWIN AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 1714771857)
+    SET(SANDBOX_RESOURCE_ID 1714771857)
 ELSEIF (OS_LINUX AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 1714772118)
+    SET(SANDBOX_RESOURCE_ID 1714772118)
 ELSEIF (OS_WINDOWS AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 1714771351)
+    SET(SANDBOX_RESOURCE_ID 1714771351)
 ELSE()
     SET(SANDBOX_RESOURCE_ID)
 ENDIF()
diff --git a/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource b/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource
index 34148b1537..05fcd0cbd5 100644
--- a/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource
+++ b/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource
@@ -1,9 +1,9 @@
 IF (OS_DARWIN AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 2297961019)
+    SET(SANDBOX_RESOURCE_ID 2297961019)
 ELSEIF (OS_LINUX AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 2297961241)
+    SET(SANDBOX_RESOURCE_ID 2297961241)
 ELSEIF (OS_WINDOWS AND ARCH_X86_64)
-    SET(SANDBOX_RESOURCE_ID 2297960716)
+    SET(SANDBOX_RESOURCE_ID 2297960716)
 ELSE()
     SET(SANDBOX_RESOURCE_ID)
 ENDIF()
diff --git a/build/rules/flake8/migrations.yaml b/build/rules/flake8/migrations.yaml
index 1fbca469ad..6e54bf2e62 100644
--- a/build/rules/flake8/migrations.yaml
+++ b/build/rules/flake8/migrations.yaml
@@ -1,464 +1,464 @@
 migrations:
-  W605:
-    ignore:
-      - W605
-    prefixes:
-      - addappter/backend/testing
-      - addappter/backend/testing/fixtures/configuration/ios
-      - adfox/amacs/tests/functional/tests_amacs/bugs/medium
-      - adfox/amacs/tests/functional/tests_amacs/dynamic_monetization/v2
-      - adfox/amacs/tests/functional/tests_amacs/targeting/targeting_logic_tree_puids
-      - adfox/amacs/tests/functional/utils
-      - adfox/amacs/tests/functional/utils/db
-      - adfox/amacs/tests/functional/utils/tools
-      - ads/bsyeti/servants/bot
-      - ads/libs/py_autobudget
-      - ads/libs/py_bid_correction
-      - ads/libs/py_cliutils
-      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/mapreducelib
-      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs
-      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools
-      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabutils
-      - ads/libs/py_ml_factors/factor
-      - ads/libs/py_ml_factors/matrixnet
-      - ads/libs/py_test_mapreduce
-      - ads/ml_engine/learn/result/flow_generate_extended_fstr
-      - ads/ml_engine/learn/result/local_extended_fstr
-      - ads/ml_engine/learn/result/local_extended_fstr/lib
-      - ads/ml_engine/lib
-      - ads/ml_monitoring/alerts/ut
-      - ads/nirvana/automl/lib/batch_processor
-      - ads/nirvana/difacto/online_loss_processor
-      - ads/nirvana/graph_retrier/lib
-      - ads/nirvana/online_learning/move_dmlc_dumps/lib
-      - ads/nirvana/online_learning/pipeline_launcher/task_utils/lib
-      - ads/nirvana/sequential_learning
-      - ads/nirvana/tools/apc_check
-      - ads/quality/apc/gmg/generate_workflow
-      - ads/quality/apc/gmg/make_lm_pool
-      - ads/quality/apc/gmg/make_lm_pool/lib
-      - ads/quality/apc/prgmgv3/workflow_constructor/lib
-      - ads/quality/bid_correction/lib
-      - ads/quality/dssm/lib
-      - ads/quality/dssm/prgmg/make_pool
-      - ads/quality/dssm/search/201708/make_pool
-      - ads/quality/dssm/synonyms/search/tools/join_fields
-      - ads/quality/max_positions
-      - ads/quality/search_lm_conv/lib
-      - ads/quality/tools/adj_calc
-      - ads/sandbox_scripts/bmcategory_queryage_coeffs/lib
-      - ads/sandbox_scripts/build_tag_rules_table
-      - ads/sandbox_scripts/clean_mapreduce/ut
-      - ads/sandbox_scripts/join_yabar
-      - ads/tools/mranalyze
-      - ads/tools/yt_operations_analyzer
-      - ads/watchman/contrib/apispec-patched
-      - ads/watchman/contrib/apispec-patched/apispec
-      - ads/watchman/experiments/lib
-      - advq/offline_phits/monitoring
-      - alice/boltalka/generative/tfnn/preprocess
-      - alice/boltalka/generative/training/data/nn/filtered_twitter
-      - alice/boltalka/generative/training/data/nn/util
-      - alice/boltalka/generative/training/data/nn/util/dict
-      - alice/boltalka/tools/dssm_preprocessing/preprocessing/lib
-      - alice/nlu/py_libs/tokenizer/ut/py2
-      - alice/nlu/py_libs/tokenizer/ut/py3
-      - alice/nlu/tools/paraphrase_finder
-      - alice/paskills/nirvana_inflector
-      - alice/paskills/recipe_utils/lib
-      - alice/tests/difftest/request_miner
-      - antirobot/tools/daily_routine/lib
-      - april/web/bas/bm
-      - april/web/bas/bm/utils
-      - april/web/bas/collector
-      - aurora/aurora/core
-      - aurora/aurora/core/services
-      - aurora/aurora/core/toloka
-      - aurora/xpath/api/utils
-      - aurora/xpath/applier
-      - balancer/test/functional/admin/admin
-      - balancer/test/functional/regexp_host
-      - balancer/test/util
-      - balancer/test/util/dnsfake
-      - billing/apikeys/apikeys
-      - billing/apikeys/apikeys/butils_port
-      - billing/apikeys/apikeys/butils_port/application
-      - billing/dcs/dcs
-      - billing/dcs/dcs/temporary/butils
-      - billing/dcs/dcs/temporary/butils/application
-      - cloud/bootstrap/db/src/admin
-      - cloud/iam/codegen/python/codegen
-      - cloud/netinfra/rknfilter/yc_rkn_s3tools
-      - commerce/adv_backend
-      - commerce/adv_backend/backend/management/commands/migrator
-      - commerce/adv_backend/backend/validators
-      - contrib/nginx/tests/tap
-      - cv/imageproc/ocr/tools/nirvana/blocks_dataset/extract_pdf_boxes
-      - cv/imageproc/ocr/tools/nirvana/blocks_dataset/g_blocks
-      - cv/imageproc/ocr/tools/nirvana/blocks_dataset/rotate_pdf
-      - cv/imageproc/ocr/tools/nirvana/confidence_factors/format_utils
-      - cv/imageproc/ocr/tools/nirvana/recaptcha/generate_captcha_images
-      - cv/imageproc/ocr/tools/statistic/auto_tests/ocr_test_lib
-      - cv/imageproc/ocr/tools/statistic/ocr_intent_statistic
-      - cv/imageproc/ocr/tools/statistic/rectify_evaluation/document_edges_statistic
-      - cv/imageproc/ocr/tools/statistic/rectify_evaluation/ocr_statistic
-      - cv/imageproc/ocr/tools/statistic/rectify_evaluation/yt_runner
-      - cv/imageproc/ocr/tools/toloka_labeling/add_main_areas_to_labelling
-      - cv/imageproc/ocr/tools/toloka_labeling/add_ocr_line_recognition
-      - cv/imageproc/ocr/tools/toloka_labeling/convert
-      - cv/imageproc/ocr/tools/toloka_labeling/convert_toloka_labeling_to_asessors_labeling
-      - cv/imageproc/ocr/tools/toloka_labeling/get_labeling
-      - cv/imageproc/ocr/tools/toloka_labeling/labeling
-      - cv/imageproc/ocr/tools/toloka_labeling/labeling_assessors
-      - cv/imageproc/ocr/tools/toloka_labeling/labeling_captcha
-      - cv/imageproc/ocr/tools/toloka_labeling/paint_good
-      - cv/imageproc/ocr/tools/toloka_labeling/studier/generate_blocks_images
-      - cv/imageproc/ocr/tools/toloka_labeling/studier/get_queries_info
-      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/convert_box_labeling
-      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/extact_ocr_with_gt_blocks
-      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_bleu_statistic
-      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_etalon_ocr_result
-      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_translate_orig_from_ocr_labelling
-      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/match_blocks
-      - cv/imgclassifiers/framework/gpu_utilization
-      - cv/imgclassifiers/framework/nirvana/runners/train/statistics_parser
-      - datacloud/score_api/validators
-      - devtools/adept
-      - devtools/distbuild/deploy/servants
-      - devtools/dummy_arcadia/test/test_cache_invalidation
-      - devtools/import_contrib/projects/phonenumber
-      - devtools/qafw/qyp
-      - devtools/qafw/rtc_deploy/lib
-      - devtools/qafw/selenium/lib
-      - devtools/signer/signer/utils
-      - devtools/unitybot
-      - devtools/unitybot/responsibility
-      - devtools/ya/test
-      - devtools/ya/test/tests/lib
-      - dict/moria/nirvana/context/begemot_thesaurus_rule
-      - dict/moria/nirvana/lib/util
-      - dict/mt/alice/scenarios/prepare_data
-      - dict/mt/analytics/metrics/quality/mt/stupids/lib
-      - dict/mt/analytics/sentence_breaking/toloka_binary/lib
-      - dict/mt/eval/eval_viewer/lib/spec/mt
-      - dict/mt/eval/lib/metrics/bleu
-      - dict/mt/g2p/rule_based
-      - dict/mt/make/libs/common
-      - dict/mt/make/libs/eval
-      - dict/mt/make/libs/tfnn
-      - dict/mt/make/libs/translate_mtd
-      - dict/mt/make/modules/corpus_generation/common
-      - dict/mt/make/tools/lm_diff
-      - dict/mt/make/tools/make_cap_model/score_caps
-      - dict/mt/make/tools/opus_merge
-      - dict/mt/make/tools/tfnn/convert_mtd_to_tfnn
-      - dict/mt/make/tools/yt_transform/tests
-      - dict/mt/mtdict/make/definitions/modules/extracts/remove_etym
-      - dict/mt/scripts/testsets/crowdsource/globalvoices/find_parallel_pages/extract_page_info
-      - dict/mt/tools/log_converter/rem_tool
-      - dict/mt/tools/tmx_to_plain_text
-      - dict/ontodb/cardsparser/lib
-      - dict/ontodb/daily/merge_cache
-      - dict/ontodb/daily/validator
-      - dict/ontodb/images/lib
-      - dict/ontodb/isa/subtitle/add_subtitles
-      - dict/ontodb/lists/toloka/parsing
-      - dict/ontodb/onto_lib/card_to_url
-      - dict/ontodb/onto_lib/card_utils
-      - dict/ontodb/onto_lib/sources/discogs
-      - dict/ontodb/onto_lib/sources/fantlab
-      - dict/ontodb/onto_lib/sources/freebase
-      - dict/ontodb/onto_lib/sources/freebase/mediators
-      - dict/ontodb/onto_lib/sources/google_play
-      - dict/ontodb/onto_lib/sources/itunes
-      - dict/ontodb/onto_lib/sources/kinopoisk
-      - dict/ontodb/onto_lib/sources/labirint
-      - dict/ontodb/onto_lib/sources/musicbrainz
-      - dict/ontodb/onto_lib/sources/transfermarkt
-      - dict/ontodb/onto_lib/sources/www
-      - dict/ontodb/ontodb_log/ctr
-      - dict/ontodb/report/common
-      - dict/ontodb/report/film_stuff
-      - dict/ontodb/schema
-      - dict/ontodb/tools/entity_lists/build_watson_sources/config
-      - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmkomedia
-      - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmzor
-      - dict/ontodb/tools/entity_lists/build_watson_sources/src/imdb
-      - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinolinehd
-      - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinotime
-      - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinovolt
-      - dict/ontodb/tools/entity_lists/is_series_film_query
-      - dict/ontodb/tools/entity_lists/mine_titles
-      - dict/ontodb/tools/entity_lists/parser/config
-      - dict/ontodb/tools/entity_lists/parser/src/adme
-      - dict/ontodb/tools/entity_lists/parser/src/afisha
-      - dict/ontodb/tools/entity_lists/parser/src/allbestmovies
-      - dict/ontodb/tools/entity_lists/parser/src/cinemacc
-      - dict/ontodb/tools/entity_lists/parser/src/cinetoday
-      - dict/ontodb/tools/entity_lists/parser/src/cobrafilm
-      - dict/ontodb/tools/entity_lists/parser/src/detifilm
-      - dict/ontodb/tools/entity_lists/parser/src/dostfilms
-      - dict/ontodb/tools/entity_lists/parser/src/fasttorrentsu
-      - dict/ontodb/tools/entity_lists/parser/src/filmanias
-      - dict/ontodb/tools/entity_lists/parser/src/filmhd1080
-      - dict/ontodb/tools/entity_lists/parser/src/filmkomedia
-      - dict/ontodb/tools/entity_lists/parser/src/filmov1000
-      - dict/ontodb/tools/entity_lists/parser/src/filmpro
-      - dict/ontodb/tools/entity_lists/parser/src/filmuzhasov
-      - dict/ontodb/tools/entity_lists/parser/src/filmzor
-      - dict/ontodb/tools/entity_lists/parser/src/hdkinoclub
-      - dict/ontodb/tools/entity_lists/parser/src/iceagemult
-      - dict/ontodb/tools/entity_lists/parser/src/imdb
-      - dict/ontodb/tools/entity_lists/parser/src/ivi
-      - dict/ontodb/tools/entity_lists/parser/src/kinohabr
-      - dict/ontodb/tools/entity_lists/parser/src/kinohorror
-      - dict/ontodb/tools/entity_lists/parser/src/kinolinehd
-      - dict/ontodb/tools/entity_lists/parser/src/kinomliff
-      - dict/ontodb/tools/entity_lists/parser/src/kinoonlinetop
-      - dict/ontodb/tools/entity_lists/parser/src/kinopod
-      - dict/ontodb/tools/entity_lists/parser/src/kinopoisk
-      - dict/ontodb/tools/entity_lists/parser/src/kinorip
-      - dict/ontodb/tools/entity_lists/parser/src/kinosky
-      - dict/ontodb/tools/entity_lists/parser/src/kinotime
-      - dict/ontodb/tools/entity_lists/parser/src/kinotop
-      - dict/ontodb/tools/entity_lists/parser/src/kinovolt
-      - dict/ontodb/tools/entity_lists/parser/src/luchshiespiski
-      - dict/ontodb/tools/entity_lists/parser/src/megogo
-      - dict/ontodb/tools/entity_lists/parser/src/multikstv
-      - dict/ontodb/tools/entity_lists/parser/src/multyasha
-      - dict/ontodb/tools/entity_lists/parser/src/newfilmpro
-      - dict/ontodb/tools/entity_lists/parser/src/okino
-      - dict/ontodb/tools/entity_lists/parser/src/okomediya
-      - dict/ontodb/tools/entity_lists/parser/src/onlinekinohd
-      - dict/ontodb/tools/entity_lists/parser/src/parkhorror
-      - dict/ontodb/tools/entity_lists/parser/src/prostotop
-      - dict/ontodb/tools/entity_lists/parser/src/rosmovies
-      - dict/ontodb/tools/entity_lists/parser/src/rserial
-      - dict/ontodb/tools/entity_lists/parser/src/shikimori
-      - dict/ontodb/tools/entity_lists/parser/src/strahzona
-      - dict/ontodb/tools/entity_lists/parser/src/tabfilm
-      - dict/ontodb/tools/entity_lists/parser/src/thecinemaclub
-      - dict/ontodb/tools/entity_lists/parser/src/tlum
-      - dict/ontodb/tools/entity_lists/parser/src/topspiski
-      - dict/ontodb/tools/entity_lists/parser/src/vmirefilmov
-      - dict/ontodb/tools/entity_lists/parser/src/vokrugtv
-      - dict/ontodb/tools/entity_lists/parser/src/westernfilm
-      - dict/ontodb/tools/entity_lists/relev
-      - dict/ontodb/tools/entity_lists/sticky
-      - dict/ontodb/tools/fields_diff/lib
-      - dict/ontodb/tools/ontodb_viewer
-      - dict/ontodb/tools/ontodbfixes/import_fixes/lib
-      - dict/ontodb/tools/ontodbfixes/viewer
-      - dict/ontodb/tools/url_answer/lib
-      - dict/ontodb/user_logs/serp_clicks/lib
-      - dict/ontodb/user_logs/wiki_spy_clicks
-      - dict/ontodb/utils
-      - dict/ontodb/utils/add_clicks
-      - dict/ontodb/utils/build_helpers
-      - dict/ontodb/utils/import_json_timelines/lib
-      - dict/ontodb/utils/map_card_data
-      - dict/ontodb/utils/monitoring
-      - dict/ontodb/utils/music
-      - dict/ontodb/utils/norm_ontoids_in_gzt
-      - dict/ontodb/utils/norm_urls
-      - dict/ontodb/utils/string_utils
-      - dict/ontodb/utils/support_words
-      - dict/ontodb/utils/update_links
-      - dict/ontodb/wikicommon
-      - dict/ontodb/wikicommon/get_defin
-      - dict/ontodb/wikicommon/infobox
-      - dict/ontodb/wikicommon/link_to_ontoid
-      - dict/ontodb/wikicommon/on_add_short_defin
-      - dict/ontodb/wikicommon/on_build_card
-      - dict/ontodb/wikicommon/resource_files/wiki_fields
-      - dict/ontodb/wikicommon/text_mine_film_participants
-      - dict/ontodb/wikicommon/text_mine_interesting_facts
-      - dict/ontodb/wikicommon/text_mine_projects
-      - dict/ontodb/wikicommon/text_mine_sport_team_participants
-      - dict/ontodb/wikicommon/wiki
-      - dict/ontodb/wikicommon/wiki_syntax
-      - dict/tools/find_synonym
-      - disk/admin/monitors/common
-      - disk/admin/robot_switcher
-      - dj/tools/viewer/custom/entity
+  W605:
+    ignore:
+      - W605
+    prefixes:
+      - addappter/backend/testing
+      - addappter/backend/testing/fixtures/configuration/ios
+      - adfox/amacs/tests/functional/tests_amacs/bugs/medium
+      - adfox/amacs/tests/functional/tests_amacs/dynamic_monetization/v2
+      - adfox/amacs/tests/functional/tests_amacs/targeting/targeting_logic_tree_puids
+      - adfox/amacs/tests/functional/utils
+      - adfox/amacs/tests/functional/utils/db
+      - adfox/amacs/tests/functional/utils/tools
+      - ads/bsyeti/servants/bot
+      - ads/libs/py_autobudget
+      - ads/libs/py_bid_correction
+      - ads/libs/py_cliutils
+      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/mapreducelib
+      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs
+      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools
+      - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabutils
+      - ads/libs/py_ml_factors/factor
+      - ads/libs/py_ml_factors/matrixnet
+      - ads/libs/py_test_mapreduce
+      - ads/ml_engine/learn/result/flow_generate_extended_fstr
+      - ads/ml_engine/learn/result/local_extended_fstr
+      - ads/ml_engine/learn/result/local_extended_fstr/lib
+      - ads/ml_engine/lib
+      - ads/ml_monitoring/alerts/ut
+      - ads/nirvana/automl/lib/batch_processor
+      - ads/nirvana/difacto/online_loss_processor
+      - ads/nirvana/graph_retrier/lib
+      - ads/nirvana/online_learning/move_dmlc_dumps/lib
+      - ads/nirvana/online_learning/pipeline_launcher/task_utils/lib
+      - ads/nirvana/sequential_learning
+      - ads/nirvana/tools/apc_check
+      - ads/quality/apc/gmg/generate_workflow
+      - ads/quality/apc/gmg/make_lm_pool
+      - ads/quality/apc/gmg/make_lm_pool/lib
+      - ads/quality/apc/prgmgv3/workflow_constructor/lib
+      - ads/quality/bid_correction/lib
+      - ads/quality/dssm/lib
+      - ads/quality/dssm/prgmg/make_pool
+      - ads/quality/dssm/search/201708/make_pool
+      - ads/quality/dssm/synonyms/search/tools/join_fields
+      - ads/quality/max_positions
+      - ads/quality/search_lm_conv/lib
+      - ads/quality/tools/adj_calc
+      - ads/sandbox_scripts/bmcategory_queryage_coeffs/lib
+      - ads/sandbox_scripts/build_tag_rules_table
+      - ads/sandbox_scripts/clean_mapreduce/ut
+      - ads/sandbox_scripts/join_yabar
+      - ads/tools/mranalyze
+      - ads/tools/yt_operations_analyzer
+      - ads/watchman/contrib/apispec-patched
+      - ads/watchman/contrib/apispec-patched/apispec
+      - ads/watchman/experiments/lib
+      - advq/offline_phits/monitoring
+      - alice/boltalka/generative/tfnn/preprocess
+      - alice/boltalka/generative/training/data/nn/filtered_twitter
+      - alice/boltalka/generative/training/data/nn/util
+      - alice/boltalka/generative/training/data/nn/util/dict
+      - alice/boltalka/tools/dssm_preprocessing/preprocessing/lib
+      - alice/nlu/py_libs/tokenizer/ut/py2
+      - alice/nlu/py_libs/tokenizer/ut/py3
+      - alice/nlu/tools/paraphrase_finder
+      - alice/paskills/nirvana_inflector
+      - alice/paskills/recipe_utils/lib
+      - alice/tests/difftest/request_miner
+      - antirobot/tools/daily_routine/lib
+      - april/web/bas/bm
+      - april/web/bas/bm/utils
+      - april/web/bas/collector
+      - aurora/aurora/core
+      - aurora/aurora/core/services
+      - aurora/aurora/core/toloka
+      - aurora/xpath/api/utils
+      - aurora/xpath/applier
+      - balancer/test/functional/admin/admin
+      - balancer/test/functional/regexp_host
+      - balancer/test/util
+      - balancer/test/util/dnsfake
+      - billing/apikeys/apikeys
+      - billing/apikeys/apikeys/butils_port
+      - billing/apikeys/apikeys/butils_port/application
+      - billing/dcs/dcs
+      - billing/dcs/dcs/temporary/butils
+      - billing/dcs/dcs/temporary/butils/application
+      - cloud/bootstrap/db/src/admin
+      - cloud/iam/codegen/python/codegen
+      - cloud/netinfra/rknfilter/yc_rkn_s3tools
+      - commerce/adv_backend
+      - commerce/adv_backend/backend/management/commands/migrator
+      - commerce/adv_backend/backend/validators
+      - contrib/nginx/tests/tap
+      - cv/imageproc/ocr/tools/nirvana/blocks_dataset/extract_pdf_boxes
+      - cv/imageproc/ocr/tools/nirvana/blocks_dataset/g_blocks
+      - cv/imageproc/ocr/tools/nirvana/blocks_dataset/rotate_pdf
+      - cv/imageproc/ocr/tools/nirvana/confidence_factors/format_utils
+      - cv/imageproc/ocr/tools/nirvana/recaptcha/generate_captcha_images
+      - cv/imageproc/ocr/tools/statistic/auto_tests/ocr_test_lib
+      - cv/imageproc/ocr/tools/statistic/ocr_intent_statistic
+      - cv/imageproc/ocr/tools/statistic/rectify_evaluation/document_edges_statistic
+      - cv/imageproc/ocr/tools/statistic/rectify_evaluation/ocr_statistic
+      - cv/imageproc/ocr/tools/statistic/rectify_evaluation/yt_runner
+      - cv/imageproc/ocr/tools/toloka_labeling/add_main_areas_to_labelling
+      - cv/imageproc/ocr/tools/toloka_labeling/add_ocr_line_recognition
+      - cv/imageproc/ocr/tools/toloka_labeling/convert
+      - cv/imageproc/ocr/tools/toloka_labeling/convert_toloka_labeling_to_asessors_labeling
+      - cv/imageproc/ocr/tools/toloka_labeling/get_labeling
+      - cv/imageproc/ocr/tools/toloka_labeling/labeling
+      - cv/imageproc/ocr/tools/toloka_labeling/labeling_assessors
+      - cv/imageproc/ocr/tools/toloka_labeling/labeling_captcha
+      - cv/imageproc/ocr/tools/toloka_labeling/paint_good
+      - cv/imageproc/ocr/tools/toloka_labeling/studier/generate_blocks_images
+      - cv/imageproc/ocr/tools/toloka_labeling/studier/get_queries_info
+      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/convert_box_labeling
+      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/extact_ocr_with_gt_blocks
+      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_bleu_statistic
+      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_etalon_ocr_result
+      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_translate_orig_from_ocr_labelling
+      - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/match_blocks
+      - cv/imgclassifiers/framework/gpu_utilization
+      - cv/imgclassifiers/framework/nirvana/runners/train/statistics_parser
+      - datacloud/score_api/validators
+      - devtools/adept
+      - devtools/distbuild/deploy/servants
+      - devtools/dummy_arcadia/test/test_cache_invalidation
+      - devtools/import_contrib/projects/phonenumber
+      - devtools/qafw/qyp
+      - devtools/qafw/rtc_deploy/lib
+      - devtools/qafw/selenium/lib
+      - devtools/signer/signer/utils
+      - devtools/unitybot
+      - devtools/unitybot/responsibility
+      - devtools/ya/test
+      - devtools/ya/test/tests/lib
+      - dict/moria/nirvana/context/begemot_thesaurus_rule
+      - dict/moria/nirvana/lib/util
+      - dict/mt/alice/scenarios/prepare_data
+      - dict/mt/analytics/metrics/quality/mt/stupids/lib
+      - dict/mt/analytics/sentence_breaking/toloka_binary/lib
+      - dict/mt/eval/eval_viewer/lib/spec/mt
+      - dict/mt/eval/lib/metrics/bleu
+      - dict/mt/g2p/rule_based
+      - dict/mt/make/libs/common
+      - dict/mt/make/libs/eval
+      - dict/mt/make/libs/tfnn
+      - dict/mt/make/libs/translate_mtd
+      - dict/mt/make/modules/corpus_generation/common
+      - dict/mt/make/tools/lm_diff
+      - dict/mt/make/tools/make_cap_model/score_caps
+      - dict/mt/make/tools/opus_merge
+      - dict/mt/make/tools/tfnn/convert_mtd_to_tfnn
+      - dict/mt/make/tools/yt_transform/tests
+      - dict/mt/mtdict/make/definitions/modules/extracts/remove_etym
+      - dict/mt/scripts/testsets/crowdsource/globalvoices/find_parallel_pages/extract_page_info
+      - dict/mt/tools/log_converter/rem_tool
+      - dict/mt/tools/tmx_to_plain_text
+      - dict/ontodb/cardsparser/lib
+      - dict/ontodb/daily/merge_cache
+      - dict/ontodb/daily/validator
+      - dict/ontodb/images/lib
+      - dict/ontodb/isa/subtitle/add_subtitles
+      - dict/ontodb/lists/toloka/parsing
+      - dict/ontodb/onto_lib/card_to_url
+      - dict/ontodb/onto_lib/card_utils
+      - dict/ontodb/onto_lib/sources/discogs
+      - dict/ontodb/onto_lib/sources/fantlab
+      - dict/ontodb/onto_lib/sources/freebase
+      - dict/ontodb/onto_lib/sources/freebase/mediators
+      - dict/ontodb/onto_lib/sources/google_play
+      - dict/ontodb/onto_lib/sources/itunes
+      - dict/ontodb/onto_lib/sources/kinopoisk
+      - dict/ontodb/onto_lib/sources/labirint
+      - dict/ontodb/onto_lib/sources/musicbrainz
+      - dict/ontodb/onto_lib/sources/transfermarkt
+      - dict/ontodb/onto_lib/sources/www
+      - dict/ontodb/ontodb_log/ctr
+      - dict/ontodb/report/common
+      - dict/ontodb/report/film_stuff
+      - dict/ontodb/schema
+      - dict/ontodb/tools/entity_lists/build_watson_sources/config
+      - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmkomedia
+      - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmzor
+      - dict/ontodb/tools/entity_lists/build_watson_sources/src/imdb
+      - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinolinehd
+      - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinotime
+      - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinovolt
+      - dict/ontodb/tools/entity_lists/is_series_film_query
+      - dict/ontodb/tools/entity_lists/mine_titles
+      - dict/ontodb/tools/entity_lists/parser/config
+      - dict/ontodb/tools/entity_lists/parser/src/adme
+      - dict/ontodb/tools/entity_lists/parser/src/afisha
+      - dict/ontodb/tools/entity_lists/parser/src/allbestmovies
+      - dict/ontodb/tools/entity_lists/parser/src/cinemacc
+      - dict/ontodb/tools/entity_lists/parser/src/cinetoday
+      - dict/ontodb/tools/entity_lists/parser/src/cobrafilm
+      - dict/ontodb/tools/entity_lists/parser/src/detifilm
+      - dict/ontodb/tools/entity_lists/parser/src/dostfilms
+      - dict/ontodb/tools/entity_lists/parser/src/fasttorrentsu
+      - dict/ontodb/tools/entity_lists/parser/src/filmanias
+      - dict/ontodb/tools/entity_lists/parser/src/filmhd1080
+      - dict/ontodb/tools/entity_lists/parser/src/filmkomedia
+      - dict/ontodb/tools/entity_lists/parser/src/filmov1000
+      - dict/ontodb/tools/entity_lists/parser/src/filmpro
+      - dict/ontodb/tools/entity_lists/parser/src/filmuzhasov
+      - dict/ontodb/tools/entity_lists/parser/src/filmzor
+      - dict/ontodb/tools/entity_lists/parser/src/hdkinoclub
+      - dict/ontodb/tools/entity_lists/parser/src/iceagemult
+      - dict/ontodb/tools/entity_lists/parser/src/imdb
+      - dict/ontodb/tools/entity_lists/parser/src/ivi
+      - dict/ontodb/tools/entity_lists/parser/src/kinohabr
+      - dict/ontodb/tools/entity_lists/parser/src/kinohorror
+      - dict/ontodb/tools/entity_lists/parser/src/kinolinehd
+      - dict/ontodb/tools/entity_lists/parser/src/kinomliff
+      - dict/ontodb/tools/entity_lists/parser/src/kinoonlinetop
+      - dict/ontodb/tools/entity_lists/parser/src/kinopod
+      - dict/ontodb/tools/entity_lists/parser/src/kinopoisk
+      - dict/ontodb/tools/entity_lists/parser/src/kinorip
+      - dict/ontodb/tools/entity_lists/parser/src/kinosky
+      - dict/ontodb/tools/entity_lists/parser/src/kinotime
+      - dict/ontodb/tools/entity_lists/parser/src/kinotop
+      - dict/ontodb/tools/entity_lists/parser/src/kinovolt
+      - dict/ontodb/tools/entity_lists/parser/src/luchshiespiski
+      - dict/ontodb/tools/entity_lists/parser/src/megogo
+      - dict/ontodb/tools/entity_lists/parser/src/multikstv
+      - dict/ontodb/tools/entity_lists/parser/src/multyasha
+      - dict/ontodb/tools/entity_lists/parser/src/newfilmpro
+      - dict/ontodb/tools/entity_lists/parser/src/okino
+      - dict/ontodb/tools/entity_lists/parser/src/okomediya
+      - dict/ontodb/tools/entity_lists/parser/src/onlinekinohd
+      - dict/ontodb/tools/entity_lists/parser/src/parkhorror
+      - dict/ontodb/tools/entity_lists/parser/src/prostotop
+      - dict/ontodb/tools/entity_lists/parser/src/rosmovies
+      - dict/ontodb/tools/entity_lists/parser/src/rserial
+      - dict/ontodb/tools/entity_lists/parser/src/shikimori
+      - dict/ontodb/tools/entity_lists/parser/src/strahzona
+      - dict/ontodb/tools/entity_lists/parser/src/tabfilm
+      - dict/ontodb/tools/entity_lists/parser/src/thecinemaclub
+      - dict/ontodb/tools/entity_lists/parser/src/tlum
+      - dict/ontodb/tools/entity_lists/parser/src/topspiski
+      - dict/ontodb/tools/entity_lists/parser/src/vmirefilmov
+      - dict/ontodb/tools/entity_lists/parser/src/vokrugtv
+      - dict/ontodb/tools/entity_lists/parser/src/westernfilm
+      - dict/ontodb/tools/entity_lists/relev
+      - dict/ontodb/tools/entity_lists/sticky
+      - dict/ontodb/tools/fields_diff/lib
+      - dict/ontodb/tools/ontodb_viewer
+      - dict/ontodb/tools/ontodbfixes/import_fixes/lib
+      - dict/ontodb/tools/ontodbfixes/viewer
+      - dict/ontodb/tools/url_answer/lib
+      - dict/ontodb/user_logs/serp_clicks/lib
+      - dict/ontodb/user_logs/wiki_spy_clicks
+      - dict/ontodb/utils
+      - dict/ontodb/utils/add_clicks
+      - dict/ontodb/utils/build_helpers
+      - dict/ontodb/utils/import_json_timelines/lib
+      - dict/ontodb/utils/map_card_data
+      - dict/ontodb/utils/monitoring
+      - dict/ontodb/utils/music
+      - dict/ontodb/utils/norm_ontoids_in_gzt
+      - dict/ontodb/utils/norm_urls
+      - dict/ontodb/utils/string_utils
+      - dict/ontodb/utils/support_words
+      - dict/ontodb/utils/update_links
+      - dict/ontodb/wikicommon
+      - dict/ontodb/wikicommon/get_defin
+      - dict/ontodb/wikicommon/infobox
+      - dict/ontodb/wikicommon/link_to_ontoid
+      - dict/ontodb/wikicommon/on_add_short_defin
+      - dict/ontodb/wikicommon/on_build_card
+      - dict/ontodb/wikicommon/resource_files/wiki_fields
+      - dict/ontodb/wikicommon/text_mine_film_participants
+      - dict/ontodb/wikicommon/text_mine_interesting_facts
+      - dict/ontodb/wikicommon/text_mine_projects
+      - dict/ontodb/wikicommon/text_mine_sport_team_participants
+      - dict/ontodb/wikicommon/wiki
+      - dict/ontodb/wikicommon/wiki_syntax
+      - dict/tools/find_synonym
+      - disk/admin/monitors/common
+      - disk/admin/robot_switcher
+      - dj/tools/viewer/custom/entity
       - education/lib/nirvana/operations/yt_nodes_deep_diff
-      - education/schoolbook/analytics/adhoc/ANALITICSEDU-515
-      - education/schoolbook/analytics/adhoc/ANALITICSEDU-687
-      - entity/ontodb/tasks/backup_yt_tables/lib
-      - entity/ontodb/tasks/clean_old_data
-      - entity/ontodb/tasks/import_museums/lib
-      - entity/ontodb/tasks/import_yam/lib
-      - entity/ontodb/tasks/send_table_checker_sensors/lib
-      - entity/ontodb/tasks/watson_converter/lib
-      - entity/ontodb/util/wiki
-      - entity/quality/helix
-      - entity/quality/metrics/film_lists/combine_attributes_for_stupids_marking
-      - entity/quality/metrics/not_film_lists/combine_attributes_for_stupids_marking
-      - entity/quality/metrics/passport
-      - entity/ugc/db/python/offline_processing
-      - entity/ugc/quality/reviews_ranking/ml/lib/trueskill
-      - entity/ugc/tests/lib
-      - entity/ugc/tools/bell/replay_requests
-      - entity/ugc/tools/comments/prepare_business_replies_for_cmnt
-      - entity/ugc/tools/tank/user_poll_ammo
-      - entity/ugc/tools/viewer_server/lib
-      - entity/ugc/tools/viewer_server/lib/core
-      - extsearch/audio/yamrec/query_browser
-      - extsearch/collections/tools/mmeta2metrics_serp
-      - extsearch/collections/tools/nirvana/collections_indexer
-      - extsearch/collections/tools/upper2metrics_serp
-      - extsearch/geo/conveyors/annotations/filtrate_banned_v2
-      - extsearch/geo/conveyors/annotations/metro_extractor/get_metro_to_ll
-      - extsearch/geo/conveyors/experimental/annotations/fast_annotations/collection_keywords_extractor
-      - extsearch/geo/tools/similar_orgs/read_user_session
-      - extsearch/geo/tools/special_features/mining_result_merger
-      - extsearch/images/library/ytscraper
-      - extsearch/images/money/scripts/robot
-      - extsearch/images/money/scripts/robot/util
-      - extsearch/images/money/tools/commercial_serps_downloader
-      - extsearch/images/robot/index/testlib/index_process_description
-      - extsearch/images/robot/library/pycm
-      - extsearch/images/robot/scripts/cm/semidup2
-      - extsearch/images/robot/tools/index_snapshot_cleaner
-      - extsearch/images/robot/tools/indexrotate
-      - extsearch/images/robot/tools/robot_losses/python/rotor_download_images
-      - extsearch/video/python/yql
-      - extsearch/video/quality/series/base/builder/common
-      - extsearch/video/quality/series/base/builder/wrappers
-      - extsearch/video/robot/cm/deletes/cmpy/playerdata
-      - extsearch/video/robot/cm/library
-      - extsearch/video/robot/hostsdb/tool/config/add_rules
-      - extsearch/video/robot/rt_transcoder/metarobot/tests
-      - extsearch/video/robot/rt_transcoder/transcoder/tests
-      - haas/cmis/hwr_allocation
-      - haas/cmis/hwr_allocation/allocation_methods
-      - health/articles/articles_pipeline
-      - health/articles/articles_pipeline/actions
-      - health/articles/articles_pipeline/lib
-      - health/articles/articles_pipeline/lib/util
-      - health/common_libs/utils
-      - health/yamd/health_import
-      - health/yamd/health_import/data_tests
-      - health/yamd/libs/cross_links
-      - health/yamd/libs/utils
-      - health/yamd/libs/utils/parsers
-      - infra/awacs/vendor/awacs/pire/ut
-      - infra/callisto/controllers/viewer/lib2
-      - infra/cqudp/src
-      - infra/cqudp/src/eggs
-      - infra/deploy/tools/yd_migrate/lib
-      - infra/gencfg-gui
-      - infra/heartbeat/src
-      - infra/heartbeat/src/daemon
-      - infra/kernel/tools/coroner
-      - infra/netconfig/utils/pinger/lib
-      - infra/porto/api_py
-      - infra/portoshell
-      - infra/portoshell/src
-      - infra/qyp/vmctl/src
-      - infra/reconf_juggler/tools/jdiff/tests
-      - infra/rtc/janitor
-      - infra/scripts/eventlog_uploader/script
-      - infra/yp_drcp/lib
-      - infra/yp_dru/bin
-      - irt/bmgen/market_data
-      - irt/bmgen/synonyms
-      - keyboard/analytics/toloka/nirvana/blocks
-      - keyboard/analytics/toloka/nirvana/swipe
-      - keyboard/analytics/toloka/nirvana/typing
-      - keyboard/dict/nirvana/blocks
-      - library/python/testing/filter
-      - library/python/testing/gtest
-      - locdoc/doc_tools/yoda/friends/okapi
-      - locdoc/doc_tools/yoda/friends/okapi/operations
-      - logbroker/tools/manual/alter_sqs_tables
-      - logbroker/tools/manual/create_logbroker_account/lib
-      - logbroker/tools/manual/load_test/load
-      - mail/contrib/ccs-caldavtester
-      - mail/contrib/ccs-caldavtester/src
-      - mail/contrib/ccs-caldavtester/verifiers
-      - mail/contrib/tatsu/test
-      - mail/contrib/tatsu/test/grammar
-      - mail/python/theatre/app
-      - mail/tools/ews_call/ews_call
-      - mail/tools/safely_delete_stids/lib
-      - mail/yasm/lib
-      - mail/yasm/lib/calendar/alerts
-      - mail/yasm/lib/calendar/panels
-      - mapreduce/yt/tools/du-yt
-      - maps/analyzer/pylibs/watchman_api/lib
-      - maps/automotive/proxy/config_generator
-      - maps/automotive/remote_access/autotests/tests/data_types
-      - maps/b2bgeo/mvrp_solver/backend/tests_lib
-      - maps/carparks/regression/renderer/generate_ammo
-      - maps/carparks/tools/route_lost_viewer/bin
-      - maps/infra/monitoring/sla_calculator/core/services
-      - maps/jams/renderer2/common/ecstatic/lib
-      - maps/mobile/server/tools/cache_deprecator
-      - maps/mobile/tools/android-manifest
-      - maps/pylibs/dataset_collector
-      - maps/pylibs/monitoring/lib
-      - maps/pylibs/monitoring/tests
-      - maps/renderer/tilesgen/tools/lib
-      - maps/renderer/tools/download_release
-      - maps/routing/router/regression/gen-stopwatch-ammo
-      - maps/routing/router/scripts/gen-ammo
-      - maps/routing/versus/lib
-      - maps/tools/matcher_quality/routes_to_geoms
-      - maps/tools/package_releaser/lib
-      - maps/tools/tanker-build/translate_messages
-      - maps/wikimap/mapspro/libs/python/pymod
-      - maps/wikimap/mapspro/libs/python/pymod/yandex/maps/wiki
-      - maps/wikimap/stat/libs/common/tests/lib
-      - maps/wikimap/stat/libs/common/tests/lib/dates_ut
-      - market/contrib/python/yaconf_v0.1.2
-      - market/contrib/python/yaconf_v0.1.2/yaconf
-      - market/idx/streams/yatf
-      - market/library/cms_promo/py_utils
-      - market/mstat/ch-cache/lib
-      - market/mstat/ch-cache/lib/database
-      - market/reductor/configure/lib
-      - market/reductor/www
-      - market/sre/library/python/maaslib
-      - market/sre/services/balancer_api/lib
-      - market/tools/report_stats/lib
-      - market/yamarec/log-parsers/bin
-      - market/yamarec/log-parsers/yamarec_log_parsers
-      - market/yamarec/metarouter/tests
-      - market/yamarec/metarouter/yamarec_metarouter
-      - market/yamarec/performance/utils
-      - mds/s3/s3_mds_proxy
-      - mds/s3/s3_mds_proxy/s3mds/helpers/config
-      - mds/s3/s3_mds_proxy/s3mds/idm/roles
-      - mds/s3/s3_mds_proxy/s3mds/xml
-      - media/media_support/media_support
-      - media/media_support/media_support/chats
-      - metrika/admin/maas/lib/core/common
-      - metrika/pylib/iptruler
-      - metrika/pylib/utils
-      - ml/nirvana/nope
-      - ml/tensorflow/ytensorflow
-      - ml/tensorflow/ytensorflow/ytensorflow/inference
-      - ml/tensorflow/ytensorflow/ytensorflow/quantization
-      - ml/tensorflow/ytensorflow/ytensorflow/train/hooks
-      - modadvert/libs/connectors
-      - modadvert/libs/lyncher/factor_providers/evil_misprints/it
-      - modadvert/libs/lyncher/factor_providers/misprints
-      - modadvert/libs/lyncher/ut/rules
-      - modadvert/libs/utils/common
-      - modadvert/libs/utils/common/ut
-      - modadvert/programs/abuse/bs_abuse_log_processor
-      - modadvert/programs/direct_proxy/libs/handlers
-      - modadvert/programs/saas_indexer/libs
-      - modadvert/programs/transfer_manager/libs
-      - modadvert/programs/update_flags_offline/tables_manager/libs
-      - mssngr/router/tools/state_cache_updater_v2
-      - opensource/sync/bin/arc2git
-      - opensource/sync/bin/git2git
-      - orgvisits/library/python/test_tools/yql/lib
-      - orgvisits/library/python/text_tools
-      - orgvisits/library/python/yt_jobs/tests
-      - orgvisits/metrics/ugc_feedback/ctr
+      - education/schoolbook/analytics/adhoc/ANALITICSEDU-515
+      - education/schoolbook/analytics/adhoc/ANALITICSEDU-687
+      - entity/ontodb/tasks/backup_yt_tables/lib
+      - entity/ontodb/tasks/clean_old_data
+      - entity/ontodb/tasks/import_museums/lib
+      - entity/ontodb/tasks/import_yam/lib
+      - entity/ontodb/tasks/send_table_checker_sensors/lib
+      - entity/ontodb/tasks/watson_converter/lib
+      - entity/ontodb/util/wiki
+      - entity/quality/helix
+      - entity/quality/metrics/film_lists/combine_attributes_for_stupids_marking
+      - entity/quality/metrics/not_film_lists/combine_attributes_for_stupids_marking
+      - entity/quality/metrics/passport
+      - entity/ugc/db/python/offline_processing
+      - entity/ugc/quality/reviews_ranking/ml/lib/trueskill
+      - entity/ugc/tests/lib
+      - entity/ugc/tools/bell/replay_requests
+      - entity/ugc/tools/comments/prepare_business_replies_for_cmnt
+      - entity/ugc/tools/tank/user_poll_ammo
+      - entity/ugc/tools/viewer_server/lib
+      - entity/ugc/tools/viewer_server/lib/core
+      - extsearch/audio/yamrec/query_browser
+      - extsearch/collections/tools/mmeta2metrics_serp
+      - extsearch/collections/tools/nirvana/collections_indexer
+      - extsearch/collections/tools/upper2metrics_serp
+      - extsearch/geo/conveyors/annotations/filtrate_banned_v2
+      - extsearch/geo/conveyors/annotations/metro_extractor/get_metro_to_ll
+      - extsearch/geo/conveyors/experimental/annotations/fast_annotations/collection_keywords_extractor
+      - extsearch/geo/tools/similar_orgs/read_user_session
+      - extsearch/geo/tools/special_features/mining_result_merger
+      - extsearch/images/library/ytscraper
+      - extsearch/images/money/scripts/robot
+      - extsearch/images/money/scripts/robot/util
+      - extsearch/images/money/tools/commercial_serps_downloader
+      - extsearch/images/robot/index/testlib/index_process_description
+      - extsearch/images/robot/library/pycm
+      - extsearch/images/robot/scripts/cm/semidup2
+      - extsearch/images/robot/tools/index_snapshot_cleaner
+      - extsearch/images/robot/tools/indexrotate
+      - extsearch/images/robot/tools/robot_losses/python/rotor_download_images
+      - extsearch/video/python/yql
+      - extsearch/video/quality/series/base/builder/common
+      - extsearch/video/quality/series/base/builder/wrappers
+      - extsearch/video/robot/cm/deletes/cmpy/playerdata
+      - extsearch/video/robot/cm/library
+      - extsearch/video/robot/hostsdb/tool/config/add_rules
+      - extsearch/video/robot/rt_transcoder/metarobot/tests
+      - extsearch/video/robot/rt_transcoder/transcoder/tests
+      - haas/cmis/hwr_allocation
+      - haas/cmis/hwr_allocation/allocation_methods
+      - health/articles/articles_pipeline
+      - health/articles/articles_pipeline/actions
+      - health/articles/articles_pipeline/lib
+      - health/articles/articles_pipeline/lib/util
+      - health/common_libs/utils
+      - health/yamd/health_import
+      - health/yamd/health_import/data_tests
+      - health/yamd/libs/cross_links
+      - health/yamd/libs/utils
+      - health/yamd/libs/utils/parsers
+      - infra/awacs/vendor/awacs/pire/ut
+      - infra/callisto/controllers/viewer/lib2
+      - infra/cqudp/src
+      - infra/cqudp/src/eggs
+      - infra/deploy/tools/yd_migrate/lib
+      - infra/gencfg-gui
+      - infra/heartbeat/src
+      - infra/heartbeat/src/daemon
+      - infra/kernel/tools/coroner
+      - infra/netconfig/utils/pinger/lib
+      - infra/porto/api_py
+      - infra/portoshell
+      - infra/portoshell/src
+      - infra/qyp/vmctl/src
+      - infra/reconf_juggler/tools/jdiff/tests
+      - infra/rtc/janitor
+      - infra/scripts/eventlog_uploader/script
+      - infra/yp_drcp/lib
+      - infra/yp_dru/bin
+      - irt/bmgen/market_data
+      - irt/bmgen/synonyms
+      - keyboard/analytics/toloka/nirvana/blocks
+      - keyboard/analytics/toloka/nirvana/swipe
+      - keyboard/analytics/toloka/nirvana/typing
+      - keyboard/dict/nirvana/blocks
+      - library/python/testing/filter
+      - library/python/testing/gtest
+      - locdoc/doc_tools/yoda/friends/okapi
+      - locdoc/doc_tools/yoda/friends/okapi/operations
+      - logbroker/tools/manual/alter_sqs_tables
+      - logbroker/tools/manual/create_logbroker_account/lib
+      - logbroker/tools/manual/load_test/load
+      - mail/contrib/ccs-caldavtester
+      - mail/contrib/ccs-caldavtester/src
+      - mail/contrib/ccs-caldavtester/verifiers
+      - mail/contrib/tatsu/test
+      - mail/contrib/tatsu/test/grammar
+      - mail/python/theatre/app
+      - mail/tools/ews_call/ews_call
+      - mail/tools/safely_delete_stids/lib
+      - mail/yasm/lib
+      - mail/yasm/lib/calendar/alerts
+      - mail/yasm/lib/calendar/panels
+      - mapreduce/yt/tools/du-yt
+      - maps/analyzer/pylibs/watchman_api/lib
+      - maps/automotive/proxy/config_generator
+      - maps/automotive/remote_access/autotests/tests/data_types
+      - maps/b2bgeo/mvrp_solver/backend/tests_lib
+      - maps/carparks/regression/renderer/generate_ammo
+      - maps/carparks/tools/route_lost_viewer/bin
+      - maps/infra/monitoring/sla_calculator/core/services
+      - maps/jams/renderer2/common/ecstatic/lib
+      - maps/mobile/server/tools/cache_deprecator
+      - maps/mobile/tools/android-manifest
+      - maps/pylibs/dataset_collector
+      - maps/pylibs/monitoring/lib
+      - maps/pylibs/monitoring/tests
+      - maps/renderer/tilesgen/tools/lib
+      - maps/renderer/tools/download_release
+      - maps/routing/router/regression/gen-stopwatch-ammo
+      - maps/routing/router/scripts/gen-ammo
+      - maps/routing/versus/lib
+      - maps/tools/matcher_quality/routes_to_geoms
+      - maps/tools/package_releaser/lib
+      - maps/tools/tanker-build/translate_messages
+      - maps/wikimap/mapspro/libs/python/pymod
+      - maps/wikimap/mapspro/libs/python/pymod/yandex/maps/wiki
+      - maps/wikimap/stat/libs/common/tests/lib
+      - maps/wikimap/stat/libs/common/tests/lib/dates_ut
+      - market/contrib/python/yaconf_v0.1.2
+      - market/contrib/python/yaconf_v0.1.2/yaconf
+      - market/idx/streams/yatf
+      - market/library/cms_promo/py_utils
+      - market/mstat/ch-cache/lib
+      - market/mstat/ch-cache/lib/database
+      - market/reductor/configure/lib
+      - market/reductor/www
+      - market/sre/library/python/maaslib
+      - market/sre/services/balancer_api/lib
+      - market/tools/report_stats/lib
+      - market/yamarec/log-parsers/bin
+      - market/yamarec/log-parsers/yamarec_log_parsers
+      - market/yamarec/metarouter/tests
+      - market/yamarec/metarouter/yamarec_metarouter
+      - market/yamarec/performance/utils
+      - mds/s3/s3_mds_proxy
+      - mds/s3/s3_mds_proxy/s3mds/helpers/config
+      - mds/s3/s3_mds_proxy/s3mds/idm/roles
+      - mds/s3/s3_mds_proxy/s3mds/xml
+      - media/media_support/media_support
+      - media/media_support/media_support/chats
+      - metrika/admin/maas/lib/core/common
+      - metrika/pylib/iptruler
+      - metrika/pylib/utils
+      - ml/nirvana/nope
+      - ml/tensorflow/ytensorflow
+      - ml/tensorflow/ytensorflow/ytensorflow/inference
+      - ml/tensorflow/ytensorflow/ytensorflow/quantization
+      - ml/tensorflow/ytensorflow/ytensorflow/train/hooks
+      - modadvert/libs/connectors
+      - modadvert/libs/lyncher/factor_providers/evil_misprints/it
+      - modadvert/libs/lyncher/factor_providers/misprints
+      - modadvert/libs/lyncher/ut/rules
+      - modadvert/libs/utils/common
+      - modadvert/libs/utils/common/ut
+      - modadvert/programs/abuse/bs_abuse_log_processor
+      - modadvert/programs/direct_proxy/libs/handlers
+      - modadvert/programs/saas_indexer/libs
+      - modadvert/programs/transfer_manager/libs
+      - modadvert/programs/update_flags_offline/tables_manager/libs
+      - mssngr/router/tools/state_cache_updater_v2
+      - opensource/sync/bin/arc2git
+      - opensource/sync/bin/git2git
+      - orgvisits/library/python/test_tools/yql/lib
+      - orgvisits/library/python/text_tools
+      - orgvisits/library/python/yt_jobs/tests
+      - orgvisits/metrics/ugc_feedback/ctr
       - passport/backend/adm_api/views/meltingpot
       - passport/backend/core
       - passport/backend/core/builders/frodo
@@ -471,765 +471,765 @@ migrations:
       - passport/backend/core/historydb/tests
       - passport/backend/core/suggest
       - passport/backend/core/types
-      - portal/tools/morda-release/server
-      - quality/ab_testing/abt_resources_lib/loaders/tests/long_metrics
-      - quality/ab_testing/cofe/bin/cofe_ctl
-      - quality/ab_testing/cofe/projects/disk
-      - quality/ab_testing/cofe/projects/disk/utils
-      - quality/ab_testing/cofe/projects/geo/geoadv
-      - quality/ab_testing/scripts/kati
-      - quality/ab_testing/scripts/kati/lib
-      - quality/antifraud/scripts/prod/traffic_chains
-      - quality/functionality/chats/feedback/src/feedback/core
-      - quality/functionality/chats/feedback/src/feedback/core/settings
-      - quality/functionality/chats/floyd/src/floyd/core
-      - quality/functionality/chats/floyd/src/floyd/core/settings
-      - quality/functionality/entity_search/factqueries/instructions/prepare_facts
-      - quality/functionality/entity_search/factqueries/tools/extract_fact/scripts/get_encyc_queries
-      - quality/functionality/facts/common/goldensets/actuality/gen_population_tasks
-      - quality/functionality/facts/recheck/calc_actuality_factors
-      - quality/functionality/parsepl/libs/parsers/tests
-      - quality/functionality/parsepl/nirvana/build_market_parsers/src
-      - quality/functionality/parsepl/toloka/configs_for_parsers
-      - quality/functionality/parsepl/toloka/configs_for_parsers/ut
-      - quality/functionality/rtx/scripts/social-serp/nano_squeeze
-      - quality/functionality/scripts/nirvana/jobs/casper/helpers/ytreader
-      - quality/functionality/snippets/social_bna
-      - quality/functionality/snippets/top_hosts/plugins/calculator888
-      - quality/functionality/turbo/analytics/ecommerce/lib/page_classifier
-      - quality/functionality/turbo/auto_cleanup/lib
-      - quality/functionality/turbo/rss/parser_job/tests/small
-      - quality/functionality/turbo/tools/infinity/tools/wmc
-      - quality/functionality/turbo/tools/rediff
-      - quality/functionality/turbo/tools/tdm
-      - quality/functionality/turbo/yandex_pogoda
-      - quality/functionality/unstructured/yt_concatenator/tests
-      - quality/nirvana_tools/conveyor_operations/asgen/config
-      - quality/nirvana_tools/conveyor_operations/eval_feature/eval_feature_viewer
-      - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/append_formula_factors
-      - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/get_cache_from_appended_features
-      - quality/query_expansions/proc/covfefe/makefile_update
-      - quality/query_expansions/tools/tests/test_dummy_process
-      - quality/relev_tools/dsat/find_words_with_absent_forms
-      - quality/relev_tools/lboost_ops/nirvana/operations/main/tests
-      - quality/trailer/suggest/services/maps/conveyors/doc_features/chronostat
-      - quality/trailer/suggest/services/maps/conveyors/pool/make_pointwise_pool
-      - quality/trailer/suggest/services/maps/tools/retrieval_test
-      - quality/trailer/suggest_dict/suggest_framework/tools
-      - quality/user_sessions/market/custom_statistics
-      - quality/user_sessions/market/custom_statistics/cust/abtypes/refuses
-      - quality/userdata/scripts
-      - quality/userdata/scripts/state_validation
-      - quality/webfresh/learn/half_hour/build_union_prs
-      - quality/webfresh/libraries/prs
-      - quality/webfresh/libraries/prs/tests
-      - quality/webfresh/metrics/aggregate_serps
-      - quality/webfresh/metrics/bad_urls_stats
-      - quality/webfresh/metrics/build_formulas_config
-      - quality/yaqlib/yaqlint
-      - regulargeo/tools
-      - rnd_toolbox/deckard
-      - rnd_toolbox/deckard/storage
-      - rnd_toolbox/hwlib
-      - robot/favicon/python
-      - robot/jupiter/library/python/sample
-      - robot/jupiter/scripts
-      - robot/jupiter/viewers/galileo
-      - robot/kwyt/scripts/sampling_data
-      - robot/lemur/scripts/common
-      - robot/metrics/forums_sbr/bin/get_forum_urls
-      - robot/metrics/forums_sbr/bin/get_urls_sample
-      - robot/metrics/forums_sbr/bin/parse_zora_result
-      - robot/metrics/fresh_sbr/mk_fresh_serp
-      - robot/metrics/rotor_missed_words_metric/bin/gemini_canonize
-      - robot/metrics/speed_sbr/remove_fresh_hosts
-      - robot/quality/robotrank/mk_learn_pool/lib
-      - robot/salmon_agent/counters
-      - robot/salmon_agent/utils
-      - rt-research/broadmatching/mr/IRT-1517
-      - rt-research/broadmatching/scripts/dyn-smart-banners/update_dyn_trashfilter
-      - rt-research/broadmatching/scripts/pylib
-      - rt-research/broadmatching/scripts/pylib/bm
-      - rt-research/broadmatching/scripts/yt/catalogia_mapper
-      - rt-research/broadmatching/scripts/yt/cdict_generator
-      - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners
-      - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners/generate_filtered_links
-      - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources
-      - rt-research/multik/deploy/deploy
-      - saas/tools/devops/lib
-      - saas/tools/devops/lib23/tests/py2
-      - saas/tools/devops/lib23/tests/py23
-      - samogon/libs/sandbox
-      - sandbox/common/projects_handler
-      - sandbox/projects/BuildBegemotLightTestConfig
-      - sandbox/projects/BuildNewsPackage
-      - sandbox/projects/BuildSportProxyData
-      - sandbox/projects/BuildYobject
-      - sandbox/projects/CheckFreshDocuments
-      - sandbox/projects/CompareNewsdResponses
-      - sandbox/projects/CompareYmakeDump
-      - sandbox/projects/ConvertVideo
-      - sandbox/projects/DeployVideoMmetaShard
-      - sandbox/projects/GetAdvquickDatabase
-      - sandbox/projects/GetFusionMiddlesearchResponses
-      - sandbox/projects/GetPokazometerDatabase
-      - sandbox/projects/IexBuildPackages
-      - sandbox/projects/IexImportPatterns
-      - sandbox/projects/LandingConstructor
-      - sandbox/projects/LandingConstructor/Stat/Jobs
-      - sandbox/projects/MediaLib
-      - sandbox/projects/MediaLib/shardmap
-      - sandbox/projects/MixQueriesExperimentsRegions
-      - sandbox/projects/PersonalPoiGenerator
-      - sandbox/projects/PersonalPoiGenerator/PoisDumper
-      - sandbox/projects/ReleaseBalancerConfigGenerator
-      - sandbox/projects/ReleaseConfigGeneratorService
-      - sandbox/projects/ReleaseMediaShardmaps
-      - sandbox/projects/ReportDataRuntime
-      - sandbox/projects/ReportDataRuntimeItem
-      - sandbox/projects/ReportDataRuntimeRT
-      - sandbox/projects/ReportDataRuntimeTags
-      - sandbox/projects/ReportRuleTestFull
-      - sandbox/projects/RunNewsLoadtest
-      - sandbox/projects/SOC/YtProxyAnalyzeCommandParams
-      - sandbox/projects/SOC/YtRawMasterLogAnalysis
-      - sandbox/projects/SpawnTestConfigGenerator
-      - sandbox/projects/TaxiSecurity
-      - sandbox/projects/TaxiSecurity/BadLogsAnalyzer
-      - sandbox/projects/TaxiSecurity/YodaxAnalyzer
-      - sandbox/projects/TestFrontMetricsLogs
-      - sandbox/projects/TestFrontMetricsLogs/modules
-      - sandbox/projects/TestReportPerformance
-      - sandbox/projects/TickenatorBatchProcessing
-      - sandbox/projects/TickenatorBatchProcessing/YasmScreenshoter
-      - sandbox/projects/Travel/tasks/tools
-      - sandbox/projects/Ufo
-      - sandbox/projects/Ufo/CI
-      - sandbox/projects/UpdateConfigGeneratorDb
-      - sandbox/projects/UpdateMapsWizardPpoData
-      - sandbox/projects/UpdateTestenvNewsdResources
-      - sandbox/projects/UrlsByShowCounters/executable/lib
-      - sandbox/projects/VpsAmmo
-      - sandbox/projects/YabsDebuilder
-      - sandbox/projects/YabsDebuilderDev
-      - sandbox/projects/YabsServerStatPerformance
-      - sandbox/projects/adfox/adfox_ui/testpalm/testrunCreate
-      - sandbox/projects/alice_evo
-      - sandbox/projects/alice_evo/AliceEvoIntegrationTestsWrapper
-      - sandbox/projects/antirobot
-      - sandbox/projects/antirobot/AsnNames
-      - sandbox/projects/antirobot/LoadTesting
-      - sandbox/projects/autobudget/autobudget_lib
-      - sandbox/projects/avia/avia_statistics/update_alternative_routes_prices
-      - sandbox/projects/avia/avia_statistics/update_flights
-      - sandbox/projects/avia/avia_statistics/update_median_prices
-      - sandbox/projects/avia/avia_statistics/update_popular_months
-      - sandbox/projects/avia/avia_statistics/update_return_ticket_prices
-      - sandbox/projects/avia/avia_statistics/update_route_crosslinks
-      - sandbox/projects/avia/flight_status_registrar/FlightStatsRegistrar
-      - sandbox/projects/avia/flight_status_registrar/OagFlightsRegistrar
-      - sandbox/projects/avia/flight_status_registrar/VariFlightRegistrar
-      - sandbox/projects/avia/log_unknown_fare_codes
-      - sandbox/projects/avia/travel_avia_dump_resource/task
-      - sandbox/projects/bsyeti
-      - sandbox/projects/canvas
-      - sandbox/projects/canvas/video_constructor_utils
-      - sandbox/projects/cloud/yfm
-      - sandbox/projects/common/compare_upper_results
-      - sandbox/projects/common/fusion
-      - sandbox/projects/common/gencfg
-      - sandbox/projects/common/market_report
-      - sandbox/projects/common/mobilesearch
-      - sandbox/projects/common/mobilesearch/startrek_client
-      - sandbox/projects/common/yabs
-      - sandbox/projects/gencfg
-      - sandbox/projects/gencfg/BuildConfigGenerator2
-      - sandbox/projects/gencfg/GencfgMonitoringCharts
-      - sandbox/projects/gencfg/ReleaseConfigGenerator2
-      - sandbox/projects/gencfg/workflow
-      - sandbox/projects/health/acceptance_begemot_graph
-      - sandbox/projects/laas
-      - sandbox/projects/laas/CollectTestGeobases
-      - sandbox/projects/logs/HashedSessionsDiff
-      - sandbox/projects/logs/TestRalibPerfomance
-      - sandbox/projects/market/infra/helpers
-      - sandbox/projects/market/infra/helpers/changes_helper
sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks - - sandbox/projects/media - - sandbox/projects/media/admins/mysqlcopydb - - sandbox/projects/media/kp-front-nginx/config-validation - - sandbox/projects/media_crm/tasks - - sandbox/projects/media_crm/tasks/media_crm_deploy - - sandbox/projects/metrika/mobile/sdk/helpers - - sandbox/projects/mssngr/rtc - - sandbox/projects/mssngr/runtime/MssngrRouterLoadTest - - sandbox/projects/music - - sandbox/projects/music/MusicExportYdbToYt - - sandbox/projects/music/ReleaseMusic - - sandbox/projects/music/deployment/helpers - - sandbox/projects/news - - sandbox/projects/news/CompareNewsAnnotatorResponses - - sandbox/projects/news/UpdateRTHubAdvWidgetResources - - sandbox/projects/ofd/backend/ofd_backend_package_build - - sandbox/projects/ofd/backend/ofd_backend_run_tests - - sandbox/projects/ofd/notifier/ofd_notifier_package_build - - sandbox/projects/ofd/runtime/ofd_runtime_package_build - - sandbox/projects/ofd/runtime/ofd_runtime_run_tests - - sandbox/projects/ofd/tasks/ofd_tasks_package_build - - sandbox/projects/porto/BuildPortoLayer - - sandbox/projects/porto/BuildPortoLayerTmp - - sandbox/projects/qafw - - sandbox/projects/qafw/ansible - - sandbox/projects/reconf - - sandbox/projects/sandbox_ci/sandbox_ci_compare_load_test - - sandbox/projects/sandbox_ci/task - - sandbox/projects/tests - - sandbox/projects/turbo - - sandbox/projects/turbo/CompareTurboResponses - - sandbox/projects/turbo/SampleForTurbo - - sandbox/projects/vh - - sandbox/projects/vh/faas/FaasConvertVideoVodTest - - sandbox/projects/vh/frontend/count_diff - - sandbox/projects/vh/frontend/generate_requests_from_yt_logs - - sandbox/projects/vins - - sandbox/projects/vins/AliceBegemotMegamindPerfTest - - sandbox/projects/vins/BuildVinsCustomEntity - - sandbox/projects/vins/MegamindPerfTest - - sandbox/projects/vins/VinsPerfTest - - sandbox/projects/websearch/CheckPrechargeAfterMemoryMap - - sandbox/projects/yane/ParseYanswerFactLogs - - sandbox/sdk2 - - sandbox/sdk2/vcs - - sandbox/serviceapi - - sandbox/serviceapi/handlers - - sandbox/yasandbox/database/clickhouse - - skynet/kernel - - skynet/kernel/util/sys/user - - skynet/library - - skynet/library/tasks - - smart_devices/crash_analytics/tools/minidump_analyzer - - smart_devices/tools/launcher2/tests/restarts - - smm/lib/models/sklearn - - sprav/mining/botanik_miner - - statbox/qb2 - - statbox/qb2/qb2_extensions/api/v1/extractors - - statbox/qb2/qb2_extensions/api/v1/extractors/pool - - statbox/statkey/jam/jobs/cubes/desktop_installs/v2 - - strm/generate/lib/generate - - sup/stat/find_pushes - - talents/nlu/scripts/train/geoname - - testenv/jobs/rtyserver - - tools/mkdocs_builder/lib - - tools/mkdocs_builder/mkdocs_yandex - - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex - - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex/ext/markdown - - tools/releaser/src - - tools/ygetparam - - travel/avia/library/python/common - - travel/avia/library/python/common/tests - - travel/avia/library/python/common/tests/lib - - travel/avia/library/python/common/utils - - travel/avia/library/python/geosearch - - travel/avia/library/python/geosearch/views - - travel/avia/library/python/route_search - - travel/avia/library/python/route_search/by_number - - travel/rasp/bus/admin/utils - - travel/rasp/bus/admin/utils/points - - travel/rasp/library/python/common/tests - - travel/rasp/library/python/common/tests/data_api/billing - - travel/rasp/library/python/common/tests/data_api/dzv - - travel/rasp/library/python/geosearch - - 
travel/rasp/library/python/geosearch/views - - travel/rasp/library/python/route_search - - travel/rasp/library/python/route_search/by_number - - travel/rasp/train_api - - travel/rasp/train_api/scripts - - travel/rasp/train_api/tests - - travel/rasp/train_api/tests/tariffs/train/views - - travel/rasp/train_api/tests/tariffs/train/wizard - - travel/rasp/train_api/tests/train_purchase - - travel/rasp/train_api/tests/train_purchase/tasks - - travel/rasp/train_api/train_partners/base/train_details - - vcs/svn/hooks/check_arc_commit - - vh/telegram/sqs2media - - vh/telegram/sqs2media/handlers - - voicetech/asr/tools/asr_analyzer/lib - - voicetech/common/voicetable/bin/filter_text - - voicetech/common/voicetable/checks/bin/general_voicetable_check - - voicetech/infra/gdpr_proxy/service - - voicetech/tts/vh/utils - - weather/workers/warnings/general - - yabs/analytics/anomaly_analyzer/src - - yabs/chat_bot/bot - - yabs/event-utils - - yabs/outdoor/libs/confirmed_booking - - yabs/python-libs/common - - yabs/qa/b2b_utils/bsserver_b2b/engine/bs_utils - - yabs/qa/b2b_utils/bsserver_b2b/engine/mongo_utils - - yabs/qa/b2b_utils/bsserver_b2b/engine/run - - yabs/qa/b2b_utils/bsserver_b2b/engine/validate_scripts - - yabs/qa/yabs_b2b_tank/qabs/b2b - - yabs/sbyt/testing/core - - yabs/server/cs/pylibs/partner_interface_monitor - - yabs/server/cs/pylibs/settings - - yabs/server/infra/bstrbufbuf/plugin - - yabs/server/infra/trivial_cron - - yabs/server/libs/py_markdown_strings - - yabs/server/test/ft/BSDEV-73064 - - yabs/server/test/ft/BSSERVER-11503 - - yabs/server/test/ft/BSSERVER-14195 - - yabs/server/test/ft/BSSERVER-2122 - - yabs/server/test/ft/BSSERVER-2158 - - yabs/server/test/ft/BSSERVER-2454 - - yabs/server/test/ft/BSSERVER-2976 - - yabs/server/test/ft/BSSERVER-3895 - - yabs/server/test/ft/BSSERVER-9233 - - yabs/server/test/ft/NANPU-817 - - yabs/server/test/ft/checks - - yabs/server/test/pylibs/qxl - - yabs/server/test/pylibs/simulator - - yabs/server/test/qabs_bsserver_pytest - - yabs/server/test/tools/oneshot_tester/lib - - yabs/stat/dropstat2/api/lib - - yabs/stat/infra/clickhouse/repair_master_report - - yabs/utils/autosupbs/pylibs/tasks_generator - - yabs/utils/autosupbs/tests/tasks_generator - - yabs/utils/yabs-mysql-binlog-audit/lib - - yabs/vh/cms-pgaas/cms_common - - yabs/vh/cms-pgaas/cms_common/biz - - yabs/vh/cms-pgaas/content_importer/evsproduction/pattern_based - - yabs/vh/cms-pgaas/content_ksiva_api/lib - - yabs/vh/cms-pgaas/feed_miner - - yabs/vh/cms-pgaas/feed_miner/downloader_middlewares - - yabs/vh/frontend/test/vh_pytest - - yabs/web-bins/export-stat/pcode_experiments/dill - - yabs/web-bins/export-stat/pcode_experiments/issue - - yaphone/advisor/project - - yaphone/localization_admin/src - - yaphone/localization_admin/src/models - - yaphone/localization_admin/src/models/details - - yaphone/localization_admin/src/models/support_info - - yp/scheduler_simulator/simtool - - yql/tools/docs/custom_mkdocs - - yql/tools/docs/wiki2markdown - - yql/tools/mrjob/test - - yql/tools/qplayer - - yweb/antimalware/mitb/mitb_monitor/lib - - yweb/antiporno/analyze_yt_ops - - yweb/antiporno/cp_conv/prepare_suggestive_cp_img_basket - - yweb/antiporno/cp_conv/text_classif/bin - - yweb/antiporno/nav/lib - - yweb/antiporno/pyutil/url - - yweb/antiporno/queries_manual_markup/lib - - yweb/antiporno/query_analyzer/porn_query_config_parser - - yweb/antiporno/site_reachability/lib/reachability_common - - yweb/antiporno/top_queries_cleanup/yql/test - - yweb/antispam/clean_web/tools/run_cm_targets - - 
yweb/antispam/mascot/scripts/tcinet - - yweb/antispam/seo_masks/py/static_impl/ut - - yweb/antispam/tools/yql - - yweb/antispam/webspam/collections/vw_model_applier/bin - - yweb/antispam/webspam/export/tries/tools/upper_tries - - yweb/antispam/ytgr/viewer - - yweb/blender/newsletter/unused_formulas - - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils - - yweb/blender/scripts/nirvana/jobs/calc_workflow_num_with_filters - - yweb/blender/scripts/nirvana/jobs/join_features - - yweb/blender/scripts/nirvana/jobs/train_sbs_model/utils - - yweb/blogs/parsers/test - - yweb/freshness/scripts/sport_pushes - - yweb/freshness/scripts/sport_wizard - - yweb/freshness/scripts/svn - - yweb/freshness/scripts/trends/trendbot_tg/tg_handler - - yweb/freshness/scripts/trends/trendbot_tg/tg_handler/foreground - - yweb/news/hosts_differ - - yweb/news/runtime_scripts/event_to_infra - - yweb/news/tests - - yweb/news/tests/export - - yweb/news/tests/utils - - yweb/robot/limbo - - yweb/robot/limbo/imports - - yweb/robot/metrics/pmusca/lib - - yweb/sitelinks/astrolabe/build_bna/candidates/filter_by_region - - yweb/sitelinks/scripts/sitelinks - - yweb/structhtml/richsnippets/scripts/build_foto_recipes/prepare_to_deploy - - yweb/verticals/scripts/sport_chats - - yweb/verticals/scripts/sport_zen_updater/add_parsed_zen_urls - - yweb/video/vparsrobot/v2/tests - - yweb/video/vparsrobot/v2/tests-large - - yweb/webdaemons/clickdaemon/tools/create_ammo_from_tcpdump - - yweb/yasap/answers_nirvana/make_ammos - - yweb/yasap/answers_quality/plagiarism/prepare_scraper_queries - - yweb/yasap/pdb/backend/offline_views/history_calculator - - yweb/yasap/pdb/food/normalizer - - yweb/yasap/pdb/nirvana/gathered_boards_delta - - yweb/younglings/tasks/YOUNGLINGS-516 - - zootopia/analytics/drive/source/drive/operations/support/registrations/reg_quality - - zootopia/hub/vds - - zootopia/hub/vds/onetime/orgthief/orgthief/parsers - - zootopia/hub/vds/scripts - - zootopia/hub/vds/services/velobike - F841: - ignore: - - F841 - prefixes: - - addappter/web/api - - addappter/web/api/views - - addappter/web/api/views/api - - addappter/web/api/views/frontend - - addappter/web/common - - addappter/web/common/events - - addappter/web/libs - - addappter/web/libs/marshmallow - - addappter/web/libs/walrus - - addappter/web/testing/fixtures - - adfox/infra/amacs_config/lib - - ads/autobudget/metrics/equivalency_monitoring - - ads/autobudget/metrics/example_monitoring - - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools - - ads/ml_engine/learn/result/local_extended_fstr/lib - - ads/pytorch/lib/online_learning/production/processors/tsar_processor/lib - - ads/quality/apc_check_py - - ads/quality/embedding/tsar_tensor/ft_pool/lib - - ads/quality/ltp/libs/build_pool - - ads/sandbox_scripts/ggmonitor - - alice/analytics/wer/g2p - - alice/hollywood/tests/perf_test - - alice/megamind/tests/library - - alice/paskills/granet_server/tests - - alice/uniproxy/bin/send-digest - - alice/uniproxy/bin/uniproxy - - alice/uniproxy/bin/uniproxy-delivery - - alice/uniproxy/bin/uniproxy-subway - - alice/uniproxy/bin/uniproxy-unistat - - alice/uniproxy/bin/yabio-storage - - alice/uniproxy/library/testing - - alice/uniproxy/library/testing/mocks - - alice/uniproxy/tools/balancer_top - - apphost/conf/tests/blackbox - - april/badb - - april/badb/badb/db/mysql - - april/web/bas/ca - - april/web/bas/ca/forms - - april/web/bas/collector - - aurora/scripts/parsers/zoon_ru - - balancer/test/functional/regexp_host - - balancer/test/functional/regexp_path 
- - billing/apikeys/apikeys - - billing/apikeys/apikeys/mapper - - billing/bcl/bcl - - billing/refs/refs - - cloud/marketplace/queue/yc_marketplace_queue - - cloud/netinfra/rknfilter/yc_rkn_common - - cloud/netinfra/rknfilter/yc_rkn_config_node - - cmnt/tools/regression/request_classes - - cv/imageproc/ocr/tools/database_extraction/nirvana/imgaug/src/augmenters - - cv/short2long/training/yt_calc_factors - - datacloud/log_reader/lib - - devtools/local_cache/ac/tests/perfac - - dict/bert/make/lib - - dict/bert/make/lib/models - - dict/bert/make/lib/tasks - - dict/mt/analytics/sentence_breaking/translate_human_eval_comparison - - dict/mt/g2p/asr/graph - - dict/ontodb/onto_lib/sources/wikidata - - dict/ontodb/proto/direct/ut - - dict/ontodb/utils/export_src_codes - - direct/infra/direct_zkcli - - direct/infra/dt-dump-b2yt-data - - district/logbroker_consumers/lib - - district/logbroker_consumers/lib/yt - - dj/tools/rthub_profiles/acceptance/acceptance_tool - - dj/tools/viewer/custom/entity - - edadeal/analytics/scripts/CashbackReport/CashbackReportLib - - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib - - education/lib/common - - education/schoolbook/analytics/adhoc/ANALITICSEDU-687 - - entity/ontodb/tasks/vloggers - - entity/recommender/nirvana/operations/abt_experiments_launcher - - entity/ugc/nirvana/ugcdb/support_hidden_import/lib/ut - - entity/ugc/tools/update_photos - - extsearch/audio/generative/py/uploader - - extsearch/images/tools/nirvana/download_serps - - extsearch/video/robot/cm/library/ut - - extsearch/video/robot/cm/transcoder/cmpy/vh_index_dups_matcher - - extsearch/video/robot/crawling/player_testing/services/live_proxy - - extsearch/video/robot/previews/hitman/lost_preview_status - - extsearch/video/robot/tools/library/python - - extsearch/video/transcoder/per_title/handler - - extsearch/video/transcoder/per_title/vmaf - - geosuggest/conveyors/learn_pool/lst_weights/lib - - haas/cmis/hwr_preorders - - haas/graphite_sync/get_report - - infra/deploy_queue_controller/lib - - infra/dist/dmover/bin/dmover - - infra/dist/dmover/lib - - infra/dist/dmover/lib/internal - - infra/dist/dmover/tests - - infra/host-cpu-metrics - - infra/host-cpu-metrics/host_metrics - - infra/porto/api_py - - infra/qyp/vmproxy/tests - - infra/shawshank/tests - - infra/skybit - - infra/skybit/src - - infra/yp_dns/tools/handle_duplicate_records/lib - - infra/yp_quota_distributor/lib - - keyboard/dict/nirvana/config_generator/blocks - - keyboard/dict/synthetic_ngrams/synthetic_ngrams_builder - - lbs/metrics/lbs_binbase_diff - - library/python/bstr - - logbroker/tools/manual/collect_capacity/base_2020 - - logbroker/tools/manual/collect_capacity/collect_pre - - logbroker/tools/manual/collect_capacity/set_capacity_pre - - logbroker/tools/manual/collect_capacity/topic_list - - logbroker/tools/manual/create_logbroker_account/lib - - logbroker/tools/startrek/st - - mail/python/fake_mulcagate - - mail/python/theatre/app - - mail/python/theatre/app/log_helpers - - mail/tools/sql_execute_per_shard/lib - - maps/analytics/legacy/nile/statadhoc-8703-site-api-report - - maps/automotive/carwashes/tests/src - - maps/automotive/qa/metrics/common/ut - - maps/automotive/tools/statistics_auto/pylib/track_match_finder - - maps/infra/apiteka/config_uploader/tests - - maps/infra/sedem/cli/tests/release - - maps/infra/sedem/cli/tests/release/utils - - maps/tools/matcher_quality/routes_to_geoms - - maps_adv/common/shared_mock/tests - - market/mobile_validator/mt/env - - market/mstat/ch-cache/tests - - 
market/sre/services/cema-proxy/lib - - market/sre/services/cema/lib - - market/sre/services/cema/lib/classes - - market/yamarec/yamarec/yamarec1 - - market/yamarec/yamarec/yamarec1/tasks - - metrika/admin/brb/server/lib - - metrika/admin/maas/bin/backend - - metrika/admin/maas/bin/monitoring/maas_instances_memory - - metrika/admin/maas/lib/core/common - - metrika/admin/maas/lib/core/daemon - - metrika/admin/maas/lib/core/service - - metrika/admin/python/duty/bot/lib - - metrika/admin/python/scripts/jrun - - metrika/tasklets/conductor/impl - - milab/lib/i2tclient/python - - ml/tensorflow/tfnn/tests - - mlp/mail/aspam/experiments/MLP_231 - - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound - - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats - - modadvert/libs/connectors/loggers - - modadvert/libs/http - - modadvert/libs/laas/workers/domain_threats - - modadvert/libs/laas/workers/features_from_href - - modadvert/libs/utils/dictutils/ut - - modadvert/programs/cv_app/libs - - modadvert/tools/accept_campaigns - - mssngr/botfarm/src/bot - - music/analytics/jam-sox/music_lib/financial_reports/tests/unit - - music/tools/download-info + - portal/tools/morda-release/server + - quality/ab_testing/abt_resources_lib/loaders/tests/long_metrics + - quality/ab_testing/cofe/bin/cofe_ctl + - quality/ab_testing/cofe/projects/disk + - quality/ab_testing/cofe/projects/disk/utils + - quality/ab_testing/cofe/projects/geo/geoadv + - quality/ab_testing/scripts/kati + - quality/ab_testing/scripts/kati/lib + - quality/antifraud/scripts/prod/traffic_chains + - quality/functionality/chats/feedback/src/feedback/core + - quality/functionality/chats/feedback/src/feedback/core/settings + - quality/functionality/chats/floyd/src/floyd/core + - quality/functionality/chats/floyd/src/floyd/core/settings + - quality/functionality/entity_search/factqueries/instructions/prepare_facts + - quality/functionality/entity_search/factqueries/tools/extract_fact/scripts/get_encyc_queries + - quality/functionality/facts/common/goldensets/actuality/gen_population_tasks + - quality/functionality/facts/recheck/calc_actuality_factors + - quality/functionality/parsepl/libs/parsers/tests + - quality/functionality/parsepl/nirvana/build_market_parsers/src + - quality/functionality/parsepl/toloka/configs_for_parsers + - quality/functionality/parsepl/toloka/configs_for_parsers/ut + - quality/functionality/rtx/scripts/social-serp/nano_squeeze + - quality/functionality/scripts/nirvana/jobs/casper/helpers/ytreader + - quality/functionality/snippets/social_bna + - quality/functionality/snippets/top_hosts/plugins/calculator888 + - quality/functionality/turbo/analytics/ecommerce/lib/page_classifier + - quality/functionality/turbo/auto_cleanup/lib + - quality/functionality/turbo/rss/parser_job/tests/small + - quality/functionality/turbo/tools/infinity/tools/wmc + - quality/functionality/turbo/tools/rediff + - quality/functionality/turbo/tools/tdm + - quality/functionality/turbo/yandex_pogoda + - quality/functionality/unstructured/yt_concatenator/tests + - quality/nirvana_tools/conveyor_operations/asgen/config + - quality/nirvana_tools/conveyor_operations/eval_feature/eval_feature_viewer + - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/append_formula_factors + - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/get_cache_from_appended_features + - quality/query_expansions/proc/covfefe/makefile_update + - quality/query_expansions/tools/tests/test_dummy_process + - 
quality/relev_tools/dsat/find_words_with_absent_forms + - quality/relev_tools/lboost_ops/nirvana/operations/main/tests + - quality/trailer/suggest/services/maps/conveyors/doc_features/chronostat + - quality/trailer/suggest/services/maps/conveyors/pool/make_pointwise_pool + - quality/trailer/suggest/services/maps/tools/retrieval_test + - quality/trailer/suggest_dict/suggest_framework/tools + - quality/user_sessions/market/custom_statistics + - quality/user_sessions/market/custom_statistics/cust/abtypes/refuses + - quality/userdata/scripts + - quality/userdata/scripts/state_validation + - quality/webfresh/learn/half_hour/build_union_prs + - quality/webfresh/libraries/prs + - quality/webfresh/libraries/prs/tests + - quality/webfresh/metrics/aggregate_serps + - quality/webfresh/metrics/bad_urls_stats + - quality/webfresh/metrics/build_formulas_config + - quality/yaqlib/yaqlint + - regulargeo/tools + - rnd_toolbox/deckard + - rnd_toolbox/deckard/storage + - rnd_toolbox/hwlib + - robot/favicon/python + - robot/jupiter/library/python/sample + - robot/jupiter/scripts + - robot/jupiter/viewers/galileo + - robot/kwyt/scripts/sampling_data + - robot/lemur/scripts/common + - robot/metrics/forums_sbr/bin/get_forum_urls + - robot/metrics/forums_sbr/bin/get_urls_sample + - robot/metrics/forums_sbr/bin/parse_zora_result + - robot/metrics/fresh_sbr/mk_fresh_serp + - robot/metrics/rotor_missed_words_metric/bin/gemini_canonize + - robot/metrics/speed_sbr/remove_fresh_hosts + - robot/quality/robotrank/mk_learn_pool/lib + - robot/salmon_agent/counters + - robot/salmon_agent/utils + - rt-research/broadmatching/mr/IRT-1517 + - rt-research/broadmatching/scripts/dyn-smart-banners/update_dyn_trashfilter + - rt-research/broadmatching/scripts/pylib + - rt-research/broadmatching/scripts/pylib/bm + - rt-research/broadmatching/scripts/yt/catalogia_mapper + - rt-research/broadmatching/scripts/yt/cdict_generator + - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners + - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners/generate_filtered_links + - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources + - rt-research/multik/deploy/deploy + - saas/tools/devops/lib + - saas/tools/devops/lib23/tests/py2 + - saas/tools/devops/lib23/tests/py23 + - samogon/libs/sandbox + - sandbox/common/projects_handler + - sandbox/projects/BuildBegemotLightTestConfig + - sandbox/projects/BuildNewsPackage + - sandbox/projects/BuildSportProxyData + - sandbox/projects/BuildYobject + - sandbox/projects/CheckFreshDocuments + - sandbox/projects/CompareNewsdResponses + - sandbox/projects/CompareYmakeDump + - sandbox/projects/ConvertVideo + - sandbox/projects/DeployVideoMmetaShard + - sandbox/projects/GetAdvquickDatabase + - sandbox/projects/GetFusionMiddlesearchResponses + - sandbox/projects/GetPokazometerDatabase + - sandbox/projects/IexBuildPackages + - sandbox/projects/IexImportPatterns + - sandbox/projects/LandingConstructor + - sandbox/projects/LandingConstructor/Stat/Jobs + - sandbox/projects/MediaLib + - sandbox/projects/MediaLib/shardmap + - sandbox/projects/MixQueriesExperimentsRegions + - sandbox/projects/PersonalPoiGenerator + - sandbox/projects/PersonalPoiGenerator/PoisDumper + - sandbox/projects/ReleaseBalancerConfigGenerator + - sandbox/projects/ReleaseConfigGeneratorService + - sandbox/projects/ReleaseMediaShardmaps + - sandbox/projects/ReportDataRuntime + - sandbox/projects/ReportDataRuntimeItem + - sandbox/projects/ReportDataRuntimeRT + - sandbox/projects/ReportDataRuntimeTags + 
- sandbox/projects/ReportRuleTestFull + - sandbox/projects/RunNewsLoadtest + - sandbox/projects/SOC/YtProxyAnalyzeCommandParams + - sandbox/projects/SOC/YtRawMasterLogAnalysis + - sandbox/projects/SpawnTestConfigGenerator + - sandbox/projects/TaxiSecurity + - sandbox/projects/TaxiSecurity/BadLogsAnalyzer + - sandbox/projects/TaxiSecurity/YodaxAnalyzer + - sandbox/projects/TestFrontMetricsLogs + - sandbox/projects/TestFrontMetricsLogs/modules + - sandbox/projects/TestReportPerformance + - sandbox/projects/TickenatorBatchProcessing + - sandbox/projects/TickenatorBatchProcessing/YasmScreenshoter + - sandbox/projects/Travel/tasks/tools + - sandbox/projects/Ufo + - sandbox/projects/Ufo/CI + - sandbox/projects/UpdateConfigGeneratorDb + - sandbox/projects/UpdateMapsWizardPpoData + - sandbox/projects/UpdateTestenvNewsdResources + - sandbox/projects/UrlsByShowCounters/executable/lib + - sandbox/projects/VpsAmmo + - sandbox/projects/YabsDebuilder + - sandbox/projects/YabsDebuilderDev + - sandbox/projects/YabsServerStatPerformance + - sandbox/projects/adfox/adfox_ui/testpalm/testrunCreate + - sandbox/projects/alice_evo + - sandbox/projects/alice_evo/AliceEvoIntegrationTestsWrapper + - sandbox/projects/antirobot + - sandbox/projects/antirobot/AsnNames + - sandbox/projects/antirobot/LoadTesting + - sandbox/projects/autobudget/autobudget_lib + - sandbox/projects/avia/avia_statistics/update_alternative_routes_prices + - sandbox/projects/avia/avia_statistics/update_flights + - sandbox/projects/avia/avia_statistics/update_median_prices + - sandbox/projects/avia/avia_statistics/update_popular_months + - sandbox/projects/avia/avia_statistics/update_return_ticket_prices + - sandbox/projects/avia/avia_statistics/update_route_crosslinks + - sandbox/projects/avia/flight_status_registrar/FlightStatsRegistrar + - sandbox/projects/avia/flight_status_registrar/OagFlightsRegistrar + - sandbox/projects/avia/flight_status_registrar/VariFlightRegistrar + - sandbox/projects/avia/log_unknown_fare_codes + - sandbox/projects/avia/travel_avia_dump_resource/task + - sandbox/projects/bsyeti + - sandbox/projects/canvas + - sandbox/projects/canvas/video_constructor_utils + - sandbox/projects/cloud/yfm + - sandbox/projects/common/compare_upper_results + - sandbox/projects/common/fusion + - sandbox/projects/common/gencfg + - sandbox/projects/common/market_report + - sandbox/projects/common/mobilesearch + - sandbox/projects/common/mobilesearch/startrek_client + - sandbox/projects/common/yabs + - sandbox/projects/gencfg + - sandbox/projects/gencfg/BuildConfigGenerator2 + - sandbox/projects/gencfg/GencfgMonitoringCharts + - sandbox/projects/gencfg/ReleaseConfigGenerator2 + - sandbox/projects/gencfg/workflow + - sandbox/projects/health/acceptance_begemot_graph + - sandbox/projects/laas + - sandbox/projects/laas/CollectTestGeobases + - sandbox/projects/logs/HashedSessionsDiff + - sandbox/projects/logs/TestRalibPerfomance + - sandbox/projects/market/infra/helpers + - sandbox/projects/market/infra/helpers/changes_helper + - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks + - sandbox/projects/media + - sandbox/projects/media/admins/mysqlcopydb + - sandbox/projects/media/kp-front-nginx/config-validation + - sandbox/projects/media_crm/tasks + - sandbox/projects/media_crm/tasks/media_crm_deploy + - sandbox/projects/metrika/mobile/sdk/helpers + - sandbox/projects/mssngr/rtc + - sandbox/projects/mssngr/runtime/MssngrRouterLoadTest + - sandbox/projects/music + - sandbox/projects/music/MusicExportYdbToYt + - 
sandbox/projects/music/ReleaseMusic + - sandbox/projects/music/deployment/helpers + - sandbox/projects/news + - sandbox/projects/news/CompareNewsAnnotatorResponses + - sandbox/projects/news/UpdateRTHubAdvWidgetResources + - sandbox/projects/ofd/backend/ofd_backend_package_build + - sandbox/projects/ofd/backend/ofd_backend_run_tests + - sandbox/projects/ofd/notifier/ofd_notifier_package_build + - sandbox/projects/ofd/runtime/ofd_runtime_package_build + - sandbox/projects/ofd/runtime/ofd_runtime_run_tests + - sandbox/projects/ofd/tasks/ofd_tasks_package_build + - sandbox/projects/porto/BuildPortoLayer + - sandbox/projects/porto/BuildPortoLayerTmp + - sandbox/projects/qafw + - sandbox/projects/qafw/ansible + - sandbox/projects/reconf + - sandbox/projects/sandbox_ci/sandbox_ci_compare_load_test + - sandbox/projects/sandbox_ci/task + - sandbox/projects/tests + - sandbox/projects/turbo + - sandbox/projects/turbo/CompareTurboResponses + - sandbox/projects/turbo/SampleForTurbo + - sandbox/projects/vh + - sandbox/projects/vh/faas/FaasConvertVideoVodTest + - sandbox/projects/vh/frontend/count_diff + - sandbox/projects/vh/frontend/generate_requests_from_yt_logs + - sandbox/projects/vins + - sandbox/projects/vins/AliceBegemotMegamindPerfTest + - sandbox/projects/vins/BuildVinsCustomEntity + - sandbox/projects/vins/MegamindPerfTest + - sandbox/projects/vins/VinsPerfTest + - sandbox/projects/websearch/CheckPrechargeAfterMemoryMap + - sandbox/projects/yane/ParseYanswerFactLogs + - sandbox/sdk2 + - sandbox/sdk2/vcs + - sandbox/serviceapi + - sandbox/serviceapi/handlers + - sandbox/yasandbox/database/clickhouse + - skynet/kernel + - skynet/kernel/util/sys/user + - skynet/library + - skynet/library/tasks + - smart_devices/crash_analytics/tools/minidump_analyzer + - smart_devices/tools/launcher2/tests/restarts + - smm/lib/models/sklearn + - sprav/mining/botanik_miner + - statbox/qb2 + - statbox/qb2/qb2_extensions/api/v1/extractors + - statbox/qb2/qb2_extensions/api/v1/extractors/pool + - statbox/statkey/jam/jobs/cubes/desktop_installs/v2 + - strm/generate/lib/generate + - sup/stat/find_pushes + - talents/nlu/scripts/train/geoname + - testenv/jobs/rtyserver + - tools/mkdocs_builder/lib + - tools/mkdocs_builder/mkdocs_yandex + - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex + - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex/ext/markdown + - tools/releaser/src + - tools/ygetparam + - travel/avia/library/python/common + - travel/avia/library/python/common/tests + - travel/avia/library/python/common/tests/lib + - travel/avia/library/python/common/utils + - travel/avia/library/python/geosearch + - travel/avia/library/python/geosearch/views + - travel/avia/library/python/route_search + - travel/avia/library/python/route_search/by_number + - travel/rasp/bus/admin/utils + - travel/rasp/bus/admin/utils/points + - travel/rasp/library/python/common/tests + - travel/rasp/library/python/common/tests/data_api/billing + - travel/rasp/library/python/common/tests/data_api/dzv + - travel/rasp/library/python/geosearch + - travel/rasp/library/python/geosearch/views + - travel/rasp/library/python/route_search + - travel/rasp/library/python/route_search/by_number + - travel/rasp/train_api + - travel/rasp/train_api/scripts + - travel/rasp/train_api/tests + - travel/rasp/train_api/tests/tariffs/train/views + - travel/rasp/train_api/tests/tariffs/train/wizard + - travel/rasp/train_api/tests/train_purchase + - travel/rasp/train_api/tests/train_purchase/tasks + - travel/rasp/train_api/train_partners/base/train_details + - 
vcs/svn/hooks/check_arc_commit + - vh/telegram/sqs2media + - vh/telegram/sqs2media/handlers + - voicetech/asr/tools/asr_analyzer/lib + - voicetech/common/voicetable/bin/filter_text + - voicetech/common/voicetable/checks/bin/general_voicetable_check + - voicetech/infra/gdpr_proxy/service + - voicetech/tts/vh/utils + - weather/workers/warnings/general + - yabs/analytics/anomaly_analyzer/src + - yabs/chat_bot/bot + - yabs/event-utils + - yabs/outdoor/libs/confirmed_booking + - yabs/python-libs/common + - yabs/qa/b2b_utils/bsserver_b2b/engine/bs_utils + - yabs/qa/b2b_utils/bsserver_b2b/engine/mongo_utils + - yabs/qa/b2b_utils/bsserver_b2b/engine/run + - yabs/qa/b2b_utils/bsserver_b2b/engine/validate_scripts + - yabs/qa/yabs_b2b_tank/qabs/b2b + - yabs/sbyt/testing/core + - yabs/server/cs/pylibs/partner_interface_monitor + - yabs/server/cs/pylibs/settings + - yabs/server/infra/bstrbufbuf/plugin + - yabs/server/infra/trivial_cron + - yabs/server/libs/py_markdown_strings + - yabs/server/test/ft/BSDEV-73064 + - yabs/server/test/ft/BSSERVER-11503 + - yabs/server/test/ft/BSSERVER-14195 + - yabs/server/test/ft/BSSERVER-2122 + - yabs/server/test/ft/BSSERVER-2158 + - yabs/server/test/ft/BSSERVER-2454 + - yabs/server/test/ft/BSSERVER-2976 + - yabs/server/test/ft/BSSERVER-3895 + - yabs/server/test/ft/BSSERVER-9233 + - yabs/server/test/ft/NANPU-817 + - yabs/server/test/ft/checks + - yabs/server/test/pylibs/qxl + - yabs/server/test/pylibs/simulator + - yabs/server/test/qabs_bsserver_pytest + - yabs/server/test/tools/oneshot_tester/lib + - yabs/stat/dropstat2/api/lib + - yabs/stat/infra/clickhouse/repair_master_report + - yabs/utils/autosupbs/pylibs/tasks_generator + - yabs/utils/autosupbs/tests/tasks_generator + - yabs/utils/yabs-mysql-binlog-audit/lib + - yabs/vh/cms-pgaas/cms_common + - yabs/vh/cms-pgaas/cms_common/biz + - yabs/vh/cms-pgaas/content_importer/evsproduction/pattern_based + - yabs/vh/cms-pgaas/content_ksiva_api/lib + - yabs/vh/cms-pgaas/feed_miner + - yabs/vh/cms-pgaas/feed_miner/downloader_middlewares + - yabs/vh/frontend/test/vh_pytest + - yabs/web-bins/export-stat/pcode_experiments/dill + - yabs/web-bins/export-stat/pcode_experiments/issue + - yaphone/advisor/project + - yaphone/localization_admin/src + - yaphone/localization_admin/src/models + - yaphone/localization_admin/src/models/details + - yaphone/localization_admin/src/models/support_info + - yp/scheduler_simulator/simtool + - yql/tools/docs/custom_mkdocs + - yql/tools/docs/wiki2markdown + - yql/tools/mrjob/test + - yql/tools/qplayer + - yweb/antimalware/mitb/mitb_monitor/lib + - yweb/antiporno/analyze_yt_ops + - yweb/antiporno/cp_conv/prepare_suggestive_cp_img_basket + - yweb/antiporno/cp_conv/text_classif/bin + - yweb/antiporno/nav/lib + - yweb/antiporno/pyutil/url + - yweb/antiporno/queries_manual_markup/lib + - yweb/antiporno/query_analyzer/porn_query_config_parser + - yweb/antiporno/site_reachability/lib/reachability_common + - yweb/antiporno/top_queries_cleanup/yql/test + - yweb/antispam/clean_web/tools/run_cm_targets + - yweb/antispam/mascot/scripts/tcinet + - yweb/antispam/seo_masks/py/static_impl/ut + - yweb/antispam/tools/yql + - yweb/antispam/webspam/collections/vw_model_applier/bin + - yweb/antispam/webspam/export/tries/tools/upper_tries + - yweb/antispam/ytgr/viewer + - yweb/blender/newsletter/unused_formulas + - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils + - yweb/blender/scripts/nirvana/jobs/calc_workflow_num_with_filters + - yweb/blender/scripts/nirvana/jobs/join_features + - 
yweb/blender/scripts/nirvana/jobs/train_sbs_model/utils + - yweb/blogs/parsers/test + - yweb/freshness/scripts/sport_pushes + - yweb/freshness/scripts/sport_wizard + - yweb/freshness/scripts/svn + - yweb/freshness/scripts/trends/trendbot_tg/tg_handler + - yweb/freshness/scripts/trends/trendbot_tg/tg_handler/foreground + - yweb/news/hosts_differ + - yweb/news/runtime_scripts/event_to_infra + - yweb/news/tests + - yweb/news/tests/export + - yweb/news/tests/utils + - yweb/robot/limbo + - yweb/robot/limbo/imports + - yweb/robot/metrics/pmusca/lib + - yweb/sitelinks/astrolabe/build_bna/candidates/filter_by_region + - yweb/sitelinks/scripts/sitelinks + - yweb/structhtml/richsnippets/scripts/build_foto_recipes/prepare_to_deploy + - yweb/verticals/scripts/sport_chats + - yweb/verticals/scripts/sport_zen_updater/add_parsed_zen_urls + - yweb/video/vparsrobot/v2/tests + - yweb/video/vparsrobot/v2/tests-large + - yweb/webdaemons/clickdaemon/tools/create_ammo_from_tcpdump + - yweb/yasap/answers_nirvana/make_ammos + - yweb/yasap/answers_quality/plagiarism/prepare_scraper_queries + - yweb/yasap/pdb/backend/offline_views/history_calculator + - yweb/yasap/pdb/food/normalizer + - yweb/yasap/pdb/nirvana/gathered_boards_delta + - yweb/younglings/tasks/YOUNGLINGS-516 + - zootopia/analytics/drive/source/drive/operations/support/registrations/reg_quality + - zootopia/hub/vds + - zootopia/hub/vds/onetime/orgthief/orgthief/parsers + - zootopia/hub/vds/scripts + - zootopia/hub/vds/services/velobike + F841: + ignore: + - F841 + prefixes: + - addappter/web/api + - addappter/web/api/views + - addappter/web/api/views/api + - addappter/web/api/views/frontend + - addappter/web/common + - addappter/web/common/events + - addappter/web/libs + - addappter/web/libs/marshmallow + - addappter/web/libs/walrus + - addappter/web/testing/fixtures + - adfox/infra/amacs_config/lib + - ads/autobudget/metrics/equivalency_monitoring + - ads/autobudget/metrics/example_monitoring + - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools + - ads/ml_engine/learn/result/local_extended_fstr/lib + - ads/pytorch/lib/online_learning/production/processors/tsar_processor/lib + - ads/quality/apc_check_py + - ads/quality/embedding/tsar_tensor/ft_pool/lib + - ads/quality/ltp/libs/build_pool + - ads/sandbox_scripts/ggmonitor + - alice/analytics/wer/g2p + - alice/hollywood/tests/perf_test + - alice/megamind/tests/library + - alice/paskills/granet_server/tests + - alice/uniproxy/bin/send-digest + - alice/uniproxy/bin/uniproxy + - alice/uniproxy/bin/uniproxy-delivery + - alice/uniproxy/bin/uniproxy-subway + - alice/uniproxy/bin/uniproxy-unistat + - alice/uniproxy/bin/yabio-storage + - alice/uniproxy/library/testing + - alice/uniproxy/library/testing/mocks + - alice/uniproxy/tools/balancer_top + - apphost/conf/tests/blackbox + - april/badb + - april/badb/badb/db/mysql + - april/web/bas/ca + - april/web/bas/ca/forms + - april/web/bas/collector + - aurora/scripts/parsers/zoon_ru + - balancer/test/functional/regexp_host + - balancer/test/functional/regexp_path + - billing/apikeys/apikeys + - billing/apikeys/apikeys/mapper + - billing/bcl/bcl + - billing/refs/refs + - cloud/marketplace/queue/yc_marketplace_queue + - cloud/netinfra/rknfilter/yc_rkn_common + - cloud/netinfra/rknfilter/yc_rkn_config_node + - cmnt/tools/regression/request_classes + - cv/imageproc/ocr/tools/database_extraction/nirvana/imgaug/src/augmenters + - cv/short2long/training/yt_calc_factors + - datacloud/log_reader/lib + - devtools/local_cache/ac/tests/perfac + - 
dict/bert/make/lib + - dict/bert/make/lib/models + - dict/bert/make/lib/tasks + - dict/mt/analytics/sentence_breaking/translate_human_eval_comparison + - dict/mt/g2p/asr/graph + - dict/ontodb/onto_lib/sources/wikidata + - dict/ontodb/proto/direct/ut + - dict/ontodb/utils/export_src_codes + - direct/infra/direct_zkcli + - direct/infra/dt-dump-b2yt-data + - district/logbroker_consumers/lib + - district/logbroker_consumers/lib/yt + - dj/tools/rthub_profiles/acceptance/acceptance_tool + - dj/tools/viewer/custom/entity + - edadeal/analytics/scripts/CashbackReport/CashbackReportLib + - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib + - education/lib/common + - education/schoolbook/analytics/adhoc/ANALITICSEDU-687 + - entity/ontodb/tasks/vloggers + - entity/recommender/nirvana/operations/abt_experiments_launcher + - entity/ugc/nirvana/ugcdb/support_hidden_import/lib/ut + - entity/ugc/tools/update_photos + - extsearch/audio/generative/py/uploader + - extsearch/images/tools/nirvana/download_serps + - extsearch/video/robot/cm/library/ut + - extsearch/video/robot/cm/transcoder/cmpy/vh_index_dups_matcher + - extsearch/video/robot/crawling/player_testing/services/live_proxy + - extsearch/video/robot/previews/hitman/lost_preview_status + - extsearch/video/robot/tools/library/python + - extsearch/video/transcoder/per_title/handler + - extsearch/video/transcoder/per_title/vmaf + - geosuggest/conveyors/learn_pool/lst_weights/lib + - haas/cmis/hwr_preorders + - haas/graphite_sync/get_report + - infra/deploy_queue_controller/lib + - infra/dist/dmover/bin/dmover + - infra/dist/dmover/lib + - infra/dist/dmover/lib/internal + - infra/dist/dmover/tests + - infra/host-cpu-metrics + - infra/host-cpu-metrics/host_metrics + - infra/porto/api_py + - infra/qyp/vmproxy/tests + - infra/shawshank/tests + - infra/skybit + - infra/skybit/src + - infra/yp_dns/tools/handle_duplicate_records/lib + - infra/yp_quota_distributor/lib + - keyboard/dict/nirvana/config_generator/blocks + - keyboard/dict/synthetic_ngrams/synthetic_ngrams_builder + - lbs/metrics/lbs_binbase_diff + - library/python/bstr + - logbroker/tools/manual/collect_capacity/base_2020 + - logbroker/tools/manual/collect_capacity/collect_pre + - logbroker/tools/manual/collect_capacity/set_capacity_pre + - logbroker/tools/manual/collect_capacity/topic_list + - logbroker/tools/manual/create_logbroker_account/lib + - logbroker/tools/startrek/st + - mail/python/fake_mulcagate + - mail/python/theatre/app + - mail/python/theatre/app/log_helpers + - mail/tools/sql_execute_per_shard/lib + - maps/analytics/legacy/nile/statadhoc-8703-site-api-report + - maps/automotive/carwashes/tests/src + - maps/automotive/qa/metrics/common/ut + - maps/automotive/tools/statistics_auto/pylib/track_match_finder + - maps/infra/apiteka/config_uploader/tests + - maps/infra/sedem/cli/tests/release + - maps/infra/sedem/cli/tests/release/utils + - maps/tools/matcher_quality/routes_to_geoms + - maps_adv/common/shared_mock/tests + - market/mobile_validator/mt/env + - market/mstat/ch-cache/tests + - market/sre/services/cema-proxy/lib + - market/sre/services/cema/lib + - market/sre/services/cema/lib/classes + - market/yamarec/yamarec/yamarec1 + - market/yamarec/yamarec/yamarec1/tasks + - metrika/admin/brb/server/lib + - metrika/admin/maas/bin/backend + - metrika/admin/maas/bin/monitoring/maas_instances_memory + - metrika/admin/maas/lib/core/common + - metrika/admin/maas/lib/core/daemon + - metrika/admin/maas/lib/core/service + - metrika/admin/python/duty/bot/lib + - 
metrika/admin/python/scripts/jrun + - metrika/tasklets/conductor/impl + - milab/lib/i2tclient/python + - ml/tensorflow/tfnn/tests + - mlp/mail/aspam/experiments/MLP_231 + - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound + - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats + - modadvert/libs/connectors/loggers + - modadvert/libs/http + - modadvert/libs/laas/workers/domain_threats + - modadvert/libs/laas/workers/features_from_href + - modadvert/libs/utils/dictutils/ut + - modadvert/programs/cv_app/libs + - modadvert/tools/accept_campaigns + - mssngr/botfarm/src/bot + - music/analytics/jam-sox/music_lib/financial_reports/tests/unit + - music/tools/download-info - passport/backend/library/distprim - passport/backend/library/distprim/threading - passport/backend/meltingpot/utils - - plus/gift/gift - - plus/gift/gift/admin - - quality/ab_testing/cofe/projects/zalogin - - quality/functionality/chats/common/utils - - quality/functionality/chats/feedback/src/feedback/api - - quality/functionality/chats/feedback/src/feedback/api/v1 - - quality/functionality/rtx/server/plugin - - quality/functionality/snippets/rkn_filter - - quality/functionality/turbo/analytics/quality/cms_report - - quality/functionality/turbo/analytics/quality/samplers/cms_sampler - - quality/functionality/turbo/autoparser/flags_postprocess/tests/medium - - quality/functionality/turbo/tools/pq_delete_docs - - quality/functionality/turbo/tools/tdm - - quality/logs/mousetrack_lib/python/tests - - quality/relev_tools/choice_screen/create_stove - - quality/trailer/suggest/toloka_processor/report_yt - - quality/webfresh/metrics/serpsets/raw_serpset_patcher/tests - - rnd_toolbox/deckard - - rnd_toolbox/nyamm - - robot/library/yuppie - - robot/library/yuppie/modules - - robot/metrics/speed_sbr/nirvana - - robot/quality/nirvana/refererrank/premetric_map - - robot/quality/samovar_conveyour/tools/get_samples - - robot/quality/sendlink_conveyour/tools/max_rank_acceptance - - robot/research/eval_nirvana_graph - - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources - - rt-research/multik/tools/jupyter - - rtmapreduce/tests/recipes/rtmr_processing_recipe - - saas/tools/devops/check_backup - - saas/tools/devops/lib23 - - saas/tools/refresh/import_rtyserver - - sandbox/common/upload - - sandbox/projects/alice_evo - - sandbox/projects/avia/mysql_sync_testing_with_prod - - sandbox/projects/balancer/load/BalancerLoadCompare - - sandbox/projects/bitbucket/GBGAdaptor - - sandbox/projects/devops - - sandbox/projects/devops/HardwareGencfgGroups - - sandbox/projects/dj/DjCompileConfig - - sandbox/projects/geoadv - - sandbox/projects/geoadv/ReleasePrices - - sandbox/projects/geosearch/snippets - - sandbox/projects/geosearch/snippets/AddrsSnippetsTask - - sandbox/projects/hollywood - - sandbox/projects/hollywood/fast_data/BuildAndDeployHollywoodFastData - - sandbox/projects/logbroker/tasks/BuildSeveralResources - - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks - - sandbox/projects/music/deployment/MusicRestoreMdb/YandexCloudDatabase - - sandbox/projects/music/deployment/helpers - - sandbox/projects/news - - sandbox/projects/news/RunNewsComplexLoadtests - - sandbox/projects/news/runtime_tests - - sandbox/projects/news/runtime_tests/CompareNewsApphostServiceResponses - - sandbox/projects/saas/backups/DetachServiceIndex2 - - sandbox/projects/sport_wizard/DeploySportProxyData - - sandbox/projects/video - - sandbox/projects/video/priemka/VideoRobotPriemkaSimple - - 
sandbox/projects/vqe/measure_performance - - sandbox/projects/yt - - sandbox/projects/yt/ci_tasks/packages/YtBuildCronPackage - - sandbox/projects/yt/layers_tasks/YtBuildSpravLayerTask - - search/metrics/monitoring/core - - search/mon/rviewer/db - - search/mon/rviewer/modules - - search/mon/trainer/libs - - search/mon/uchenki/app - - search/mon/uchenki/app/controllers/api - - search/mon/wabbajack/bin/icscr - - search/mon/wabbajack/libs/client - - search/mon/wabbajack/libs/client/parsers - - search/scraper/parser_platform/parsers - - search/scraper_over_yt/scripts - - skynet/kernel - - skynet/kernel/util/tests - - smart_devices/tools/launcher2/tests/restarts - - statbox/statkey/jam/jobs - - statbox/statkey/jam/jobs/cubes/superapp/autofill/v2 - - statbox/statkey/jam/jobs/cubes/superapp/bi_turboappweb_turbo_counter/v2 - - statbox/statkey/jam/jobs/cubes/superapp/cohorts_daily/v2 - - strm/generate/lib/generate - - taxi/graph/packages/taxigraphd - - testenv/core/web_server - - travel/avia/shared_flights/tasks/schedules_dumper - - travel/hotels/suggest/builder - - travel/hotels/suggest/metrics_builder - - travel/rasp/content/rzdParser - - travel/rasp/train_api - - travel/rasp/train_api/middleware - - vcs/manage_contrib - - vh/lib/sqs_watcher - - vh/recommender/tools/delayed_view_stats - - voicetech/asr/markup/select - - voicetech/asr/tools/run_normalizer/prepare_data_for_mt_normalizer - - voicetech/infra/uniproxy/tests/session - - voicetech/infra/voice_ext_mon/bin - - voicetech/spotter/selection_for_annotation/bin - - voicetech/spotter/train/lib - - weather/workers/warnings/push - - yabs/analytics/anomaly_analyzer/src - - yabs/analytics/new_traffic_generation/src - - yabs/analytics/traffic_generation/z_2_barnavig_click_counter - - yabs/analytics/traffic_generation/z_4_spylog_visits_counter - - yabs/analytics/traffic_generation/z_5_appmetr_counter - - yabs/analytics/traffic_generation/z_6_metrika_visits_counter - - yabs/analytics/traffic_generation/z_7_chevent_scc_counter - - yabs/analytics/traffic_generation/z_9_3_metr_yabro_coeff - - yabs/analytics/traffic_generation/z_9_4_all_visits - - yabs/analytics/yt_cleaner - - yabs/autobudget/pylibs/tasks - - yabs/awaps_pvl/pvl/logic - - yabs/outdoor/py_schedule/logic - - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_rsya_only - - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_search_only - - yabs/qa/oneshots/sergtaim/BSSERVER-14259/AddKeywordWithNewIdToYT-test - - yabs/server/cs/pylibs/dash_board - - yabs/server/cs/pylibs/full_graph_plot - - yabs/server/cs/pylibs/gantt_chart/lib - - yabs/server/cs/pylibs/settings - - yabs/server/test/ft/BSSERVER-13708 - - yabs/vh/cms-pgaas/sport_api_importer/sport_api_importer_lib - - yabs/vh/frontend/test/ft/HOME-43539 - - yweb/antispam/cid/analytics/pushes/crypta_lal/args - - yweb/antispam/cid/analytics/pushes/push_sender/args - - yweb/antispam/cid/analytics/pushes/push_sender/cli - - yweb/antispam/cid/support/auto_answer/args - - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils - - yweb/freshness/scripts/sport_pushes - - yweb/news/dupsd/tests - - yweb/news/zen/video_export - - yweb/sitelinks/astrolabe/migration/bna_to_mysql - - yweb/webscripts/video/duplicates/nirvana_scripts/knn/filter_knn_source - - yweb/yasap/pdb/tools/backup/create_backup - - yweb/yasap/znatoki/znatoki_ctl/lib - - yweb/yasap/znatoki/znatoki_ctl/lib/commands - - zootopia/analytics/ml/features/geo_features/idle_duration_features - - zootopia/analytics/ml/join_features/lib - - zootopia/analytics/ml/util/logs_context_manager/lib - 
F403: - ignore: - - F403 - prefixes: - - adfox/infra/registrator - - adfox/infra/registrator/lib - - ads/libs/py_dssm_lib/dssm_applier - - ads/libs/py_ml_factors/matrixnet - - ads/libs/py_vw_lib - - ads/nirvana/xfactor-yt - - ads/tools/mx_feature_remapper - - ads/watchman/contrib/flask-restplus-patched - - ads/watchman/contrib/flask-restplus-patched/flask_restplus_patched - - alice/analytics/utils/marty/run_nirvana_instance - - cloud/ai/speechkit/stt/tests/data_pipeline/join - - cv/short2long/nirvana/lib - - devtools/signer/signer - - dict/ontodb/cmpy/lib - - dict/ontodb/isa/libshorttext/converter/stemmer - - dict/ontodb/report/common - - dict/ontodb/utils/export_src_codes - - edadeal/analytics/scripts/CashbackReport/CashbackReportLib - - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib - - extsearch/geo/recommender/runtime/config/generation - - mail/freezing_tests/active_users_aggregation - - maps/analyzer/tools/online_jams/pylib - - market/seo/offer_base - - mediapers/feature_machine/nirvana/script_runner - - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound - - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/outbound - - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats + - plus/gift/gift + - plus/gift/gift/admin + - quality/ab_testing/cofe/projects/zalogin + - quality/functionality/chats/common/utils + - quality/functionality/chats/feedback/src/feedback/api + - quality/functionality/chats/feedback/src/feedback/api/v1 + - quality/functionality/rtx/server/plugin + - quality/functionality/snippets/rkn_filter + - quality/functionality/turbo/analytics/quality/cms_report + - quality/functionality/turbo/analytics/quality/samplers/cms_sampler + - quality/functionality/turbo/autoparser/flags_postprocess/tests/medium + - quality/functionality/turbo/tools/pq_delete_docs + - quality/functionality/turbo/tools/tdm + - quality/logs/mousetrack_lib/python/tests + - quality/relev_tools/choice_screen/create_stove + - quality/trailer/suggest/toloka_processor/report_yt + - quality/webfresh/metrics/serpsets/raw_serpset_patcher/tests + - rnd_toolbox/deckard + - rnd_toolbox/nyamm + - robot/library/yuppie + - robot/library/yuppie/modules + - robot/metrics/speed_sbr/nirvana + - robot/quality/nirvana/refererrank/premetric_map + - robot/quality/samovar_conveyour/tools/get_samples + - robot/quality/sendlink_conveyour/tools/max_rank_acceptance + - robot/research/eval_nirvana_graph + - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources + - rt-research/multik/tools/jupyter + - rtmapreduce/tests/recipes/rtmr_processing_recipe + - saas/tools/devops/check_backup + - saas/tools/devops/lib23 + - saas/tools/refresh/import_rtyserver + - sandbox/common/upload + - sandbox/projects/alice_evo + - sandbox/projects/avia/mysql_sync_testing_with_prod + - sandbox/projects/balancer/load/BalancerLoadCompare + - sandbox/projects/bitbucket/GBGAdaptor + - sandbox/projects/devops + - sandbox/projects/devops/HardwareGencfgGroups + - sandbox/projects/dj/DjCompileConfig + - sandbox/projects/geoadv + - sandbox/projects/geoadv/ReleasePrices + - sandbox/projects/geosearch/snippets + - sandbox/projects/geosearch/snippets/AddrsSnippetsTask + - sandbox/projects/hollywood + - sandbox/projects/hollywood/fast_data/BuildAndDeployHollywoodFastData + - sandbox/projects/logbroker/tasks/BuildSeveralResources + - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks + - sandbox/projects/music/deployment/MusicRestoreMdb/YandexCloudDatabase + - 
sandbox/projects/music/deployment/helpers + - sandbox/projects/news + - sandbox/projects/news/RunNewsComplexLoadtests + - sandbox/projects/news/runtime_tests + - sandbox/projects/news/runtime_tests/CompareNewsApphostServiceResponses + - sandbox/projects/saas/backups/DetachServiceIndex2 + - sandbox/projects/sport_wizard/DeploySportProxyData + - sandbox/projects/video + - sandbox/projects/video/priemka/VideoRobotPriemkaSimple + - sandbox/projects/vqe/measure_performance + - sandbox/projects/yt + - sandbox/projects/yt/ci_tasks/packages/YtBuildCronPackage + - sandbox/projects/yt/layers_tasks/YtBuildSpravLayerTask + - search/metrics/monitoring/core + - search/mon/rviewer/db + - search/mon/rviewer/modules + - search/mon/trainer/libs + - search/mon/uchenki/app + - search/mon/uchenki/app/controllers/api + - search/mon/wabbajack/bin/icscr + - search/mon/wabbajack/libs/client + - search/mon/wabbajack/libs/client/parsers + - search/scraper/parser_platform/parsers + - search/scraper_over_yt/scripts + - skynet/kernel + - skynet/kernel/util/tests + - smart_devices/tools/launcher2/tests/restarts + - statbox/statkey/jam/jobs + - statbox/statkey/jam/jobs/cubes/superapp/autofill/v2 + - statbox/statkey/jam/jobs/cubes/superapp/bi_turboappweb_turbo_counter/v2 + - statbox/statkey/jam/jobs/cubes/superapp/cohorts_daily/v2 + - strm/generate/lib/generate + - taxi/graph/packages/taxigraphd + - testenv/core/web_server + - travel/avia/shared_flights/tasks/schedules_dumper + - travel/hotels/suggest/builder + - travel/hotels/suggest/metrics_builder + - travel/rasp/content/rzdParser + - travel/rasp/train_api + - travel/rasp/train_api/middleware + - vcs/manage_contrib + - vh/lib/sqs_watcher + - vh/recommender/tools/delayed_view_stats + - voicetech/asr/markup/select + - voicetech/asr/tools/run_normalizer/prepare_data_for_mt_normalizer + - voicetech/infra/uniproxy/tests/session + - voicetech/infra/voice_ext_mon/bin + - voicetech/spotter/selection_for_annotation/bin + - voicetech/spotter/train/lib + - weather/workers/warnings/push + - yabs/analytics/anomaly_analyzer/src + - yabs/analytics/new_traffic_generation/src + - yabs/analytics/traffic_generation/z_2_barnavig_click_counter + - yabs/analytics/traffic_generation/z_4_spylog_visits_counter + - yabs/analytics/traffic_generation/z_5_appmetr_counter + - yabs/analytics/traffic_generation/z_6_metrika_visits_counter + - yabs/analytics/traffic_generation/z_7_chevent_scc_counter + - yabs/analytics/traffic_generation/z_9_3_metr_yabro_coeff + - yabs/analytics/traffic_generation/z_9_4_all_visits + - yabs/analytics/yt_cleaner + - yabs/autobudget/pylibs/tasks + - yabs/awaps_pvl/pvl/logic + - yabs/outdoor/py_schedule/logic + - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_rsya_only + - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_search_only + - yabs/qa/oneshots/sergtaim/BSSERVER-14259/AddKeywordWithNewIdToYT-test + - yabs/server/cs/pylibs/dash_board + - yabs/server/cs/pylibs/full_graph_plot + - yabs/server/cs/pylibs/gantt_chart/lib + - yabs/server/cs/pylibs/settings + - yabs/server/test/ft/BSSERVER-13708 + - yabs/vh/cms-pgaas/sport_api_importer/sport_api_importer_lib + - yabs/vh/frontend/test/ft/HOME-43539 + - yweb/antispam/cid/analytics/pushes/crypta_lal/args + - yweb/antispam/cid/analytics/pushes/push_sender/args + - yweb/antispam/cid/analytics/pushes/push_sender/cli + - yweb/antispam/cid/support/auto_answer/args + - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils + - yweb/freshness/scripts/sport_pushes + - yweb/news/dupsd/tests + - yweb/news/zen/video_export + 
- yweb/sitelinks/astrolabe/migration/bna_to_mysql + - yweb/webscripts/video/duplicates/nirvana_scripts/knn/filter_knn_source + - yweb/yasap/pdb/tools/backup/create_backup + - yweb/yasap/znatoki/znatoki_ctl/lib + - yweb/yasap/znatoki/znatoki_ctl/lib/commands + - zootopia/analytics/ml/features/geo_features/idle_duration_features + - zootopia/analytics/ml/join_features/lib + - zootopia/analytics/ml/util/logs_context_manager/lib + F403: + ignore: + - F403 + prefixes: + - adfox/infra/registrator + - adfox/infra/registrator/lib + - ads/libs/py_dssm_lib/dssm_applier + - ads/libs/py_ml_factors/matrixnet + - ads/libs/py_vw_lib + - ads/nirvana/xfactor-yt + - ads/tools/mx_feature_remapper + - ads/watchman/contrib/flask-restplus-patched + - ads/watchman/contrib/flask-restplus-patched/flask_restplus_patched + - alice/analytics/utils/marty/run_nirvana_instance + - cloud/ai/speechkit/stt/tests/data_pipeline/join + - cv/short2long/nirvana/lib + - devtools/signer/signer + - dict/ontodb/cmpy/lib + - dict/ontodb/isa/libshorttext/converter/stemmer + - dict/ontodb/report/common + - dict/ontodb/utils/export_src_codes + - edadeal/analytics/scripts/CashbackReport/CashbackReportLib + - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib + - extsearch/geo/recommender/runtime/config/generation + - mail/freezing_tests/active_users_aggregation + - maps/analyzer/tools/online_jams/pylib + - market/seo/offer_base + - mediapers/feature_machine/nirvana/script_runner + - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound + - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/outbound + - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats - passport/backend/api/tests/views/bundle/change_avatar - - quality/ab_testing/cofe/projects/alice/sesame/v1_1 - - quality/neural_net/bert/bert/utils/distributed - - quality/neural_net/bert/bert/utils/fs - - sandbox/projects/tycoon/TycoonAdverts - - search/base_search/tools/explain_l1 - - search/mon/rviewer/app - - search/mon/rviewer/db - - search/mon/rviewer/modules - - search/mon/rviewer/modules/clients - - search/mon/rviewer/modules/orchestrator - - travel/avia/avia_api - - travel/avia/avia_api/avia/settings - - travel/avia/avia_api/avia/v1/schemas - - travel/avia/avia_api/tests - - voicetech/asr/cloud_engine/cli/nirvana/aggregate_kenlm_outputs - - voicetech/asr/cloud_engine/cli/nirvana/aggregate_subword_lm_outputs - - voicetech/asr/cloud_engine/cli/nirvana/build_lm - - voicetech/asr/cloud_engine/cli/nirvana/make_kenlm_lingware - - voicetech/asr/cloud_engine/cli/nirvana/make_subword_lm_lingware - - voicetech/asr/cloud_engine/cli/nirvana/merge_lingwares - - voicetech/asr/cloud_engine/cli/nirvana/select_best_kenlm - - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_freq_table - - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_mixture_model - - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_model - - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_lambda - - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_vocabulary - - voicetech/asr/tools/language_model/tests/test_linear_merge - - yabs/outdoor/viewer/back/logic + - quality/ab_testing/cofe/projects/alice/sesame/v1_1 + - quality/neural_net/bert/bert/utils/distributed + - quality/neural_net/bert/bert/utils/fs + - sandbox/projects/tycoon/TycoonAdverts + - search/base_search/tools/explain_l1 + - search/mon/rviewer/app + - search/mon/rviewer/db + - search/mon/rviewer/modules + - search/mon/rviewer/modules/clients + - search/mon/rviewer/modules/orchestrator + - 
+  - travel/avia/avia_api
+  - travel/avia/avia_api/avia/settings
+  - travel/avia/avia_api/avia/v1/schemas
+  - travel/avia/avia_api/tests
+  - voicetech/asr/cloud_engine/cli/nirvana/aggregate_kenlm_outputs
+  - voicetech/asr/cloud_engine/cli/nirvana/aggregate_subword_lm_outputs
+  - voicetech/asr/cloud_engine/cli/nirvana/build_lm
+  - voicetech/asr/cloud_engine/cli/nirvana/make_kenlm_lingware
+  - voicetech/asr/cloud_engine/cli/nirvana/make_subword_lm_lingware
+  - voicetech/asr/cloud_engine/cli/nirvana/merge_lingwares
+  - voicetech/asr/cloud_engine/cli/nirvana/select_best_kenlm
+  - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_freq_table
+  - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_mixture_model
+  - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_model
+  - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_lambda
+  - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_vocabulary
+  - voicetech/asr/tools/language_model/tests/test_linear_merge
+  - yabs/outdoor/viewer/back/logic
 F401:
   ignore:
   - F401
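As far as the structure of this suppression file can be read from the context lines above, each checker code (F841, F403, F401, ...) maps to the codes it suppresses under `ignore` and to the repository path prefixes the suppression covers under `prefixes`. The entry below is a hypothetical sketch of that shape; the paths are placeholders for illustration and are not part of the diff.

# Hypothetical migrations-style entry, mirroring the structure visible above.
# "some/legacy/project" paths are invented, not real Arcadia projects.
F841:
  ignore:
  - F841                      # "local variable assigned but never used"
  prefixes:
  - some/legacy/project       # suppressed here until the code is cleaned up
  - some/legacy/project/lib

A linter consuming this config would presumably skip reporting F841 for any file whose repository path starts with one of the listed prefixes.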
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + #ifndef _READPASSPHRASE_H_ #define _READPASSPHRASE_H_ - + #define RPP_ECHO_OFF 0x00 /* Turn off echo (default). */ #define RPP_ECHO_ON 0x01 /* Leave echo on. */ #define RPP_REQUIRE_TTY 0x02 /* Fail if there is no tty. */ @@ -30,15 +30,15 @@ #define RPP_FORCEUPPER 0x08 /* Force input to upper case. */ #define RPP_SEVENBIT 0x10 /* Strip the high bit from input. */ #define RPP_STDIN 0x20 /* Read from stdin, not /dev/tty */ - + #include <sys/cdefs.h> #ifdef __cplusplus -extern "C" { -#endif -char * readpassphrase(const char *, char *, size_t, int); +extern "C" { +#endif +char * readpassphrase(const char *, char *, size_t, int); #ifdef __cplusplus } // extern "C" #endif - + #endif /* !_READPASSPHRASE_H_ */ diff --git a/contrib/libs/libc_compat/readpassphrase.c b/contrib/libs/libc_compat/readpassphrase.c index b25d10f52f..df677e9ca5 100644 --- a/contrib/libs/libc_compat/readpassphrase.c +++ b/contrib/libs/libc_compat/readpassphrase.c @@ -1,192 +1,192 @@ /* $OpenBSD: readpassphrase.c,v 1.27 2019/01/25 00:19:25 millert Exp $ */ - -/* - * Copyright (c) 2000-2002, 2007, 2010 + +/* + * Copyright (c) 2000-2002, 2007, 2010 * Todd C. Miller <millert@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Sponsored in part by the Defense Advanced Research Projects - * Agency (DARPA) and Air Force Research Laboratory, Air Force - * Materiel Command, USAF, under agreement number F39502-99-1-0512. - */ - -#include <ctype.h> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include <ctype.h> #include <errno.h> -#include <fcntl.h> +#include <fcntl.h> #include <paths.h> #include <pwd.h> #include <signal.h> -#include <string.h> +#include <string.h> #include <termios.h> -#include <unistd.h> +#include <unistd.h> #include <readpassphrase.h> - -#ifndef TCSASOFT -/* If we don't have TCSASOFT define it so that ORing it it below is a no-op. */ -# define TCSASOFT 0 -#endif - -/* SunOS 4.x which lacks _POSIX_VDISABLE, but has VDISABLE */ -#if !defined(_POSIX_VDISABLE) && defined(VDISABLE) -# define _POSIX_VDISABLE VDISABLE -#endif - + +#ifndef TCSASOFT +/* If we don't have TCSASOFT define it so that ORing it it below is a no-op. */ +# define TCSASOFT 0 +#endif + +/* SunOS 4.x which lacks _POSIX_VDISABLE, but has VDISABLE */ +#if !defined(_POSIX_VDISABLE) && defined(VDISABLE) +# define _POSIX_VDISABLE VDISABLE +#endif + static volatile sig_atomic_t signo[_NSIG]; - -static void handler(int); - -char * -readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) -{ - ssize_t nr; - int input, output, save_errno, i, need_restart; - char ch, *p, *end; - struct termios term, oterm; - struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; - struct sigaction savetstp, savettin, savettou, savepipe; - - /* I suppose we could alloc on demand in this case (XXX). */ - if (bufsiz == 0) { - errno = EINVAL; - return(NULL); - } - -restart: + +static void handler(int); + +char * +readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) +{ + ssize_t nr; + int input, output, save_errno, i, need_restart; + char ch, *p, *end; + struct termios term, oterm; + struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; + struct sigaction savetstp, savettin, savettou, savepipe; + + /* I suppose we could alloc on demand in this case (XXX). */ + if (bufsiz == 0) { + errno = EINVAL; + return(NULL); + } + +restart: for (i = 0; i < _NSIG; i++) - signo[i] = 0; - nr = -1; - save_errno = 0; - need_restart = 0; - /* - * Read and write to /dev/tty if available. If not, read from - * stdin and write to stderr unless a tty is required. - */ - if ((flags & RPP_STDIN) || - (input = output = open(_PATH_TTY, O_RDWR)) == -1) { - if (flags & RPP_REQUIRE_TTY) { - errno = ENOTTY; - return(NULL); - } - input = STDIN_FILENO; - output = STDERR_FILENO; - } - - /* - * Turn off echo if possible. - * If we are using a tty but are not the foreground pgrp this will - * generate SIGTTOU, so do it *before* installing the signal handlers. - */ - if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) { - memcpy(&term, &oterm, sizeof(term)); - if (!(flags & RPP_ECHO_ON)) + signo[i] = 0; + nr = -1; + save_errno = 0; + need_restart = 0; + /* + * Read and write to /dev/tty if available. If not, read from + * stdin and write to stderr unless a tty is required. 
+ */ + if ((flags & RPP_STDIN) || + (input = output = open(_PATH_TTY, O_RDWR)) == -1) { + if (flags & RPP_REQUIRE_TTY) { + errno = ENOTTY; + return(NULL); + } + input = STDIN_FILENO; + output = STDERR_FILENO; + } + + /* + * Turn off echo if possible. + * If we are using a tty but are not the foreground pgrp this will + * generate SIGTTOU, so do it *before* installing the signal handlers. + */ + if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) { + memcpy(&term, &oterm, sizeof(term)); + if (!(flags & RPP_ECHO_ON)) term.c_lflag &= ~(ECHO | ECHONL); - (void)tcsetattr(input, TCSAFLUSH|TCSASOFT, &term); - } else { - memset(&term, 0, sizeof(term)); - term.c_lflag |= ECHO; - memset(&oterm, 0, sizeof(oterm)); - oterm.c_lflag |= ECHO; - } - - /* - * Catch signals that would otherwise cause the user to end - * up with echo turned off in the shell. Don't worry about - * things like SIGXCPU and SIGVTALRM for now. - */ - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; /* don't restart system calls */ - sa.sa_handler = handler; - (void)sigaction(SIGALRM, &sa, &savealrm); - (void)sigaction(SIGHUP, &sa, &savehup); - (void)sigaction(SIGINT, &sa, &saveint); - (void)sigaction(SIGPIPE, &sa, &savepipe); - (void)sigaction(SIGQUIT, &sa, &savequit); - (void)sigaction(SIGTERM, &sa, &saveterm); - (void)sigaction(SIGTSTP, &sa, &savetstp); - (void)sigaction(SIGTTIN, &sa, &savettin); - (void)sigaction(SIGTTOU, &sa, &savettou); - - if (!(flags & RPP_STDIN)) - (void)write(output, prompt, strlen(prompt)); - end = buf + bufsiz - 1; - p = buf; - while ((nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r') { - if (p < end) { - if ((flags & RPP_SEVENBIT)) - ch &= 0x7f; - if (isalpha((unsigned char)ch)) { - if ((flags & RPP_FORCELOWER)) - ch = (char)tolower((unsigned char)ch); - if ((flags & RPP_FORCEUPPER)) - ch = (char)toupper((unsigned char)ch); - } - *p++ = ch; - } - } - *p = '\0'; - save_errno = errno; - if (!(term.c_lflag & ECHO)) - (void)write(output, "\n", 1); - - /* Restore old terminal settings and signals. */ - if (memcmp(&term, &oterm, sizeof(term)) != 0) { - const int sigttou = signo[SIGTTOU]; - - /* Ignore SIGTTOU generated when we are not the fg pgrp. */ - while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 && - errno == EINTR && !signo[SIGTTOU]) - continue; - signo[SIGTTOU] = sigttou; - } - (void)sigaction(SIGALRM, &savealrm, NULL); - (void)sigaction(SIGHUP, &savehup, NULL); - (void)sigaction(SIGINT, &saveint, NULL); - (void)sigaction(SIGQUIT, &savequit, NULL); - (void)sigaction(SIGPIPE, &savepipe, NULL); - (void)sigaction(SIGTERM, &saveterm, NULL); - (void)sigaction(SIGTSTP, &savetstp, NULL); - (void)sigaction(SIGTTIN, &savettin, NULL); - (void)sigaction(SIGTTOU, &savettou, NULL); - if (input != STDIN_FILENO) - (void)close(input); - - /* - * If we were interrupted by a signal, resend it to ourselves - * now that we have restored the signal handlers. - */ + (void)tcsetattr(input, TCSAFLUSH|TCSASOFT, &term); + } else { + memset(&term, 0, sizeof(term)); + term.c_lflag |= ECHO; + memset(&oterm, 0, sizeof(oterm)); + oterm.c_lflag |= ECHO; + } + + /* + * Catch signals that would otherwise cause the user to end + * up with echo turned off in the shell. Don't worry about + * things like SIGXCPU and SIGVTALRM for now. 
+ */ + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; /* don't restart system calls */ + sa.sa_handler = handler; + (void)sigaction(SIGALRM, &sa, &savealrm); + (void)sigaction(SIGHUP, &sa, &savehup); + (void)sigaction(SIGINT, &sa, &saveint); + (void)sigaction(SIGPIPE, &sa, &savepipe); + (void)sigaction(SIGQUIT, &sa, &savequit); + (void)sigaction(SIGTERM, &sa, &saveterm); + (void)sigaction(SIGTSTP, &sa, &savetstp); + (void)sigaction(SIGTTIN, &sa, &savettin); + (void)sigaction(SIGTTOU, &sa, &savettou); + + if (!(flags & RPP_STDIN)) + (void)write(output, prompt, strlen(prompt)); + end = buf + bufsiz - 1; + p = buf; + while ((nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r') { + if (p < end) { + if ((flags & RPP_SEVENBIT)) + ch &= 0x7f; + if (isalpha((unsigned char)ch)) { + if ((flags & RPP_FORCELOWER)) + ch = (char)tolower((unsigned char)ch); + if ((flags & RPP_FORCEUPPER)) + ch = (char)toupper((unsigned char)ch); + } + *p++ = ch; + } + } + *p = '\0'; + save_errno = errno; + if (!(term.c_lflag & ECHO)) + (void)write(output, "\n", 1); + + /* Restore old terminal settings and signals. */ + if (memcmp(&term, &oterm, sizeof(term)) != 0) { + const int sigttou = signo[SIGTTOU]; + + /* Ignore SIGTTOU generated when we are not the fg pgrp. */ + while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 && + errno == EINTR && !signo[SIGTTOU]) + continue; + signo[SIGTTOU] = sigttou; + } + (void)sigaction(SIGALRM, &savealrm, NULL); + (void)sigaction(SIGHUP, &savehup, NULL); + (void)sigaction(SIGINT, &saveint, NULL); + (void)sigaction(SIGQUIT, &savequit, NULL); + (void)sigaction(SIGPIPE, &savepipe, NULL); + (void)sigaction(SIGTERM, &saveterm, NULL); + (void)sigaction(SIGTSTP, &savetstp, NULL); + (void)sigaction(SIGTTIN, &savettin, NULL); + (void)sigaction(SIGTTOU, &savettou, NULL); + if (input != STDIN_FILENO) + (void)close(input); + + /* + * If we were interrupted by a signal, resend it to ourselves + * now that we have restored the signal handlers. + */ for (i = 0; i < _NSIG; i++) { - if (signo[i]) { - kill(getpid(), i); - switch (i) { - case SIGTSTP: - case SIGTTIN: - case SIGTTOU: - need_restart = 1; - } - } - } - if (need_restart) - goto restart; - - if (save_errno) - errno = save_errno; - return(nr == -1 ? NULL : buf); -} - - -static void handler(int s) -{ - - signo[s] = 1; -} + if (signo[i]) { + kill(getpid(), i); + switch (i) { + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + need_restart = 1; + } + } + } + if (need_restart) + goto restart; + + if (save_errno) + errno = save_errno; + return(nr == -1 ? NULL : buf); +} + + +static void handler(int s) +{ + + signo[s] = 1; +} diff --git a/library/cpp/lfalloc/lf_allocX64.h b/library/cpp/lfalloc/lf_allocX64.h index 20df33b60d..fd2a906d6f 100644 --- a/library/cpp/lfalloc/lf_allocX64.h +++ b/library/cpp/lfalloc/lf_allocX64.h @@ -1679,7 +1679,7 @@ static void DebugTraceMMgr(const char* pszFormat, ...) 
// __cdecl #ifdef _win_ OutputDebugStringA(buff); #else - fputs(buff, stderr); + fputs(buff, stderr); #endif } diff --git a/library/cpp/threading/local_executor/tbb_local_executor.cpp b/library/cpp/threading/local_executor/tbb_local_executor.cpp index 9903ef33de..65d6659443 100644 --- a/library/cpp/threading/local_executor/tbb_local_executor.cpp +++ b/library/cpp/threading/local_executor/tbb_local_executor.cpp @@ -1,53 +1,53 @@ -#include "tbb_local_executor.h" - -template <bool RespectTls> -void NPar::TTbbLocalExecutor<RespectTls>::SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId) { - for (int i = firstId; i < lastId; ++i) { - Group.run([=] { exec(i); }); - } -} - -template <bool RespectTls> -int NPar::TTbbLocalExecutor<RespectTls>::GetThreadCount() const noexcept { - return NumberOfTbbThreads - 1; -} - -template <bool RespectTls> -int NPar::TTbbLocalExecutor<RespectTls>::GetWorkerThreadId() const noexcept { - return TbbArena.execute([] { - return tbb::this_task_arena::current_thread_index(); - }); -} - -template <bool RespectTls> -void NPar::TTbbLocalExecutor<RespectTls>::Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) { - if (flags & WAIT_COMPLETE) { - exec->LocalExec(id); - } else { - TbbArena.execute([=] { - SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, id, id + 1); - }); - } -} - -template <bool RespectTls> -void NPar::TTbbLocalExecutor<RespectTls>::ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) { - if (flags & WAIT_COMPLETE) { - TbbArena.execute([=] { - if (RespectTls) { - tbb::this_task_arena::isolate([=]{ - tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); }); - }); - } else { - tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); }); - } - }); - } else { - TbbArena.execute([=] { - SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, firstId, lastId); - }); - } -} - -template class NPar::TTbbLocalExecutor<true>; -template class NPar::TTbbLocalExecutor<false>; +#include "tbb_local_executor.h" + +template <bool RespectTls> +void NPar::TTbbLocalExecutor<RespectTls>::SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId) { + for (int i = firstId; i < lastId; ++i) { + Group.run([=] { exec(i); }); + } +} + +template <bool RespectTls> +int NPar::TTbbLocalExecutor<RespectTls>::GetThreadCount() const noexcept { + return NumberOfTbbThreads - 1; +} + +template <bool RespectTls> +int NPar::TTbbLocalExecutor<RespectTls>::GetWorkerThreadId() const noexcept { + return TbbArena.execute([] { + return tbb::this_task_arena::current_thread_index(); + }); +} + +template <bool RespectTls> +void NPar::TTbbLocalExecutor<RespectTls>::Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) { + if (flags & WAIT_COMPLETE) { + exec->LocalExec(id); + } else { + TbbArena.execute([=] { + SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, id, id + 1); + }); + } +} + +template <bool RespectTls> +void NPar::TTbbLocalExecutor<RespectTls>::ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) { + if (flags & WAIT_COMPLETE) { + TbbArena.execute([=] { + if (RespectTls) { + tbb::this_task_arena::isolate([=]{ + tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); }); + }); + } else { + tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); }); + } + }); + } else { + TbbArena.execute([=] { + SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, firstId, lastId); + }); + } +} + +template class 
NPar::TTbbLocalExecutor<true>; +template class NPar::TTbbLocalExecutor<false>; diff --git a/library/cpp/threading/local_executor/tbb_local_executor.h b/library/cpp/threading/local_executor/tbb_local_executor.h index f62694d6f7..8d790db18c 100644 --- a/library/cpp/threading/local_executor/tbb_local_executor.h +++ b/library/cpp/threading/local_executor/tbb_local_executor.h @@ -1,49 +1,49 @@ -#pragma once - -#include "local_executor.h" -#define __TBB_TASK_ISOLATION 1 -#define __TBB_NO_IMPLICIT_LINKAGE 1 - -#include <contrib/libs/tbb/include/tbb/blocked_range.h> -#include <contrib/libs/tbb/include/tbb/parallel_for.h> -#include <contrib/libs/tbb/include/tbb/task_arena.h> -#include <contrib/libs/tbb/include/tbb/task_group.h> - -namespace NPar { - template <bool RespectTls = false> - class TTbbLocalExecutor final: public ILocalExecutor { - public: - TTbbLocalExecutor(int nThreads) - : ILocalExecutor() - , TbbArena(nThreads) - , NumberOfTbbThreads(nThreads) {} - ~TTbbLocalExecutor() noexcept override {} - - // 0-based ILocalExecutor worker thread identification - virtual int GetWorkerThreadId() const noexcept override; - virtual int GetThreadCount() const noexcept override; - - // Add task for further execution. - // - // @param exec Task description. - // @param id Task argument. - // @param flags Bitmask composed by `HIGH_PRIORITY`, `MED_PRIORITY`, `LOW_PRIORITY` - // and `WAIT_COMPLETE`. - virtual void Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) override; - - // Add tasks range for further execution. - // - // @param exec Task description. - // @param firstId, lastId Task arguments [firstId, lastId) - // @param flags Same as for `Exec`. - virtual void ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) override; - - // Submit tasks for async run - void SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId); - - private: - mutable tbb::task_arena TbbArena; - tbb::task_group Group; - int NumberOfTbbThreads; - }; -} +#pragma once + +#include "local_executor.h" +#define __TBB_TASK_ISOLATION 1 +#define __TBB_NO_IMPLICIT_LINKAGE 1 + +#include <contrib/libs/tbb/include/tbb/blocked_range.h> +#include <contrib/libs/tbb/include/tbb/parallel_for.h> +#include <contrib/libs/tbb/include/tbb/task_arena.h> +#include <contrib/libs/tbb/include/tbb/task_group.h> + +namespace NPar { + template <bool RespectTls = false> + class TTbbLocalExecutor final: public ILocalExecutor { + public: + TTbbLocalExecutor(int nThreads) + : ILocalExecutor() + , TbbArena(nThreads) + , NumberOfTbbThreads(nThreads) {} + ~TTbbLocalExecutor() noexcept override {} + + // 0-based ILocalExecutor worker thread identification + virtual int GetWorkerThreadId() const noexcept override; + virtual int GetThreadCount() const noexcept override; + + // Add task for further execution. + // + // @param exec Task description. + // @param id Task argument. + // @param flags Bitmask composed by `HIGH_PRIORITY`, `MED_PRIORITY`, `LOW_PRIORITY` + // and `WAIT_COMPLETE`. + virtual void Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) override; + + // Add tasks range for further execution. + // + // @param exec Task description. + // @param firstId, lastId Task arguments [firstId, lastId) + // @param flags Same as for `Exec`. 
+ virtual void ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) override; + + // Submit tasks for async run + void SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId); + + private: + mutable tbb::task_arena TbbArena; + tbb::task_group Group; + int NumberOfTbbThreads; + }; +} diff --git a/library/cpp/threading/local_executor/ya.make b/library/cpp/threading/local_executor/ya.make index 7e4ffd2ab5..df210f92bb 100644 --- a/library/cpp/threading/local_executor/ya.make +++ b/library/cpp/threading/local_executor/ya.make @@ -9,7 +9,7 @@ LIBRARY() SRCS( local_executor.cpp - tbb_local_executor.cpp + tbb_local_executor.cpp ) PEERDIR( diff --git a/util/charset/ya.make b/util/charset/ya.make index cf5c171305..26d38cb10b 100644 --- a/util/charset/ya.make +++ b/util/charset/ya.make @@ -19,7 +19,7 @@ JOIN_SRCS( wide.cpp ) -IF (ARCH_X86_64 AND NOT DISABLE_INSTRUCTION_SETS) +IF (ARCH_X86_64 AND NOT DISABLE_INSTRUCTION_SETS) SRC_CPP_SSE41(wide_sse41.cpp) ELSE() SRC( diff --git a/util/datetime/cputimer.cpp b/util/datetime/cputimer.cpp index b804351373..516d372c37 100644 --- a/util/datetime/cputimer.cpp +++ b/util/datetime/cputimer.cpp @@ -12,7 +12,7 @@ #include <sys/types.h> #include <sys/resource.h> #include <sys/param.h> -#elif defined(_win_) +#elif defined(_win_) #include <util/system/winint.h> #endif diff --git a/util/system/info.cpp b/util/system/info.cpp index 1dc3de5604..cf6681e89a 100644 --- a/util/system/info.cpp +++ b/util/system/info.cpp @@ -28,9 +28,9 @@ static int getloadavg(double* loadavg, int nelem) { } #elif defined(_unix_) || defined(_darwin_) #include <sys/types.h> -#endif - -#if defined(_freebsd_) || defined(_darwin_) +#endif + +#if defined(_freebsd_) || defined(_darwin_) #include <sys/sysctl.h> #endif diff --git a/util/thread/lfqueue.h b/util/thread/lfqueue.h index 07b1351624..ab523631e4 100644 --- a/util/thread/lfqueue.h +++ b/util/thread/lfqueue.h @@ -70,10 +70,10 @@ class TLockFreeQueue: public TNonCopyable { } } - alignas(64) TRootNode* volatile JobQueue; - alignas(64) volatile TAtomic FreememCounter; - alignas(64) volatile TAtomic FreeingTaskCounter; - alignas(64) TRootNode* volatile FreePtr; + alignas(64) TRootNode* volatile JobQueue; + alignas(64) volatile TAtomic FreememCounter; + alignas(64) volatile TAtomic FreeingTaskCounter; + alignas(64) TRootNode* volatile FreePtr; void TryToFreeAsyncMemory() { TAtomic keepCounter = AtomicAdd(FreeingTaskCounter, 0); @@ -306,7 +306,7 @@ public: newRoot = new TRootNode; AtomicSet(newRoot->PushQueue, nullptr); listInvertor.DoCopy(AtomicGet(curRoot->PushQueue)); - AtomicSet(newRoot->PopQueue, listInvertor.Copy); + AtomicSet(newRoot->PopQueue, listInvertor.Copy); newRoot->CopyCounter(curRoot); Y_ASSERT(AtomicGet(curRoot->PopQueue) == nullptr); if (AtomicCas(&JobQueue, newRoot, curRoot)) { @@ -15,7 +15,7 @@ exit /b %ERRORLEVEL% :find_ya call :dbg Searching for ya near ya.bat... 
set YA_BAT_REAL=%~dp0ya -if exist "%YA_BAT_REAL%" exit /b 0 +if exist "%YA_BAT_REAL%" exit /b 0 call :err Ya not found exit /b 1 diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h index 41bbb03e0a..9cd104925a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h @@ -1,16 +1,16 @@ -#pragma once - -#include <tuple> -#include <type_traits> - - -template <typename T> -struct function_traits; - -template <typename ReturnType, typename... Args> -struct function_traits<ReturnType(Args...)> -{ - using result = ReturnType; - using arguments = std::tuple<Args...>; - using arguments_decay = std::tuple<typename std::decay<Args>::type...>; -}; +#pragma once + +#include <tuple> +#include <type_traits> + + +template <typename T> +struct function_traits; + +template <typename ReturnType, typename... Args> +struct function_traits<ReturnType(Args...)> +{ + using result = ReturnType; + using arguments = std::tuple<Args...>; + using arguments_decay = std::tuple<typename std::decay<Args>::type...>; +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp index a80495b5d3..f67b37bd71 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp @@ -1,25 +1,25 @@ -#include <Poco/Net/DNS.h> -#include <common/getFQDNOrHostName.h> - - -namespace -{ - std::string getFQDNOrHostNameImpl() - { - try - { - return Poco::Net::DNS::thisHost().name(); - } - catch (...) - { - return Poco::Net::DNS::hostName(); - } - } -} - - -const std::string & getFQDNOrHostName() -{ - static std::string result = getFQDNOrHostNameImpl(); - return result; -} +#include <Poco/Net/DNS.h> +#include <common/getFQDNOrHostName.h> + + +namespace +{ + std::string getFQDNOrHostNameImpl() + { + try + { + return Poco::Net::DNS::thisHost().name(); + } + catch (...) + { + return Poco::Net::DNS::hostName(); + } + } +} + + +const std::string & getFQDNOrHostName() +{ + static std::string result = getFQDNOrHostNameImpl(); + return result; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h index a1a9af6ca0..fe164a6420 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h @@ -1,9 +1,9 @@ -#pragma once - -#include <string> - - -/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag. - * If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'. - */ -const std::string & getFQDNOrHostName(); +#pragma once + +#include <string> + + +/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag. + * If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'. 
+ */ +const std::string & getFQDNOrHostName(); diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp index d0f409007f..054e9be907 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp @@ -6,7 +6,7 @@ #elif defined(OS_LINUX) #include <unistd.h> #include <syscall.h> -#elif defined(OS_FREEBSD) +#elif defined(OS_FREEBSD) #include <pthread_np.h> #else #include <pthread.h> @@ -23,7 +23,7 @@ uint64_t getThreadId() current_tid = gettid(); #elif defined(OS_LINUX) current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid -#elif defined(OS_FREEBSD) +#elif defined(OS_FREEBSD) current_tid = pthread_getthreadid_np(); #elif defined(OS_SUNOS) // On Solaris-derived systems, this returns the ID of the LWP, analogous diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp index 3fc0272d41..49d566dac1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp @@ -1,19 +1,19 @@ /// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. -#include <common/defines.h> +#include <common/defines.h> #if defined(__linux__) && !defined(THREAD_SANITIZER) #define USE_PHDR_CACHE 1 #endif -/// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own. -#ifdef USE_PHDR_CACHE - -#if defined(__clang__) -# pragma clang diagnostic ignored "-Wreserved-id-macro" -# pragma clang diagnostic ignored "-Wunused-macros" -#endif - +/// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own. +#ifdef USE_PHDR_CACHE + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-id-macro" +# pragma clang diagnostic ignored "-Wunused-macros" +#endif + #define __msan_unpoison(X, Y) // NOLINT #if defined(ch_has_feature) # if ch_has_feature(memory_sanitizer) @@ -61,7 +61,7 @@ extern "C" #endif int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data) { - auto * current_phdr_cache = phdr_cache.load(); + auto * current_phdr_cache = phdr_cache.load(); if (!current_phdr_cache) { // Cache is not yet populated, pass through to the original function. 
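For context on the phdr_cache.cpp hunk above: the file interposes dl_iterate_phdr so that callers walk a cached snapshot of the loaded modules' program headers instead of hitting the glibc implementation (and its loader lock) on every call. The following minimal, Linux-only sketch shows how the dl_iterate_phdr entry point is normally consumed; it is an illustration for the reader, not part of this commit.

#include <link.h>   // dl_iterate_phdr, dl_phdr_info (glibc, Linux)
#include <cstdio>

// Callback invoked once per loaded module; returning non-zero stops the walk.
static int print_module(dl_phdr_info* info, size_t /*size*/, void* /*data*/) {
    // dlpi_name is the empty string for the main executable.
    std::printf("module: '%s' (%u program headers)\n",
                info->dlpi_name, static_cast<unsigned>(info->dlpi_phnum));
    return 0;
}

int main() {
    dl_iterate_phdr(print_module, nullptr);
    return 0;
}

Exception unwinding and stack-trace collection typically call dl_iterate_phdr on hot paths, which is what makes the cached override worthwhile. Note in the hunk above that the cache is compiled out under Thread Sanitizer: as the file's comment says, TSan uses dl_iterate_phdr during its own initialization and fails if the program provides one of its own.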
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp index bcc762ef2a..c9968d841c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp @@ -1,90 +1,90 @@ -#include <Common/typeid_cast.h> -#include <Common/assert_cast.h> -#include <Columns/FilterDescription.h> -#include <Columns/ColumnsNumber.h> -#include <Columns/ColumnNullable.h> -#include <Columns/ColumnConst.h> -#include <Core/ColumnWithTypeAndName.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; -} - - -ConstantFilterDescription::ConstantFilterDescription(const IColumn & column) -{ - if (column.onlyNull()) - { - always_false = true; - return; - } - - if (isColumnConst(column)) - { - const ColumnConst & column_const = assert_cast<const ColumnConst &>(column); - ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfLowCardinality(); - - if (!typeid_cast<const ColumnUInt8 *>(column_nested.get())) - { - const ColumnNullable * column_nested_nullable = checkAndGetColumn<ColumnNullable>(*column_nested); - if (!column_nested_nullable || !typeid_cast<const ColumnUInt8 *>(&column_nested_nullable->getNestedColumn())) - { - throw Exception("Illegal type " + column_nested->getName() + " of column for constant filter. Must be UInt8 or Nullable(UInt8).", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); - } - } - - if (column_const.getValue<UInt64>()) - always_true = true; - else - always_false = true; - return; - } -} - - -FilterDescription::FilterDescription(const IColumn & column_) -{ - if (column_.lowCardinality()) - data_holder = column_.convertToFullColumnIfLowCardinality(); - - const auto & column = data_holder ? *data_holder : column_; - - if (const ColumnUInt8 * concrete_column = typeid_cast<const ColumnUInt8 *>(&column)) - { - data = &concrete_column->getData(); - return; - } - - if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(column)) - { - ColumnPtr nested_column = nullable_column->getNestedColumnPtr(); - MutableColumnPtr mutable_holder = IColumn::mutate(std::move(nested_column)); - - ColumnUInt8 * concrete_column = typeid_cast<ColumnUInt8 *>(mutable_holder.get()); - if (!concrete_column) - throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8).", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); - - const NullMap & null_map = nullable_column->getNullMapData(); - IColumn::Filter & res = concrete_column->getData(); - - size_t size = res.size(); - for (size_t i = 0; i < size; ++i) - res[i] = res[i] && !null_map[i]; - - data = &res; - data_holder = std::move(mutable_holder); - return; - } - - throw Exception("Illegal type " + column.getName() + " of column for filter. 
Must be UInt8 or Nullable(UInt8) or Const variants of them.", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); -} - -} +#include <Common/typeid_cast.h> +#include <Common/assert_cast.h> +#include <Columns/FilterDescription.h> +#include <Columns/ColumnsNumber.h> +#include <Columns/ColumnNullable.h> +#include <Columns/ColumnConst.h> +#include <Core/ColumnWithTypeAndName.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; +} + + +ConstantFilterDescription::ConstantFilterDescription(const IColumn & column) +{ + if (column.onlyNull()) + { + always_false = true; + return; + } + + if (isColumnConst(column)) + { + const ColumnConst & column_const = assert_cast<const ColumnConst &>(column); + ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfLowCardinality(); + + if (!typeid_cast<const ColumnUInt8 *>(column_nested.get())) + { + const ColumnNullable * column_nested_nullable = checkAndGetColumn<ColumnNullable>(*column_nested); + if (!column_nested_nullable || !typeid_cast<const ColumnUInt8 *>(&column_nested_nullable->getNestedColumn())) + { + throw Exception("Illegal type " + column_nested->getName() + " of column for constant filter. Must be UInt8 or Nullable(UInt8).", + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + } + } + + if (column_const.getValue<UInt64>()) + always_true = true; + else + always_false = true; + return; + } +} + + +FilterDescription::FilterDescription(const IColumn & column_) +{ + if (column_.lowCardinality()) + data_holder = column_.convertToFullColumnIfLowCardinality(); + + const auto & column = data_holder ? *data_holder : column_; + + if (const ColumnUInt8 * concrete_column = typeid_cast<const ColumnUInt8 *>(&column)) + { + data = &concrete_column->getData(); + return; + } + + if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(column)) + { + ColumnPtr nested_column = nullable_column->getNestedColumnPtr(); + MutableColumnPtr mutable_holder = IColumn::mutate(std::move(nested_column)); + + ColumnUInt8 * concrete_column = typeid_cast<ColumnUInt8 *>(mutable_holder.get()); + if (!concrete_column) + throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8).", + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + + const NullMap & null_map = nullable_column->getNullMapData(); + IColumn::Filter & res = concrete_column->getData(); + + size_t size = res.size(); + for (size_t i = 0; i < size; ++i) + res[i] = res[i] && !null_map[i]; + + data = &res; + data_holder = std::move(mutable_holder); + return; + } + + throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8) or Const variants of them.", + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h index 13f04fdd7a..05812fea28 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h @@ -1,35 +1,35 @@ -#pragma once - -#include <Columns/IColumn.h> - - -namespace DB -{ - -/// Support methods for implementation of WHERE, PREWHERE and HAVING. - - -/// Analyze if the column for filter is constant thus filter is always false or always true. 
-struct ConstantFilterDescription -{ - bool always_false = false; - bool always_true = false; - - ConstantFilterDescription() {} - explicit ConstantFilterDescription(const IColumn & column); -}; - - -/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8). -struct FilterDescription -{ - const IColumn::Filter * data = nullptr; /// Pointer to filter when it is not always true or always false. - ColumnPtr data_holder; /// If new column was generated, it will be owned by holder. - - explicit FilterDescription(const IColumn & column); -}; - - -struct ColumnWithTypeAndName; - -} +#pragma once + +#include <Columns/IColumn.h> + + +namespace DB +{ + +/// Support methods for implementation of WHERE, PREWHERE and HAVING. + + +/// Analyze if the column for filter is constant thus filter is always false or always true. +struct ConstantFilterDescription +{ + bool always_false = false; + bool always_true = false; + + ConstantFilterDescription() {} + explicit ConstantFilterDescription(const IColumn & column); +}; + + +/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8). +struct FilterDescription +{ + const IColumn::Filter * data = nullptr; /// Pointer to filter when it is not always true or always false. + ColumnPtr data_holder; /// If new column was generated, it will be owned by holder. + + explicit FilterDescription(const IColumn & column); +}; + + +struct ColumnWithTypeAndName; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp index f967c02f9e..e38856a75e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp @@ -1,7 +1,7 @@ -#include <Common/ClickHouseRevision.h> - -namespace ClickHouseRevision -{ +#include <Common/ClickHouseRevision.h> + +namespace ClickHouseRevision +{ unsigned getVersionRevision() { return 0; } unsigned getVersionInteger() { return 0; } -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h index 5d90422c56..86d1e3db33 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h @@ -1,7 +1,7 @@ -#pragma once - -namespace ClickHouseRevision -{ +#pragma once + +namespace ClickHouseRevision +{ unsigned getVersionRevision(); - unsigned getVersionInteger(); -} + unsigned getVersionInteger(); +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h index 575c2b2b05..3ffa9bb71d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h @@ -1,460 +1,460 @@ -#pragma once - -#include <Common/HashTable/HashTable.h> -#include <Common/HashTable/HashTableKeyHolder.h> -#include <Common/ColumnsHashingImpl.h> -#include <Common/Arena.h> -#include <Common/LRUCache.h> -#include <Common/assert_cast.h> -#include <common/unaligned.h> - -#include <Columns/ColumnString.h> -#include <Columns/ColumnFixedString.h> -#include <Columns/ColumnLowCardinality.h> - -#include <Core/Defines.h> -#include <memory> +#pragma once + +#include 
<Common/HashTable/HashTable.h> +#include <Common/HashTable/HashTableKeyHolder.h> +#include <Common/ColumnsHashingImpl.h> +#include <Common/Arena.h> +#include <Common/LRUCache.h> +#include <Common/assert_cast.h> +#include <common/unaligned.h> + +#include <Columns/ColumnString.h> +#include <Columns/ColumnFixedString.h> +#include <Columns/ColumnLowCardinality.h> + +#include <Core/Defines.h> +#include <memory> #include <cassert> - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -namespace ColumnsHashing -{ - -/// For the case when there is one numeric key. -/// UInt8/16/32/64 for any type with corresponding bit width. + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace ColumnsHashing +{ + +/// For the case when there is one numeric key. +/// UInt8/16/32/64 for any type with corresponding bit width. template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false> -struct HashMethodOneNumber +struct HashMethodOneNumber : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset> -{ +{ using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>; using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>; - - const char * vec; - - /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. - HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) - { - vec = key_columns[0]->getRawData().data; - } - - HashMethodOneNumber(const IColumn * column) - { - vec = column->getRawData().data; - } - - /// Creates context. Method is called once and result context is used in all threads. - using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr - - /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr. - /// Data is a HashTable where to insert key from column's row. - /// For Serialized method, key may be placed in pool. - using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult - - /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. - using Base::findKey; /// (Data & data, size_t row, Arena & pool) -> FindResult - - /// Get hash value of row. - using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t - - /// Is used for default implementation in HashMethodBase. - FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); } + + const char * vec; + + /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. + HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + { + vec = key_columns[0]->getRawData().data; + } + + HashMethodOneNumber(const IColumn * column) + { + vec = column->getRawData().data; + } + + /// Creates context. Method is called once and result context is used in all threads. + using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr + + /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr. + /// Data is a HashTable where to insert key from column's row. 
+ /// For Serialized method, key may be placed in pool. + using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult + + /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. + using Base::findKey; /// (Data & data, size_t row, Arena & pool) -> FindResult + + /// Get hash value of row. + using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t + + /// Is used for default implementation in HashMethodBase. + FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); } const FieldType * getKeyData() const { return reinterpret_cast<const FieldType *>(vec); } -}; - - -/// For the case when there is one string key. +}; + + +/// For the case when there is one string key. template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false> -struct HashMethodString +struct HashMethodString : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset> -{ +{ using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>; using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>; - - const IColumn::Offset * offsets; - const UInt8 * chars; - - HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) - { - const IColumn & column = *key_columns[0]; - const ColumnString & column_string = assert_cast<const ColumnString &>(column); - offsets = column_string.getOffsets().data(); - chars = column_string.getChars().data(); - } - - auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const - { - StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); - - if constexpr (place_string_to_arena) - { - return ArenaKeyHolder{key, pool}; - } - else - { - return key; - } - } - -protected: - friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; -}; - - -/// For the case when there is one fixed-length string key. + + const IColumn::Offset * offsets; + const UInt8 * chars; + + HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + { + const IColumn & column = *key_columns[0]; + const ColumnString & column_string = assert_cast<const ColumnString &>(column); + offsets = column_string.getOffsets().data(); + chars = column_string.getChars().data(); + } + + auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const + { + StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); + + if constexpr (place_string_to_arena) + { + return ArenaKeyHolder{key, pool}; + } + else + { + return key; + } + } + +protected: + friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; +}; + + +/// For the case when there is one fixed-length string key. 
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false> -struct HashMethodFixedString +struct HashMethodFixedString : public columns_hashing_impl:: HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset> -{ +{ using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>; using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>; - - size_t n; - const ColumnFixedString::Chars * chars; - - HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) - { - const IColumn & column = *key_columns[0]; - const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column); - n = column_string.getN(); - chars = &column_string.getChars(); - } - - auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const - { - StringRef key(&(*chars)[row * n], n); - - if constexpr (place_string_to_arena) - { - return ArenaKeyHolder{key, pool}; - } - else - { - return key; - } - } - -protected: - friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; -}; - - -/// Cache stores dictionaries and saved_hash per dictionary key. -class LowCardinalityDictionaryCache : public HashMethodContext -{ -public: - /// Will assume that dictionaries with same hash has the same keys. - /// Just in case, check that they have also the same size. - struct DictionaryKey - { - UInt128 hash; - UInt64 size; - - bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; } - }; - - struct DictionaryKeyHash - { - size_t operator()(const DictionaryKey & key) const - { - SipHash hash; + + size_t n; + const ColumnFixedString::Chars * chars; + + HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + { + const IColumn & column = *key_columns[0]; + const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column); + n = column_string.getN(); + chars = &column_string.getChars(); + } + + auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const + { + StringRef key(&(*chars)[row * n], n); + + if constexpr (place_string_to_arena) + { + return ArenaKeyHolder{key, pool}; + } + else + { + return key; + } + } + +protected: + friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; +}; + + +/// Cache stores dictionaries and saved_hash per dictionary key. +class LowCardinalityDictionaryCache : public HashMethodContext +{ +public: + /// Will assume that dictionaries with same hash has the same keys. + /// Just in case, check that they have also the same size. + struct DictionaryKey + { + UInt128 hash; + UInt64 size; + + bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; } + }; + + struct DictionaryKeyHash + { + size_t operator()(const DictionaryKey & key) const + { + SipHash hash; hash.update(key.hash); - hash.update(key.size); - return hash.get64(); - } - }; - - struct CachedValues - { - /// Store ptr to dictionary to be sure it won't be deleted. - ColumnPtr dictionary_holder; - /// Hashes for dictionary keys. 
- const UInt64 * saved_hash = nullptr; - }; - - using CachedValuesPtr = std::shared_ptr<CachedValues>; - - explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {} - - CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); } - void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); } - -private: - using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>; - Cache cache; -}; - - -/// Single low cardinality column. -template <typename SingleColumnMethod, typename Mapped, bool use_cache> -struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod -{ - using Base = SingleColumnMethod; - - enum class VisitValue - { - Empty = 0, - Found = 1, - NotFound = 2, - }; - - static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; - using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>; - using FindResult = columns_hashing_impl::FindResultImpl<Mapped>; - - static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) - { - return std::make_shared<LowCardinalityDictionaryCache>(settings); - } - - ColumnRawPtrs key_columns; - const IColumn * positions = nullptr; - size_t size_of_index_type = 0; - - /// saved hash is from current column or from cache. - const UInt64 * saved_hash = nullptr; - /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted. - ColumnPtr dictionary_holder; - - /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. - columns_hashing_impl::MappedCache<Mapped> mapped_cache; - PaddedPODArray<VisitValue> visit_cache; - - /// If initialized column is nullable. - bool is_nullable = false; - - static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * column) - { - auto low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(column); - if (!low_cardinality_column) - throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. 
" - "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR); - return *low_cardinality_column; - } - - HashMethodSingleLowCardinalityColumn( - const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context) - : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context) - { - auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]); - - if (!context) - throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn", - ErrorCodes::LOGICAL_ERROR); - - LowCardinalityDictionaryCache * lcd_cache; - if constexpr (use_cache) - { - lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get()); - if (!lcd_cache) - { - const auto & cached_val = *context; - throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: " - + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); - } - } - - auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); - is_nullable = column->getDictionary().nestedColumnIsNullable(); - key_columns = {dict}; - bool is_shared_dict = column->isSharedDictionary(); - - typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key; - typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values; - - if (is_shared_dict) - { - dictionary_key = {column->getDictionary().getHash(), dict->size()}; - if constexpr (use_cache) - cached_values = lcd_cache->get(dictionary_key); - } - - if (cached_values) - { - saved_hash = cached_values->saved_hash; - dictionary_holder = cached_values->dictionary_holder; - } - else - { - saved_hash = column->getDictionary().tryGetSavedHash(); - dictionary_holder = column->getDictionaryPtr(); - - if constexpr (use_cache) - { - if (is_shared_dict) - { - cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>(); - cached_values->saved_hash = saved_hash; - cached_values->dictionary_holder = dictionary_holder; - - lcd_cache->set(dictionary_key, cached_values); - } - } - } - - if constexpr (has_mapped) - mapped_cache.resize(key_columns[0]->size()); - - VisitValue empty(VisitValue::Empty); - visit_cache.assign(key_columns[0]->size(), empty); - - size_of_index_type = column->getSizeOfIndexType(); - positions = column->getIndexesPtr().get(); - } - - ALWAYS_INLINE size_t getIndexAt(size_t row) const - { - switch (size_of_index_type) - { - case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row); - case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row); - case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row); - case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row); - default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); - } - } - - /// Get the key holder from the key columns for insertion into the hash table. 
- ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const - { - return Base::getKeyHolder(getIndexAt(row), pool); - } - - template <typename Data> - ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool) - { - size_t row = getIndexAt(row_); - - if (is_nullable && row == 0) - { - visit_cache[row] = VisitValue::Found; - bool has_null_key = data.hasNullKeyData(); - data.hasNullKeyData() = true; - - if constexpr (has_mapped) - return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key); - else - return EmplaceResult(!has_null_key); - } - - if (visit_cache[row] == VisitValue::Found) - { - if constexpr (has_mapped) - return EmplaceResult(mapped_cache[row], mapped_cache[row], false); - else - return EmplaceResult(false); - } - - auto key_holder = getKeyHolder(row_, pool); - - bool inserted = false; - typename Data::LookupResult it; - if (saved_hash) - data.emplace(key_holder, it, inserted, saved_hash[row]); - else - data.emplace(key_holder, it, inserted); - - visit_cache[row] = VisitValue::Found; - - if constexpr (has_mapped) - { - auto & mapped = it->getMapped(); - if (inserted) - { - new (&mapped) Mapped(); - } - mapped_cache[row] = mapped; - return EmplaceResult(mapped, mapped_cache[row], inserted); - } - else - return EmplaceResult(inserted); - } - - ALWAYS_INLINE bool isNullAt(size_t i) - { - if (!is_nullable) - return false; - - return getIndexAt(i) == 0; - } - - template <typename Data> - ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool) - { - size_t row = getIndexAt(row_); - - if (is_nullable && row == 0) - { - if constexpr (has_mapped) - return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData()); - else - return FindResult(data.hasNullKeyData()); - } - - if (visit_cache[row] != VisitValue::Empty) - { - if constexpr (has_mapped) - return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found); - else - return FindResult(visit_cache[row] == VisitValue::Found); - } - - auto key_holder = getKeyHolder(row_, pool); - - typename Data::iterator it; - if (saved_hash) - it = data.find(*key_holder, saved_hash[row]); - else - it = data.find(*key_holder); - - bool found = it != data.end(); - visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound; - - if constexpr (has_mapped) - { - if (found) - mapped_cache[row] = it->second; - } - - if constexpr (has_mapped) - return FindResult(&mapped_cache[row], found); - else - return FindResult(found); - } - - template <typename Data> - ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) - { - row = getIndexAt(row); - if (saved_hash) - return saved_hash[row]; - - return Base::getHash(data, row, pool); - } -}; - - -// Optional mask for low cardinality columns. -template <bool has_low_cardinality> -struct LowCardinalityKeys -{ - ColumnRawPtrs nested_columns; - ColumnRawPtrs positions; - Sizes position_sizes; -}; - -template <> -struct LowCardinalityKeys<false> {}; - -/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits. + hash.update(key.size); + return hash.get64(); + } + }; + + struct CachedValues + { + /// Store ptr to dictionary to be sure it won't be deleted. + ColumnPtr dictionary_holder; + /// Hashes for dictionary keys. 
+ const UInt64 * saved_hash = nullptr; + }; + + using CachedValuesPtr = std::shared_ptr<CachedValues>; + + explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {} + + CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); } + void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); } + +private: + using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>; + Cache cache; +}; + + +/// Single low cardinality column. +template <typename SingleColumnMethod, typename Mapped, bool use_cache> +struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod +{ + using Base = SingleColumnMethod; + + enum class VisitValue + { + Empty = 0, + Found = 1, + NotFound = 2, + }; + + static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; + using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>; + using FindResult = columns_hashing_impl::FindResultImpl<Mapped>; + + static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) + { + return std::make_shared<LowCardinalityDictionaryCache>(settings); + } + + ColumnRawPtrs key_columns; + const IColumn * positions = nullptr; + size_t size_of_index_type = 0; + + /// saved hash is from current column or from cache. + const UInt64 * saved_hash = nullptr; + /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted. + ColumnPtr dictionary_holder; + + /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. + columns_hashing_impl::MappedCache<Mapped> mapped_cache; + PaddedPODArray<VisitValue> visit_cache; + + /// If initialized column is nullable. + bool is_nullable = false; + + static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * column) + { + auto low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(column); + if (!low_cardinality_column) + throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. 
" + "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR); + return *low_cardinality_column; + } + + HashMethodSingleLowCardinalityColumn( + const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context) + : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context) + { + auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]); + + if (!context) + throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn", + ErrorCodes::LOGICAL_ERROR); + + LowCardinalityDictionaryCache * lcd_cache; + if constexpr (use_cache) + { + lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get()); + if (!lcd_cache) + { + const auto & cached_val = *context; + throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: " + + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); + } + } + + auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); + is_nullable = column->getDictionary().nestedColumnIsNullable(); + key_columns = {dict}; + bool is_shared_dict = column->isSharedDictionary(); + + typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key; + typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values; + + if (is_shared_dict) + { + dictionary_key = {column->getDictionary().getHash(), dict->size()}; + if constexpr (use_cache) + cached_values = lcd_cache->get(dictionary_key); + } + + if (cached_values) + { + saved_hash = cached_values->saved_hash; + dictionary_holder = cached_values->dictionary_holder; + } + else + { + saved_hash = column->getDictionary().tryGetSavedHash(); + dictionary_holder = column->getDictionaryPtr(); + + if constexpr (use_cache) + { + if (is_shared_dict) + { + cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>(); + cached_values->saved_hash = saved_hash; + cached_values->dictionary_holder = dictionary_holder; + + lcd_cache->set(dictionary_key, cached_values); + } + } + } + + if constexpr (has_mapped) + mapped_cache.resize(key_columns[0]->size()); + + VisitValue empty(VisitValue::Empty); + visit_cache.assign(key_columns[0]->size(), empty); + + size_of_index_type = column->getSizeOfIndexType(); + positions = column->getIndexesPtr().get(); + } + + ALWAYS_INLINE size_t getIndexAt(size_t row) const + { + switch (size_of_index_type) + { + case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row); + case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row); + case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row); + case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row); + default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); + } + } + + /// Get the key holder from the key columns for insertion into the hash table. 
+ ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const + { + return Base::getKeyHolder(getIndexAt(row), pool); + } + + template <typename Data> + ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool) + { + size_t row = getIndexAt(row_); + + if (is_nullable && row == 0) + { + visit_cache[row] = VisitValue::Found; + bool has_null_key = data.hasNullKeyData(); + data.hasNullKeyData() = true; + + if constexpr (has_mapped) + return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key); + else + return EmplaceResult(!has_null_key); + } + + if (visit_cache[row] == VisitValue::Found) + { + if constexpr (has_mapped) + return EmplaceResult(mapped_cache[row], mapped_cache[row], false); + else + return EmplaceResult(false); + } + + auto key_holder = getKeyHolder(row_, pool); + + bool inserted = false; + typename Data::LookupResult it; + if (saved_hash) + data.emplace(key_holder, it, inserted, saved_hash[row]); + else + data.emplace(key_holder, it, inserted); + + visit_cache[row] = VisitValue::Found; + + if constexpr (has_mapped) + { + auto & mapped = it->getMapped(); + if (inserted) + { + new (&mapped) Mapped(); + } + mapped_cache[row] = mapped; + return EmplaceResult(mapped, mapped_cache[row], inserted); + } + else + return EmplaceResult(inserted); + } + + ALWAYS_INLINE bool isNullAt(size_t i) + { + if (!is_nullable) + return false; + + return getIndexAt(i) == 0; + } + + template <typename Data> + ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool) + { + size_t row = getIndexAt(row_); + + if (is_nullable && row == 0) + { + if constexpr (has_mapped) + return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData()); + else + return FindResult(data.hasNullKeyData()); + } + + if (visit_cache[row] != VisitValue::Empty) + { + if constexpr (has_mapped) + return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found); + else + return FindResult(visit_cache[row] == VisitValue::Found); + } + + auto key_holder = getKeyHolder(row_, pool); + + typename Data::iterator it; + if (saved_hash) + it = data.find(*key_holder, saved_hash[row]); + else + it = data.find(*key_holder); + + bool found = it != data.end(); + visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound; + + if constexpr (has_mapped) + { + if (found) + mapped_cache[row] = it->second; + } + + if constexpr (has_mapped) + return FindResult(&mapped_cache[row], found); + else + return FindResult(found); + } + + template <typename Data> + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) + { + row = getIndexAt(row); + if (saved_hash) + return saved_hash[row]; + + return Base::getHash(data, row, pool); + } +}; + + +// Optional mask for low cardinality columns. +template <bool has_low_cardinality> +struct LowCardinalityKeys +{ + ColumnRawPtrs nested_columns; + ColumnRawPtrs positions; + Sizes position_sizes; +}; + +template <> +struct LowCardinalityKeys<false> {}; + +/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits. 
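HashMethodKeysFixed below packs all fixed-size keys of a row into one wide integer (Key is, for example, UInt128), so the hash table compares keys with a single integer comparison instead of walking columns. A reduced sketch of the byte-wise packing idea for two columns that fit in 64 bits; the column layout and names are illustrative assumptions, not the actual packFixed implementation:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    /// Concatenate one row of a UInt32 column and a UInt16 column into a
    /// single 64-bit key; the unused high bytes stay zero, so equal rows
    /// produce bit-identical keys.
    uint64_t packRow(const std::vector<uint32_t> & col_a,
                     const std::vector<uint16_t> & col_b,
                     size_t row)
    {
        uint64_t key = 0;
        char * pos = reinterpret_cast<char *>(&key);

        std::memcpy(pos, &col_a[row], sizeof(uint32_t));
        pos += sizeof(uint32_t);
        std::memcpy(pos, &col_b[row], sizeof(uint16_t));

        return key;
    }

With nullable keys, the real code additionally folds the null bitmap produced by BaseStateKeysFixed::createBitmap into the packed key, so a NULL and a zero value do not collide.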
template < typename Value, typename Key, @@ -463,21 +463,21 @@ template < bool has_low_cardinality_ = false, bool use_cache = true, bool need_offset = false> -struct HashMethodKeysFixed - : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_> +struct HashMethodKeysFixed + : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_> , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset> -{ +{ using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>; using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>; - using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>; - - static constexpr bool has_nullable_keys = has_nullable_keys_; - static constexpr bool has_low_cardinality = has_low_cardinality_; - - LowCardinalityKeys<has_low_cardinality> low_cardinality_keys; - Sizes key_sizes; - size_t keys_size; - + using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>; + + static constexpr bool has_nullable_keys = has_nullable_keys_; + static constexpr bool has_low_cardinality = has_low_cardinality_; + + LowCardinalityKeys<has_low_cardinality> low_cardinality_keys; + Sizes key_sizes; + size_t keys_size; + /// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here. #if defined(__SSSE3__) && !defined(MEMORY_SANITIZER) std::unique_ptr<uint8_t[]> masks; @@ -498,26 +498,26 @@ struct HashMethodKeysFixed return true; } - HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &) - : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size()) - { - if constexpr (has_low_cardinality) - { - low_cardinality_keys.nested_columns.resize(key_columns.size()); - low_cardinality_keys.positions.assign(key_columns.size(), nullptr); - low_cardinality_keys.position_sizes.resize(key_columns.size()); - for (size_t i = 0; i < key_columns.size(); ++i) - { - if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i])) - { - low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get(); - low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes(); - low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType(); - } - else - low_cardinality_keys.nested_columns[i] = key_columns[i]; - } - } + HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &) + : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size()) + { + if constexpr (has_low_cardinality) + { + low_cardinality_keys.nested_columns.resize(key_columns.size()); + low_cardinality_keys.positions.assign(key_columns.size(), nullptr); + low_cardinality_keys.position_sizes.resize(key_columns.size()); + for (size_t i = 0; i < key_columns.size(); ++i) + { + if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i])) + { + low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get(); + low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes(); + low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType(); + } + else + low_cardinality_keys.nested_columns[i] = key_columns[i]; + } 
+ } if (usePreparedKeys(key_sizes)) { @@ -575,21 +575,21 @@ struct HashMethodKeysFixed columns_data[i] = Base::getActualColumns()[i]->getRawData().data; } #endif - } - - ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const - { - if constexpr (has_nullable_keys) - { - auto bitmap = Base::createBitmap(row); - return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap); - } - else - { - if constexpr (has_low_cardinality) - return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes, - &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes); - + } + + ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const + { + if constexpr (has_nullable_keys) + { + auto bitmap = Base::createBitmap(row); + return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap); + } + else + { + if constexpr (has_low_cardinality) + return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes, + &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes); + if (!prepared_keys.empty()) return prepared_keys[row]; @@ -600,9 +600,9 @@ struct HashMethodKeysFixed return packFixedShuffle<Key>(columns_data.get(), keys_size, key_sizes.data(), row, masks.get()); } #endif - return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes); - } - } + return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes); + } + } static std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes) { @@ -634,56 +634,56 @@ struct HashMethodKeysFixed key_columns.swap(new_columns); return new_sizes; } -}; - -/** Hash by concatenating serialized key values. - * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. - * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. - * Therefore, when aggregating by several strings, there is no ambiguity. - */ -template <typename Value, typename Mapped> -struct HashMethodSerialized - : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false> -{ - using Self = HashMethodSerialized<Value, Mapped>; - using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; - - ColumnRawPtrs key_columns; - size_t keys_size; - - HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) - : key_columns(key_columns_), keys_size(key_columns_.size()) {} - -protected: - friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; - - ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const - { - return SerializedKeyHolder{ - serializeKeysToPoolContiguous(row, keys_size, key_columns, pool), - pool}; - } -}; - -/// For the case when there is one string key. +}; + +/** Hash by concatenating serialized key values. + * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. + * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. + * Therefore, when aggregating by several strings, there is no ambiguity. 
+ */ +template <typename Value, typename Mapped> +struct HashMethodSerialized + : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false> +{ + using Self = HashMethodSerialized<Value, Mapped>; + using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; + + ColumnRawPtrs key_columns; + size_t keys_size; + + HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + : key_columns(key_columns_), keys_size(key_columns_.size()) {} + +protected: + friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; + + ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const + { + return SerializedKeyHolder{ + serializeKeysToPoolContiguous(row, keys_size, key_columns, pool), + pool}; + } +}; + +/// For the case when there is one string key. template <typename Value, typename Mapped, bool use_cache = true, bool need_offset = false> -struct HashMethodHashed +struct HashMethodHashed : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache, need_offset>, Value, Mapped, use_cache, need_offset> -{ - using Key = UInt128; +{ + using Key = UInt128; using Self = HashMethodHashed<Value, Mapped, use_cache, need_offset>; using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>; - - ColumnRawPtrs key_columns; - - HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &) - : key_columns(std::move(key_columns_)) {} - - ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const - { - return hash128(row, key_columns.size(), key_columns); - } -}; - -} -} + + ColumnRawPtrs key_columns; + + HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &) + : key_columns(std::move(key_columns_)) {} + + ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const + { + return hash128(row, key_columns.size(), key_columns); + } +}; + +} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h index f6ed2cd05e..aa7ae6ea29 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h @@ -1,104 +1,104 @@ -#pragma once - -#include <Columns/IColumn.h> +#pragma once + +#include <Columns/IColumn.h> #include <Columns/ColumnNullable.h> -#include <Common/assert_cast.h> -#include <Common/HashTable/HashTableKeyHolder.h> -#include <Interpreters/AggregationCommon.h> - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -namespace ColumnsHashing -{ - -/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. -/// Is used for caching. 
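The serialization comment above is the crux of HashMethodSerialized: each value is written with enough framing (for strings, a length prefix before the bytes) that decoding can proceed from the start of the buffer alone, so concatenating several keys is unambiguous: "ab"+"c" and "a"+"bc" serialize differently. A toy sketch of that framing; the buffer layout here is an illustrative assumption, not the actual serializeKeysToPoolContiguous format:

    #include <cstdint>
    #include <string>
    #include <vector>

    /// Append a string as <length><bytes>.
    void serializeKey(std::vector<char> & buf, const std::string & s)
    {
        uint64_t len = s.size();
        const char * p = reinterpret_cast<const char *>(&len);
        buf.insert(buf.end(), p, p + sizeof(len));
        buf.insert(buf.end(), s.begin(), s.end());
    }

    /// {"ab", "c"} and {"a", "bc"} now produce different buffers,
    /// unlike plain concatenation.
    std::vector<char> serializeKeys(const std::vector<std::string> & keys)
    {
        std::vector<char> buf;
        for (const auto & key : keys)
            serializeKey(buf, key);
        return buf;
    }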
-class HashMethodContext -{ -public: - virtual ~HashMethodContext() = default; - - struct Settings - { - size_t max_threads; - }; -}; - -using HashMethodContextPtr = std::shared_ptr<HashMethodContext>; - - -namespace columns_hashing_impl -{ - -template <typename Value, bool consecutive_keys_optimization_> -struct LastElementCache -{ - static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; - Value value; - bool empty = true; - bool found = false; - - bool check(const Value & value_) { return !empty && value == value_; } - - template <typename Key> - bool check(const Key & key) { return !empty && value.first == key; } -}; - -template <typename Data> -struct LastElementCache<Data, false> -{ - static constexpr bool consecutive_keys_optimization = false; -}; - -template <typename Mapped> -class EmplaceResultImpl -{ - Mapped & value; - Mapped & cached_value; - bool inserted; - -public: - EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_) - : value(value_), cached_value(cached_value_), inserted(inserted_) {} - - bool isInserted() const { return inserted; } - auto & getMapped() const { return value; } - - void setMapped(const Mapped & mapped) - { - cached_value = mapped; - value = mapped; - } -}; - -template <> -class EmplaceResultImpl<void> -{ - bool inserted; - -public: - explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {} - bool isInserted() const { return inserted; } -}; - +#include <Common/assert_cast.h> +#include <Common/HashTable/HashTableKeyHolder.h> +#include <Interpreters/AggregationCommon.h> + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace ColumnsHashing +{ + +/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. +/// Is used for caching. +class HashMethodContext +{ +public: + virtual ~HashMethodContext() = default; + + struct Settings + { + size_t max_threads; + }; +}; + +using HashMethodContextPtr = std::shared_ptr<HashMethodContext>; + + +namespace columns_hashing_impl +{ + +template <typename Value, bool consecutive_keys_optimization_> +struct LastElementCache +{ + static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; + Value value; + bool empty = true; + bool found = false; + + bool check(const Value & value_) { return !empty && value == value_; } + + template <typename Key> + bool check(const Key & key) { return !empty && value.first == key; } +}; + +template <typename Data> +struct LastElementCache<Data, false> +{ + static constexpr bool consecutive_keys_optimization = false; +}; + +template <typename Mapped> +class EmplaceResultImpl +{ + Mapped & value; + Mapped & cached_value; + bool inserted; + +public: + EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_) + : value(value_), cached_value(cached_value_), inserted(inserted_) {} + + bool isInserted() const { return inserted; } + auto & getMapped() const { return value; } + + void setMapped(const Mapped & mapped) + { + cached_value = mapped; + value = mapped; + } +}; + +template <> +class EmplaceResultImpl<void> +{ + bool inserted; + +public: + explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {} + bool isInserted() const { return inserted; } +}; + /// FindResult optionally may contain pointer to value and offset in hashtable buffer. /// Only bool found is required. 
/// So we will have 4 different specializations for FindResultImpl class FindResultImplBase -{ - bool found; - -public: +{ + bool found; + +public: explicit FindResultImplBase(bool found_) : found(found_) {} - bool isFound() const { return found; } -}; - + bool isFound() const { return found; } +}; + template <bool need_offset = false> class FindResultImplOffsetBase { @@ -107,13 +107,13 @@ public: explicit FindResultImplOffsetBase(size_t /* off */) {} }; -template <> +template <> class FindResultImplOffsetBase<true> -{ +{ size_t offset; public: constexpr static bool has_offset = true; - + explicit FindResultImplOffsetBase(size_t off) : offset(off) {} ALWAYS_INLINE size_t getOffset() const { return offset; } }; @@ -123,7 +123,7 @@ class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBas { Mapped * value; -public: +public: FindResultImpl() : FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0) {} @@ -131,8 +131,8 @@ public: FindResultImpl(Mapped * value_, bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {} Mapped & getMapped() const { return *value; } -}; - +}; + template <bool need_offset> class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset> { @@ -141,254 +141,254 @@ public: }; template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false> -class HashMethodBase -{ -public: - using EmplaceResult = EmplaceResultImpl<Mapped>; +class HashMethodBase +{ +public: + using EmplaceResult = EmplaceResultImpl<Mapped>; using FindResult = FindResultImpl<Mapped, need_offset>; - static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; - using Cache = LastElementCache<Value, consecutive_keys_optimization>; - - static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } - - template <typename Data> - ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) - { - auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); - return emplaceImpl(key_holder, data); - } - - template <typename Data> - ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) - { - auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); - return findKeyImpl(keyHolderGetKey(key_holder), data); - } - - template <typename Data> - ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) - { - auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); - return data.hash(keyHolderGetKey(key_holder)); - } - -protected: - Cache cache; - - HashMethodBase() - { - if constexpr (consecutive_keys_optimization) - { - if constexpr (has_mapped) - { - /// Init PairNoInit elements. 
- cache.value.second = Mapped(); - cache.value.first = {}; - } - else - cache.value = Value(); - } - } - - template <typename Data, typename KeyHolder> - ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) - { - if constexpr (Cache::consecutive_keys_optimization) - { - if (cache.found && cache.check(keyHolderGetKey(key_holder))) - { - if constexpr (has_mapped) - return EmplaceResult(cache.value.second, cache.value.second, false); - else - return EmplaceResult(false); - } - } - - typename Data::LookupResult it; - bool inserted = false; - data.emplace(key_holder, it, inserted); - - [[maybe_unused]] Mapped * cached = nullptr; - if constexpr (has_mapped) - cached = &it->getMapped(); - - if (inserted) - { - if constexpr (has_mapped) - { - new (&it->getMapped()) Mapped(); - } - } - - if constexpr (consecutive_keys_optimization) - { - cache.found = true; - cache.empty = false; - - if constexpr (has_mapped) - { - cache.value.first = it->getKey(); - cache.value.second = it->getMapped(); - cached = &cache.value.second; - } - else - { - cache.value = it->getKey(); - } - } - - if constexpr (has_mapped) - return EmplaceResult(it->getMapped(), *cached, inserted); - else - return EmplaceResult(inserted); - } - - template <typename Data, typename Key> - ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) - { - if constexpr (Cache::consecutive_keys_optimization) - { + static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; + using Cache = LastElementCache<Value, consecutive_keys_optimization>; + + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + template <typename Data> + ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) + { + auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); + return emplaceImpl(key_holder, data); + } + + template <typename Data> + ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) + { + auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); + return findKeyImpl(keyHolderGetKey(key_holder), data); + } + + template <typename Data> + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) + { + auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); + return data.hash(keyHolderGetKey(key_holder)); + } + +protected: + Cache cache; + + HashMethodBase() + { + if constexpr (consecutive_keys_optimization) + { + if constexpr (has_mapped) + { + /// Init PairNoInit elements. 
+ cache.value.second = Mapped(); + cache.value.first = {}; + } + else + cache.value = Value(); + } + } + + template <typename Data, typename KeyHolder> + ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) + { + if constexpr (Cache::consecutive_keys_optimization) + { + if (cache.found && cache.check(keyHolderGetKey(key_holder))) + { + if constexpr (has_mapped) + return EmplaceResult(cache.value.second, cache.value.second, false); + else + return EmplaceResult(false); + } + } + + typename Data::LookupResult it; + bool inserted = false; + data.emplace(key_holder, it, inserted); + + [[maybe_unused]] Mapped * cached = nullptr; + if constexpr (has_mapped) + cached = &it->getMapped(); + + if (inserted) + { + if constexpr (has_mapped) + { + new (&it->getMapped()) Mapped(); + } + } + + if constexpr (consecutive_keys_optimization) + { + cache.found = true; + cache.empty = false; + + if constexpr (has_mapped) + { + cache.value.first = it->getKey(); + cache.value.second = it->getMapped(); + cached = &cache.value.second; + } + else + { + cache.value = it->getKey(); + } + } + + if constexpr (has_mapped) + return EmplaceResult(it->getMapped(), *cached, inserted); + else + return EmplaceResult(inserted); + } + + template <typename Data, typename Key> + ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) + { + if constexpr (Cache::consecutive_keys_optimization) + { /// It's possible to support such combination, but code will became more complex. /// Now there's not place where we need this options enabled together static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options"); - if (cache.check(key)) - { - if constexpr (has_mapped) + if (cache.check(key)) + { + if constexpr (has_mapped) return FindResult(&cache.value.second, cache.found, 0); - else + else return FindResult(cache.found, 0); - } - } - - auto it = data.find(key); - - if constexpr (consecutive_keys_optimization) - { - cache.found = it != nullptr; - cache.empty = false; - - if constexpr (has_mapped) - { - cache.value.first = key; - if (it) - { - cache.value.second = it->getMapped(); - } - } - else - { - cache.value = key; - } - } - + } + } + + auto it = data.find(key); + + if constexpr (consecutive_keys_optimization) + { + cache.found = it != nullptr; + cache.empty = false; + + if constexpr (has_mapped) + { + cache.value.first = key; + if (it) + { + cache.value.second = it->getMapped(); + } + } + else + { + cache.value = key; + } + } + size_t offset = 0; if constexpr (FindResult::has_offset) { offset = it ? data.offsetInternal(it) : 0; } - if constexpr (has_mapped) + if constexpr (has_mapped) return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset); - else + else return FindResult(it != nullptr, offset); - } -}; - - -template <typename T> -struct MappedCache : public PaddedPODArray<T> {}; - -template <> -struct MappedCache<void> {}; - - -/// This class is designed to provide the functionality that is required for -/// supporting nullable keys in HashMethodKeysFixed. If there are -/// no nullable keys, this class is merely implemented as an empty shell. -template <typename Key, bool has_nullable_keys> -class BaseStateKeysFixed; - -/// Case where nullable keys are supported. 
-template <typename Key> -class BaseStateKeysFixed<Key, true> -{ -protected: - BaseStateKeysFixed(const ColumnRawPtrs & key_columns) - { - null_maps.reserve(key_columns.size()); - actual_columns.reserve(key_columns.size()); - - for (const auto & col : key_columns) - { - if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col)) - { - actual_columns.push_back(&nullable_col->getNestedColumn()); - null_maps.push_back(&nullable_col->getNullMapColumn()); - } - else - { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } - } - } - - /// Return the columns which actually contain the values of the keys. - /// For a given key column, if it is nullable, we return its nested - /// column. Otherwise we return the key column itself. - inline const ColumnRawPtrs & getActualColumns() const - { - return actual_columns; - } - - /// Create a bitmap that indicates whether, for a particular row, - /// a key column bears a null value or not. - KeysNullMap<Key> createBitmap(size_t row) const - { - KeysNullMap<Key> bitmap{}; - - for (size_t k = 0; k < null_maps.size(); ++k) - { - if (null_maps[k] != nullptr) - { - const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData(); - if (null_map[row] == 1) - { - size_t bucket = k / 8; - size_t offset = k % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - - return bitmap; - } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - -/// Case where nullable keys are not supported. -template <typename Key> -class BaseStateKeysFixed<Key, false> -{ -protected: - BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {} - - const ColumnRawPtrs & getActualColumns() const { return actual_columns; } - - KeysNullMap<Key> createBitmap(size_t) const - { - throw Exception{"Internal error: calling createBitmap() for non-nullable keys" - " is forbidden", ErrorCodes::LOGICAL_ERROR}; - } - -private: - ColumnRawPtrs actual_columns; -}; - -} - -} - -} + } +}; + + +template <typename T> +struct MappedCache : public PaddedPODArray<T> {}; + +template <> +struct MappedCache<void> {}; + + +/// This class is designed to provide the functionality that is required for +/// supporting nullable keys in HashMethodKeysFixed. If there are +/// no nullable keys, this class is merely implemented as an empty shell. +template <typename Key, bool has_nullable_keys> +class BaseStateKeysFixed; + +/// Case where nullable keys are supported. +template <typename Key> +class BaseStateKeysFixed<Key, true> +{ +protected: + BaseStateKeysFixed(const ColumnRawPtrs & key_columns) + { + null_maps.reserve(key_columns.size()); + actual_columns.reserve(key_columns.size()); + + for (const auto & col : key_columns) + { + if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col)) + { + actual_columns.push_back(&nullable_col->getNestedColumn()); + null_maps.push_back(&nullable_col->getNullMapColumn()); + } + else + { + actual_columns.push_back(col); + null_maps.push_back(nullptr); + } + } + } + + /// Return the columns which actually contain the values of the keys. + /// For a given key column, if it is nullable, we return its nested + /// column. Otherwise we return the key column itself. + inline const ColumnRawPtrs & getActualColumns() const + { + return actual_columns; + } + + /// Create a bitmap that indicates whether, for a particular row, + /// a key column bears a null value or not. 
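createBitmap below packs one null-indicator bit per key column: key k lands in byte k / 8, bit k % 8. A standalone sketch of the same packing and its readback (container choice and names are assumptions):

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    /// Bit k set means key column k is NULL in this row.
    std::array<uint8_t, 2> packNullBitmap(const std::vector<bool> & is_null)
    {
        std::array<uint8_t, 2> bitmap{};  /// room for up to 16 key columns
        for (size_t k = 0; k < is_null.size(); ++k)
            if (is_null[k])
                bitmap[k / 8] |= uint8_t(1) << (k % 8);
        return bitmap;
    }

    bool isNull(const std::array<uint8_t, 2> & bitmap, size_t k)
    {
        return (bitmap[k / 8] >> (k % 8)) & 1;
    }

Because the bitmap participates in the packed key (see HashMethodKeysFixed::getKeyHolder above), rows that differ only in which keys are NULL still hash and compare as distinct keys.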
+ KeysNullMap<Key> createBitmap(size_t row) const + { + KeysNullMap<Key> bitmap{}; + + for (size_t k = 0; k < null_maps.size(); ++k) + { + if (null_maps[k] != nullptr) + { + const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData(); + if (null_map[row] == 1) + { + size_t bucket = k / 8; + size_t offset = k % 8; + bitmap[bucket] |= UInt8(1) << offset; + } + } + } + + return bitmap; + } + +private: + ColumnRawPtrs actual_columns; + ColumnRawPtrs null_maps; +}; + +/// Case where nullable keys are not supported. +template <typename Key> +class BaseStateKeysFixed<Key, false> +{ +protected: + BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {} + + const ColumnRawPtrs & getActualColumns() const { return actual_columns; } + + KeysNullMap<Key> createBitmap(size_t) const + { + throw Exception{"Internal error: calling createBitmap() for non-nullable keys" + " is forbidden", ErrorCodes::LOGICAL_ERROR}; + } + +private: + ColumnRawPtrs actual_columns; +}; + +} + +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h index dc1f748764..bc9d55ff8f 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h @@ -1,15 +1,15 @@ -#pragma once - -#include <queue> -#include <type_traits> +#pragma once + +#include <queue> +#include <type_traits> #include <atomic> - -#include <Poco/Mutex.h> -#include <Poco/Semaphore.h> - + +#include <Poco/Mutex.h> +#include <Poco/Semaphore.h> + #include <common/MoveOrCopyIfThrow.h> #include <Common/Exception.h> - + namespace DB { namespace ErrorCodes @@ -18,20 +18,20 @@ namespace ErrorCodes } } -/** A very simple thread-safe queue of limited size. - * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty. - * If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue. - */ -template <typename T> -class ConcurrentBoundedQueue -{ -private: - std::queue<T> queue; +/** A very simple thread-safe queue of limited size. + * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty. + * If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue. + */ +template <typename T> +class ConcurrentBoundedQueue +{ +private: + std::queue<T> queue; mutable Poco::FastMutex mutex; - Poco::Semaphore fill_count; - Poco::Semaphore empty_count; + Poco::Semaphore fill_count; + Poco::Semaphore empty_count; std::atomic_bool closed = false; - + template <typename... Args> bool tryEmplaceImpl(Args &&... args) { @@ -63,71 +63,71 @@ private: empty_count.set(); } -public: +public: explicit ConcurrentBoundedQueue(size_t max_fill) : fill_count(0, max_fill) , empty_count(max_fill, max_fill) {} - - void push(const T & x) - { - empty_count.wait(); + + void push(const T & x) + { + empty_count.wait(); if (!tryEmplaceImpl(x)) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()"); - } - - template <typename... Args> - void emplace(Args &&... args) - { - empty_count.wait(); + } + + template <typename... Args> + void emplace(Args &&... 
args) + { + empty_count.wait(); if (!tryEmplaceImpl(std::forward<Args>(args)...)) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()"); - } - - void pop(T & x) - { - fill_count.wait(); + } + + void pop(T & x) + { + fill_count.wait(); popImpl(x); - } - - bool tryPush(const T & x, UInt64 milliseconds = 0) - { + } + + bool tryPush(const T & x, UInt64 milliseconds = 0) + { if (!empty_count.tryWait(milliseconds)) return false; return tryEmplaceImpl(x); - } - - template <typename... Args> - bool tryEmplace(UInt64 milliseconds, Args &&... args) - { + } + + template <typename... Args> + bool tryEmplace(UInt64 milliseconds, Args &&... args) + { if (!empty_count.tryWait(milliseconds)) return false; return tryEmplaceImpl(std::forward<Args>(args)...); - } - - bool tryPop(T & x, UInt64 milliseconds = 0) - { + } + + bool tryPop(T & x, UInt64 milliseconds = 0) + { if (!fill_count.tryWait(milliseconds)) return false; popImpl(x); return true; - } - + } + size_t size() const - { - Poco::ScopedLock<Poco::FastMutex> lock(mutex); - return queue.size(); - } - + { + Poco::ScopedLock<Poco::FastMutex> lock(mutex); + return queue.size(); + } + size_t empty() const - { - Poco::ScopedLock<Poco::FastMutex> lock(mutex); - return queue.empty(); - } - + { + Poco::ScopedLock<Poco::FastMutex> lock(mutex); + return queue.empty(); + } + /// Forbids to push new elements to queue. /// Returns false if queue was not closed before call, returns true if queue was already closed. bool close() @@ -141,15 +141,15 @@ public: return closed.load(); } - void clear() - { - while (fill_count.tryWait(0)) - { - { - Poco::ScopedLock<Poco::FastMutex> lock(mutex); - queue.pop(); - } - empty_count.set(); - } - } -}; + void clear() + { + while (fill_count.tryWait(0)) + { + { + Poco::ScopedLock<Poco::FastMutex> lock(mutex); + queue.pop(); + } + empty_count.set(); + } + } +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp index 2166dcc3e8..eb677debb0 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp @@ -1,37 +1,37 @@ -#include <Common/Config/AbstractConfigurationComparison.h> +#include <Common/Config/AbstractConfigurationComparison.h> #include <Common/getMultipleKeysFromConfig.h> - -#include <unordered_set> -#include <common/StringRef.h> -#include <Poco/Util/AbstractConfiguration.h> - - -namespace DB -{ -namespace -{ - String concatKeyAndSubKey(const String & key, const String & subkey) - { - // Copied from Poco::Util::ConfigurationView::translateKey(): - String result = key; - if (!result.empty() && !subkey.empty() && subkey[0] != '[') - result += '.'; - result += subkey; - return result; - }; -} - - -bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right) -{ - return isSameConfiguration(left, String(), right, String()); -} - + +#include <unordered_set> +#include <common/StringRef.h> +#include <Poco/Util/AbstractConfiguration.h> + + +namespace DB +{ +namespace +{ + String concatKeyAndSubKey(const String & key, const String & subkey) + { + // Copied from Poco::Util::ConfigurationView::translateKey(): + String result = key; + if (!result.empty() && !subkey.empty() && subkey[0] != '[') + result += '.'; + 
result += subkey; + return result; + }; +} + + +bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right) +{ + return isSameConfiguration(left, String(), right, String()); +} + bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & key) { return isSameConfiguration(left, key, right, key); } - + bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & root, const String & name) { if (&left == &right) @@ -49,44 +49,44 @@ bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration return true; } -bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key, - const Poco::Util::AbstractConfiguration & right, const String & right_key) -{ - if (&left == &right && left_key == right_key) - return true; - - bool has_property = left.hasProperty(left_key); - if (has_property != right.hasProperty(right_key)) - return false; - if (has_property) - { - /// The left and right configurations contains values so we can compare them. - if (left.getRawString(left_key) != right.getRawString(right_key)) - return false; - } - - /// Get the subkeys of the left and right configurations. - Poco::Util::AbstractConfiguration::Keys subkeys; - left.keys(left_key, subkeys); - - { - /// Check that the right configuration has the same set of subkeys as the left configuration. - Poco::Util::AbstractConfiguration::Keys right_subkeys; - right.keys(right_key, right_subkeys); - std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()}; - if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size())) - return false; - for (const auto & right_subkey : right_subkeys) - if (!left_subkeys.count(right_subkey)) - return false; - } - - /// Go through all the subkeys and compare corresponding parts of the configurations. - for (const auto & subkey : subkeys) - if (!isSameConfiguration(left, concatKeyAndSubKey(left_key, subkey), right, concatKeyAndSubKey(right_key, subkey))) - return false; - - return true; -} - -} +bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key, + const Poco::Util::AbstractConfiguration & right, const String & right_key) +{ + if (&left == &right && left_key == right_key) + return true; + + bool has_property = left.hasProperty(left_key); + if (has_property != right.hasProperty(right_key)) + return false; + if (has_property) + { + /// The left and right configurations contains values so we can compare them. + if (left.getRawString(left_key) != right.getRawString(right_key)) + return false; + } + + /// Get the subkeys of the left and right configurations. + Poco::Util::AbstractConfiguration::Keys subkeys; + left.keys(left_key, subkeys); + + { + /// Check that the right configuration has the same set of subkeys as the left configuration. + Poco::Util::AbstractConfiguration::Keys right_subkeys; + right.keys(right_key, right_subkeys); + std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()}; + if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size())) + return false; + for (const auto & right_subkey : right_subkeys) + if (!left_subkeys.count(right_subkey)) + return false; + } + + /// Go through all the subkeys and compare corresponding parts of the configurations. 
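Stepping back to ConcurrentBoundedQueue from the previous file: a usage sketch of its blocking semantics, where push blocks on empty_count while the queue is full and pop blocks on fill_count while it is empty. The sentinel protocol and thread structure are illustrative assumptions:

    #include <thread>
    #include <Common/ConcurrentBoundedQueue.h>

    void producerConsumerExample()
    {
        ConcurrentBoundedQueue<int> queue(/* max_fill = */ 4);

        std::thread producer([&]
        {
            for (int i = 0; i < 16; ++i)
                queue.push(i);   /// blocks while 4 items are in flight
            queue.push(-1);      /// sentinel: tell the consumer to stop
        });

        std::thread consumer([&]
        {
            int value = 0;
            do
            {
                queue.pop(value);  /// blocks while the queue is empty
            } while (value != -1);
        });

        producer.join();
        consumer.join();
    }

The timed variants (tryPush/tryPop/tryEmplace with a milliseconds budget) return false instead of blocking, which, combined with close(), lets shutdown paths drain the queue without deadlocking.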
+ for (const auto & subkey : subkeys) + if (!isSameConfiguration(left, concatKeyAndSubKey(left_key, subkey), right, concatKeyAndSubKey(right_key, subkey))) + return false; + + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h index 9ef3118235..6e1d8a890b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h @@ -1,18 +1,18 @@ -#pragma once - +#pragma once + #include <common/types.h> - -namespace Poco::Util -{ - class AbstractConfiguration; -} - -namespace DB -{ - /// Returns true if two configurations contains the same keys and values. - bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, - const Poco::Util::AbstractConfiguration & right); - + +namespace Poco::Util +{ + class AbstractConfiguration; +} + +namespace DB +{ + /// Returns true if two configurations contains the same keys and values. + bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, + const Poco::Util::AbstractConfiguration & right); + /// Config may have multiple keys with one name. For example: /// <root> /// <some_key>...</some_key> @@ -29,17 +29,17 @@ namespace DB const Poco::Util::AbstractConfiguration & right, const String & key); - /// Returns true if specified subviews of the two configurations contains the same keys and values. - bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key, - const Poco::Util::AbstractConfiguration & right, const String & right_key); - - inline bool operator==(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right) - { - return isSameConfiguration(left, right); - } - - inline bool operator!=(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right) - { - return !isSameConfiguration(left, right); - } -} + /// Returns true if specified subviews of the two configurations contains the same keys and values. 
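A hypothetical call site for the subview overload declared below; Poco::Util::XMLConfiguration and the XML snippets are assumptions for illustration:

    #include <sstream>
    #include <Poco/AutoPtr.h>
    #include <Poco/Util/XMLConfiguration.h>
    #include <Common/Config/AbstractConfigurationComparison.h>

    bool loggerSectionsMatch()
    {
        std::istringstream a("<root><logger><level>debug</level></logger></root>");
        std::istringstream b("<root><logger><level>trace</level></logger></root>");

        Poco::AutoPtr<Poco::Util::XMLConfiguration> left(new Poco::Util::XMLConfiguration(a));
        Poco::AutoPtr<Poco::Util::XMLConfiguration> right(new Poco::Util::XMLConfiguration(b));

        /// Compare only the "logger" subtrees; returns false here
        /// because the levels differ.
        return DB::isSameConfiguration(*left, "logger", *right, "logger");
    }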
+ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key, + const Poco::Util::AbstractConfiguration & right, const String & right_key); + + inline bool operator==(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right) + { + return isSameConfiguration(left, right); + } + + inline bool operator!=(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right) + { + return !isSameConfiguration(left, right); + } +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp index b279a6ca12..4fe0f0bb8c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp @@ -1,92 +1,92 @@ -#include "DNSResolver.h" -#include <common/SimpleCache.h> -#include <Common/Exception.h> -#include <Common/ProfileEvents.h> -#include <Core/Names.h> +#include "DNSResolver.h" +#include <common/SimpleCache.h> +#include <Common/Exception.h> +#include <Common/ProfileEvents.h> +#include <Core/Names.h> #include <common/types.h> -#include <Poco/Net/IPAddress.h> -#include <Poco/Net/DNS.h> -#include <Poco/Net/NetException.h> -#include <Poco/NumberParser.h> -#include <arpa/inet.h> -#include <atomic> -#include <optional> -#include <string_view> - -namespace ProfileEvents -{ - extern Event DNSError; -} - -namespace std -{ -template<> struct hash<Poco::Net::IPAddress> -{ - size_t operator()(const Poco::Net::IPAddress & address) const noexcept - { - std::string_view addr(static_cast<const char *>(address.addr()), address.length()); - std::hash<std::string_view> hash_impl; - return hash_impl(addr); - } -}; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int DNS_ERROR; -} - - -/// Slightly altered implementation from https://github.com/pocoproject/poco/blob/poco-1.6.1/Net/src/SocketAddress.cpp#L86 -static void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port) -{ - String port_str; - out_host.clear(); - - auto it = host_and_port.begin(); - auto end = host_and_port.end(); - - if (*it == '[') /// Try parse case '[<IPv6 or something else>]:<port>' - { - ++it; - while (it != end && *it != ']') - out_host += *it++; - if (it == end) - throw Exception("Malformed IPv6 address", ErrorCodes::BAD_ARGUMENTS); - ++it; - } - else /// Case '<IPv4 or domain name or something else>:<port>' - { - while (it != end && *it != ':') - out_host += *it++; - } - - if (it != end && *it == ':') - { - ++it; - while (it != end) - port_str += *it++; - } - else - throw Exception("Missing port number", ErrorCodes::BAD_ARGUMENTS); - - unsigned port; - if (Poco::NumberParser::tryParseUnsigned(port_str, port) && port <= 0xFFFF) - { - out_port = static_cast<UInt16>(port); - } - else +#include <Poco/Net/IPAddress.h> +#include <Poco/Net/DNS.h> +#include <Poco/Net/NetException.h> +#include <Poco/NumberParser.h> +#include <arpa/inet.h> +#include <atomic> +#include <optional> +#include <string_view> + +namespace ProfileEvents +{ + extern Event DNSError; +} + +namespace std +{ +template<> struct hash<Poco::Net::IPAddress> +{ + size_t operator()(const Poco::Net::IPAddress & address) const noexcept + { + std::string_view addr(static_cast<const char *>(address.addr()), address.length()); + std::hash<std::string_view> hash_impl; + return hash_impl(addr); + } +}; +} + 
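The std::hash specialization above is what lets standard unordered containers key on Poco::Net::IPAddress directly, which DNSResolver::Impl below relies on for its new_addresses/known_addresses sets. A small sketch, assuming the specialization is visible in the translation unit:

    #include <unordered_set>
    #include <Poco/Net/IPAddress.h>

    /// insert() returns {iterator, inserted}; duplicate addresses land in
    /// the same bucket because the specialization hashes the raw address bytes.
    bool seenBefore(std::unordered_set<Poco::Net::IPAddress> & seen,
                    const Poco::Net::IPAddress & address)
    {
        return !seen.insert(address).second;
    }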
+namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int DNS_ERROR; +} + + +/// Slightly altered implementation from https://github.com/pocoproject/poco/blob/poco-1.6.1/Net/src/SocketAddress.cpp#L86 +static void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port) +{ + String port_str; + out_host.clear(); + + auto it = host_and_port.begin(); + auto end = host_and_port.end(); + + if (*it == '[') /// Try parse case '[<IPv6 or something else>]:<port>' + { + ++it; + while (it != end && *it != ']') + out_host += *it++; + if (it == end) + throw Exception("Malformed IPv6 address", ErrorCodes::BAD_ARGUMENTS); + ++it; + } + else /// Case '<IPv4 or domain name or something else>:<port>' + { + while (it != end && *it != ':') + out_host += *it++; + } + + if (it != end && *it == ':') + { + ++it; + while (it != end) + port_str += *it++; + } + else + throw Exception("Missing port number", ErrorCodes::BAD_ARGUMENTS); + + unsigned port; + if (Poco::NumberParser::tryParseUnsigned(port_str, port) && port <= 0xFFFF) + { + out_port = static_cast<UInt16>(port); + } + else throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); -} - -static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) -{ - Poco::Net::IPAddress ip; - +} + +static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) +{ + Poco::Net::IPAddress ip; + /// NOTE: /// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2 /// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (w/o brackets) @@ -101,24 +101,24 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) if (Poco::Net::IPAddress::tryParse(host, ip)) return DNSResolver::IPAddresses(1, ip); } - - /// Family: AF_UNSPEC - /// AI_ALL is required for checking if client is allowed to connect from an address - auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL; - /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured. - /// It should not affect client address checking, since client cannot connect from IPv6 address - /// if server has no IPv6 addresses. - flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG; + + /// Family: AF_UNSPEC + /// AI_ALL is required for checking if client is allowed to connect from an address + auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL; + /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured. + /// It should not affect client address checking, since client cannot connect from IPv6 address + /// if server has no IPv6 addresses. 
+ flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG; DNSResolver::IPAddresses addresses; try { -#if defined(ARCADIA_BUILD) +#if defined(ARCADIA_BUILD) addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); -#else +#else addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); -#endif +#endif } catch (const Poco::Net::DNSException & e) { @@ -126,225 +126,225 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) addresses.clear(); } - if (addresses.empty()) - throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR); - - return addresses; -} - -static String reverseResolveImpl(const Poco::Net::IPAddress & address) -{ - Poco::Net::SocketAddress sock_addr(address, 0); - - /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...) - char host[1024]; - int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); - if (err) - throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); - return host; -} - -struct DNSResolver::Impl -{ - SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host; - SimpleCache<decltype(reverseResolveImpl), &reverseResolveImpl> cache_address; - - std::mutex drop_mutex; - std::mutex update_mutex; - - /// Cached server host name - std::optional<String> host_name; - - /// Store hosts, which was asked to resolve from last update of DNS cache. - NameSet new_hosts; - std::unordered_set<Poco::Net::IPAddress> new_addresses; - - /// Store all hosts, which was whenever asked to resolve - NameSet known_hosts; - std::unordered_set<Poco::Net::IPAddress> known_addresses; - - /// If disabled, will not make cache lookups, will resolve addresses manually on each call - std::atomic<bool> disable_cache{false}; -}; - - -DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()), log(&Poco::Logger::get("DNSResolver")) {} - -Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) -{ - return resolveHostAll(host).front(); -} - -DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) -{ - if (impl->disable_cache) - return resolveIPAddressImpl(host); - - addToNewHosts(host); - return impl->cache_host(host); -} - -Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port) -{ - if (impl->disable_cache) - return Poco::Net::SocketAddress(host_and_port); - - String host; - UInt16 port; - splitHostAndPort(host_and_port, host, port); - - addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); -} - -Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) -{ - if (impl->disable_cache) - return Poco::Net::SocketAddress(host, port); - - addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); -} - -String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) -{ - if (impl->disable_cache) - return reverseResolveImpl(address); - - addToNewAddresses(address); - return impl->cache_address(address); -} - -void DNSResolver::dropCache() -{ - impl->cache_host.drop(); - impl->cache_address.drop(); - - std::scoped_lock lock(impl->update_mutex, impl->drop_mutex); - - impl->known_hosts.clear(); - impl->known_addresses.clear(); - impl->new_hosts.clear(); - impl->new_addresses.clear(); - impl->host_name.reset(); -} - -void 
DNSResolver::setDisableCacheFlag(bool is_disabled) -{ - impl->disable_cache = is_disabled; -} - -String DNSResolver::getHostName() -{ - if (impl->disable_cache) - return Poco::Net::DNS::hostName(); - - std::lock_guard lock(impl->drop_mutex); - - if (!impl->host_name.has_value()) - impl->host_name.emplace(Poco::Net::DNS::hostName()); - - return *impl->host_name; -} - -static const String & cacheElemToString(const String & str) { return str; } -static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); } - -template<typename UpdateF, typename ElemsT> -bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg) -{ - bool updated = false; - String lost_elems; - for (const auto & elem : elems) - { - try - { - updated |= (this->*update_func)(elem); - } - catch (const Poco::Net::NetException &) - { - ProfileEvents::increment(ProfileEvents::DNSError); - - if (!lost_elems.empty()) - lost_elems += ", "; - lost_elems += cacheElemToString(elem); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (!lost_elems.empty()) - LOG_INFO(log, log_msg, lost_elems); - - return updated; -} - -bool DNSResolver::updateCache() -{ - LOG_DEBUG(log, "Updating DNS cache"); - - { + if (addresses.empty()) + throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR); + + return addresses; +} + +static String reverseResolveImpl(const Poco::Net::IPAddress & address) +{ + Poco::Net::SocketAddress sock_addr(address, 0); + + /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...) + char host[1024]; + int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); + if (err) + throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); + return host; +} + +struct DNSResolver::Impl +{ + SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host; + SimpleCache<decltype(reverseResolveImpl), &reverseResolveImpl> cache_address; + + std::mutex drop_mutex; + std::mutex update_mutex; + + /// Cached server host name + std::optional<String> host_name; + + /// Store hosts, which was asked to resolve from last update of DNS cache. 
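The cache_host/cache_address members above wrap the resolve functions in SimpleCache, a memoizing call wrapper: cache_host(host) returns a cached result or computes one, update() re-runs the function for one key, and drop() clears everything. A reduced, single-threaded sketch of that wrapper; the real SimpleCache also synchronizes access, and the names here are assumptions:

    #include <functional>
    #include <map>

    template <typename Result, typename Arg>
    class MemoizedCall
    {
    public:
        explicit MemoizedCall(std::function<Result(const Arg &)> f) : func(std::move(f)) {}

        /// Serve a cached result, computing it on first use.
        Result operator()(const Arg & arg)
        {
            auto it = cache.find(arg);
            if (it == cache.end())
                it = cache.emplace(arg, func(arg)).first;
            return it->second;
        }

        void update(const Arg & arg) { cache[arg] = func(arg); }  /// recompute one key
        void drop() { cache.clear(); }                            /// invalidate all

    private:
        std::function<Result(const Arg &)> func;
        std::map<Arg, Result> cache;
    };

updateCache() further down pairs update() with an old/new comparison, which is how updateHost reports whether a host's addresses actually changed.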
+ NameSet new_hosts; + std::unordered_set<Poco::Net::IPAddress> new_addresses; + + /// Store all hosts, which was whenever asked to resolve + NameSet known_hosts; + std::unordered_set<Poco::Net::IPAddress> known_addresses; + + /// If disabled, will not make cache lookups, will resolve addresses manually on each call + std::atomic<bool> disable_cache{false}; +}; + + +DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()), log(&Poco::Logger::get("DNSResolver")) {} + +Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) +{ + return resolveHostAll(host).front(); +} + +DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) +{ + if (impl->disable_cache) + return resolveIPAddressImpl(host); + + addToNewHosts(host); + return impl->cache_host(host); +} + +Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port) +{ + if (impl->disable_cache) + return Poco::Net::SocketAddress(host_and_port); + + String host; + UInt16 port; + splitHostAndPort(host_and_port, host, port); + + addToNewHosts(host); + return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); +} + +Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) +{ + if (impl->disable_cache) + return Poco::Net::SocketAddress(host, port); + + addToNewHosts(host); + return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); +} + +String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) +{ + if (impl->disable_cache) + return reverseResolveImpl(address); + + addToNewAddresses(address); + return impl->cache_address(address); +} + +void DNSResolver::dropCache() +{ + impl->cache_host.drop(); + impl->cache_address.drop(); + + std::scoped_lock lock(impl->update_mutex, impl->drop_mutex); + + impl->known_hosts.clear(); + impl->known_addresses.clear(); + impl->new_hosts.clear(); + impl->new_addresses.clear(); + impl->host_name.reset(); +} + +void DNSResolver::setDisableCacheFlag(bool is_disabled) +{ + impl->disable_cache = is_disabled; +} + +String DNSResolver::getHostName() +{ + if (impl->disable_cache) + return Poco::Net::DNS::hostName(); + + std::lock_guard lock(impl->drop_mutex); + + if (!impl->host_name.has_value()) + impl->host_name.emplace(Poco::Net::DNS::hostName()); + + return *impl->host_name; +} + +static const String & cacheElemToString(const String & str) { return str; } +static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); } + +template<typename UpdateF, typename ElemsT> +bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg) +{ + bool updated = false; + String lost_elems; + for (const auto & elem : elems) + { + try + { + updated |= (this->*update_func)(elem); + } + catch (const Poco::Net::NetException &) + { + ProfileEvents::increment(ProfileEvents::DNSError); + + if (!lost_elems.empty()) + lost_elems += ", "; + lost_elems += cacheElemToString(elem); + } + catch (...) 
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+    }
+
+    if (!lost_elems.empty())
+        LOG_INFO(log, log_msg, lost_elems);
+
+    return updated;
+}
+
+bool DNSResolver::updateCache()
+{
+    LOG_DEBUG(log, "Updating DNS cache");
+
+    {
        String updated_host_name = Poco::Net::DNS::hostName();
-        std::lock_guard lock(impl->drop_mutex);
-
-        for (const auto & host : impl->new_hosts)
-            impl->known_hosts.insert(host);
-        impl->new_hosts.clear();
-
-        for (const auto & address : impl->new_addresses)
-            impl->known_addresses.insert(address);
-        impl->new_addresses.clear();
-
+        std::lock_guard lock(impl->drop_mutex);
+
+        for (const auto & host : impl->new_hosts)
+            impl->known_hosts.insert(host);
+        impl->new_hosts.clear();
+
+        for (const auto & address : impl->new_addresses)
+            impl->known_addresses.insert(address);
+        impl->new_addresses.clear();
+
        impl->host_name.emplace(updated_host_name);
-    }
-
+    }
+
    /// FIXME Updating may take a long time because we cannot manage timeouts of getaddrinfo(...) and getnameinfo(...).
-    /// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex)
-    std::lock_guard lock(impl->update_mutex);
-
-    bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}");
-    updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}");
-
-    LOG_DEBUG(log, "Updated DNS cache");
-    return hosts_updated;
-}
-
-bool DNSResolver::updateHost(const String & host)
-{
-    /// Usage of updateHost implies that host is already in cache and there is no extra computations
-    auto old_value = impl->cache_host(host);
-    impl->cache_host.update(host);
-    return old_value != impl->cache_host(host);
-}
-
-bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address)
-{
-    auto old_value = impl->cache_address(address);
-    impl->cache_address.update(address);
-    return old_value == impl->cache_address(address);
-}
-
-void DNSResolver::addToNewHosts(const String & host)
-{
-    std::lock_guard lock(impl->drop_mutex);
-    impl->new_hosts.insert(host);
-}
-
-void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address)
-{
-    std::lock_guard lock(impl->drop_mutex);
-    impl->new_addresses.insert(address);
-}
-
-DNSResolver::~DNSResolver() = default;
-
-DNSResolver & DNSResolver::instance()
-{
-    static DNSResolver ret;
-    return ret;
-}
-
-}
+    /// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex)
+    std::lock_guard lock(impl->update_mutex);
+
+    bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}");
+    updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}");
+
+    LOG_DEBUG(log, "Updated DNS cache");
+    return hosts_updated;
+}
+
+bool DNSResolver::updateHost(const String & host)
+{
+    /// Usage of updateHost implies that host is already in cache and there are no extra computations
+    auto old_value = impl->cache_host(host);
+    impl->cache_host.update(host);
+    return old_value != impl->cache_host(host);
+}
+
+bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address)
+{
+    auto old_value = impl->cache_address(address);
+    impl->cache_address.update(address);
+    return old_value != impl->cache_address(address);
+}
+
+void DNSResolver::addToNewHosts(const String & host)
+{
+    std::lock_guard lock(impl->drop_mutex);
+    impl->new_hosts.insert(host);
+}
+
+void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address)
+{
+    std::lock_guard lock(impl->drop_mutex);
+    impl->new_addresses.insert(address);
+}
+
+DNSResolver::~DNSResolver() = default;
+
+DNSResolver & DNSResolver::instance()
+{
+    static DNSResolver ret;
+    return ret;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h
index 102a5d2786..57c28188f5 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h
@@ -1,73 +1,73 @@
-#pragma once
-#include <Poco/Net/IPAddress.h>
-#include <Poco/Net/SocketAddress.h>
-#include <memory>
+#pragma once
+#include <Poco/Net/IPAddress.h>
+#include <Poco/Net/SocketAddress.h>
+#include <memory>
 #include <common/types.h>
-#include <Core/Names.h>
-#include <boost/noncopyable.hpp>
-#include <common/logger_useful.h>
-
-
-namespace DB
-{
-
-/// A singleton implementing DNS names resolving with optional DNS cache
-/// The cache is being updated asynchronous in separate thread (see DNSCacheUpdater)
-/// or it could be updated manually via drop() method.
-class DNSResolver : private boost::noncopyable
-{
-public:
-    typedef std::vector<Poco::Net::IPAddress> IPAddresses;
-
-    static DNSResolver & instance();
-
-    DNSResolver(const DNSResolver &) = delete;
-
-    /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves its IP
-    Poco::Net::IPAddress resolveHost(const std::string & host);
-
-    /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs
-    IPAddresses resolveHostAll(const std::string & host);
-
-    /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port
-    Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port);
-
-    Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
-
-    /// Accepts host IP and resolves its host name
-    String reverseResolve(const Poco::Net::IPAddress & address);
-
-    /// Get this server host name
-    String getHostName();
-
-    /// Disables caching
-    void setDisableCacheFlag(bool is_disabled = true);
-
-    /// Drops all caches
-    void dropCache();
-
-    /// Updates all known hosts in cache.
-    /// Returns true if IP of any host has been changed.
-    bool updateCache();
-
-    ~DNSResolver();
-
-private:
-    template<typename UpdateF, typename ElemsT>
-    bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg);
-
-    DNSResolver();
-
-    struct Impl;
-    std::unique_ptr<Impl> impl;
-    Poco::Logger * log;
-
-    /// Updates cached value and returns true it has been changed.
-    bool updateHost(const String & host);
-    bool updateAddress(const Poco::Net::IPAddress & address);
-
-    void addToNewHosts(const String & host);
-    void addToNewAddresses(const Poco::Net::IPAddress & address);
-};
-
-}
+#include <Core/Names.h>
+#include <boost/noncopyable.hpp>
+#include <common/logger_useful.h>
+
+
+namespace DB
+{
+
+/// A singleton implementing DNS name resolution with an optional DNS cache
+/// The cache is updated asynchronously in a separate thread (see DNSCacheUpdater),
+/// or it can be updated manually via the dropCache() method.
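///
/// A minimal usage sketch (hypothetical caller; it uses only the methods
/// declared just below):
///
///     auto & resolver = DB::DNSResolver::instance();
///     Poco::Net::IPAddress ip = resolver.resolveHost("example.com");
///     Poco::Net::SocketAddress addr = resolver.resolveAddress("example.com:9000");
///     String name = resolver.reverseResolve(ip);
///     bool changed = resolver.updateCache();  /// re-resolves every known host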
+class DNSResolver : private boost::noncopyable
+{
+public:
+    typedef std::vector<Poco::Net::IPAddress> IPAddresses;
+
+    static DNSResolver & instance();
+
+    DNSResolver(const DNSResolver &) = delete;
+
+    /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves its IP
+    Poco::Net::IPAddress resolveHost(const std::string & host);
+
+    /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs
+    IPAddresses resolveHostAll(const std::string & host);
+
+    /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port
+    Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port);
+
+    Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
+
+    /// Accepts host IP and resolves its host name
+    String reverseResolve(const Poco::Net::IPAddress & address);
+
+    /// Get this server host name
+    String getHostName();
+
+    /// Disables caching
+    void setDisableCacheFlag(bool is_disabled = true);
+
+    /// Drops all caches
+    void dropCache();
+
+    /// Updates all known hosts in cache.
+    /// Returns true if IP of any host has been changed.
+    bool updateCache();
+
+    ~DNSResolver();
+
+private:
+    template<typename UpdateF, typename ElemsT>
+    bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg);
+
+    DNSResolver();
+
+    struct Impl;
+    std::unique_ptr<Impl> impl;
+    Poco::Logger * log;
+
+    /// Updates the cached value and returns true if it has been changed.
+    bool updateHost(const String & host);
+    bool updateAddress(const Poco::Net::IPAddress & address);
+
+    void addToNewHosts(const String & host);
+    void addToNewAddresses(const Poco::Net::IPAddress & address);
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
index e3ca42398e..37bd81c8b4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
@@ -1,53 +1,53 @@
-#pragma once
-
-#include <Common/HashTable/FixedHashTable.h>
-#include <Common/HashTable/HashMap.h>
-
-
-template <typename Key, typename TMapped, typename TState = HashTableNoState>
-struct FixedHashMapCell
-{
-    using Mapped = TMapped;
-    using State = TState;
-
-    using value_type = PairNoInit<Key, Mapped>;
-    using mapped_type = TMapped;
-
-    bool full;
-    Mapped mapped;
-
+#pragma once
+
+#include <Common/HashTable/FixedHashTable.h>
+#include <Common/HashTable/HashMap.h>
+
+
+template <typename Key, typename TMapped, typename TState = HashTableNoState>
+struct FixedHashMapCell
+{
+    using Mapped = TMapped;
+    using State = TState;
+
+    using value_type = PairNoInit<Key, Mapped>;
+    using mapped_type = TMapped;
+
+    bool full;
+    Mapped mapped;
+
    FixedHashMapCell() {} //-V730
-    FixedHashMapCell(const Key &, const State &) : full(true) {}
-    FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
-
-    const VoidKey getKey() const { return {}; }
-    Mapped & getMapped() { return mapped; }
-    const Mapped & getMapped() const { return mapped; }
-
-    bool isZero(const State &) const { return !full; }
-    void setZero() { full = false; }
-
-    /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
-    /// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
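    /// (That is why getValue() below returns the pair by value: the key and the
    /// mapped value live in different places, so there is no real value_type
    /// anywhere to hand out a reference to.)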
- struct CellExt - { + FixedHashMapCell(const Key &, const State &) : full(true) {} + FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {} + + const VoidKey getKey() const { return {}; } + Mapped & getMapped() { return mapped; } + const Mapped & getMapped() const { return mapped; } + + bool isZero(const State &) const { return !full; } + void setZero() { full = false; } + + /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field. + /// Note that we have to assemble a continuous layout for the value_type on each call of getValue(). + struct CellExt + { CellExt() {} //-V730 - CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {} - void update(Key && key_, const FixedHashMapCell * ptr_) - { - key = key_; - ptr = const_cast<FixedHashMapCell *>(ptr_); - } - Key key; - FixedHashMapCell * ptr; - - const Key & getKey() const { return key; } - Mapped & getMapped() { return ptr->mapped; } - const Mapped & getMapped() const { return ptr->mapped; } - const value_type getValue() const { return {key, ptr->mapped}; } - }; -}; - + CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {} + void update(Key && key_, const FixedHashMapCell * ptr_) + { + key = key_; + ptr = const_cast<FixedHashMapCell *>(ptr_); + } + Key key; + FixedHashMapCell * ptr; + + const Key & getKey() const { return key; } + Mapped & getMapped() { return ptr->mapped; } + const Mapped & getMapped() const { return ptr->mapped; } + const value_type getValue() const { return {key, ptr->mapped}; } + }; +}; + /// In case when we can encode empty cells with zero mapped values. template <typename Key, typename TMapped, typename TState = HashTableNoState> @@ -101,64 +101,64 @@ template < typename Size = FixedHashTableStoredSize<Cell>, typename Allocator = HashTableAllocator> class FixedHashMap : public FixedHashTable<Key, Cell, Size, Allocator> -{ -public: +{ +public: using Base = FixedHashTable<Key, Cell, Size, Allocator>; - using Self = FixedHashMap; - using LookupResult = typename Base::LookupResult; - - using Base::Base; - - template <typename Func> - void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) - { - for (auto it = this->begin(), end = this->end(); it != end; ++it) - { - typename Self::LookupResult res_it; - bool inserted; - that.emplace(it->getKey(), res_it, inserted, it.getHash()); - func(res_it->getMapped(), it->getMapped(), inserted); - } - } - - template <typename Func> - void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) - { - for (auto it = this->begin(), end = this->end(); it != end; ++it) - { - auto res_it = that.find(it->getKey(), it.getHash()); - if (!res_it) - func(it->getMapped(), it->getMapped(), false); - else - func(res_it->getMapped(), it->getMapped(), true); - } - } - - template <typename Func> - void forEachValue(Func && func) - { - for (auto & v : *this) - func(v.getKey(), v.getMapped()); - } - - template <typename Func> - void forEachMapped(Func && func) - { - for (auto & v : *this) - func(v.getMapped()); - } - - Mapped & ALWAYS_INLINE operator[](const Key & x) - { - LookupResult it; - bool inserted; - this->emplace(x, it, inserted); - if (inserted) - new (&it->getMapped()) Mapped(); - - return it->getMapped(); - } -}; + using Self = FixedHashMap; + using LookupResult = typename Base::LookupResult; + + using Base::Base; + + template <typename Func> + void ALWAYS_INLINE mergeToViaEmplace(Self & that, 
Func && func) + { + for (auto it = this->begin(), end = this->end(); it != end; ++it) + { + typename Self::LookupResult res_it; + bool inserted; + that.emplace(it->getKey(), res_it, inserted, it.getHash()); + func(res_it->getMapped(), it->getMapped(), inserted); + } + } + + template <typename Func> + void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) + { + for (auto it = this->begin(), end = this->end(); it != end; ++it) + { + auto res_it = that.find(it->getKey(), it.getHash()); + if (!res_it) + func(it->getMapped(), it->getMapped(), false); + else + func(res_it->getMapped(), it->getMapped(), true); + } + } + + template <typename Func> + void forEachValue(Func && func) + { + for (auto & v : *this) + func(v.getKey(), v.getMapped()); + } + + template <typename Func> + void forEachMapped(Func && func) + { + for (auto & v : *this) + func(v.getMapped()); + } + + Mapped & ALWAYS_INLINE operator[](const Key & x) + { + LookupResult it; + bool inserted; + this->emplace(x, it, inserted); + if (inserted) + new (&it->getMapped()) Mapped(); + + return it->getMapped(); + } +}; template <typename Key, typename Mapped, typename Allocator = HashTableAllocator> diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h index 2b9a5d61de..c1d2c8fe6e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h @@ -1,52 +1,52 @@ -#pragma once - -#include <Common/HashTable/HashTable.h> - -namespace DB -{ - namespace ErrorCodes - { - extern const int NO_AVAILABLE_DATA; - } -} - -template <typename Key, typename TState = HashTableNoState> -struct FixedHashTableCell -{ - using State = TState; - - using value_type = Key; - using mapped_type = VoidMapped; - bool full; - +#pragma once + +#include <Common/HashTable/HashTable.h> + +namespace DB +{ + namespace ErrorCodes + { + extern const int NO_AVAILABLE_DATA; + } +} + +template <typename Key, typename TState = HashTableNoState> +struct FixedHashTableCell +{ + using State = TState; + + using value_type = Key; + using mapped_type = VoidMapped; + bool full; + FixedHashTableCell() {} //-V730 - FixedHashTableCell(const Key &, const State &) : full(true) {} - - const VoidKey getKey() const { return {}; } - VoidMapped getMapped() const { return {}; } - - bool isZero(const State &) const { return !full; } - void setZero() { full = false; } - static constexpr bool need_zero_value_storage = false; - + FixedHashTableCell(const Key &, const State &) : full(true) {} + + const VoidKey getKey() const { return {}; } + VoidMapped getMapped() const { return {}; } + + bool isZero(const State &) const { return !full; } + void setZero() { full = false; } + static constexpr bool need_zero_value_storage = false; + /// This Cell is only stored inside an iterator. It's used to accommodate the fact - /// that the iterator based API always provide a reference to a continuous memory - /// containing the Key. As a result, we have to instantiate a real Key field. - /// All methods that return a mutable reference to the Key field are named with - /// -Mutable suffix, indicating this is uncommon usage. As this is only for lookup - /// tables, it's totally fine to discard the Key mutations. 
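    /// (The key itself is implicit: it is the cell's index in the buffer, which
    /// is why iterator_base below reconstructs it as ptr - container->buf.)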
-    struct CellExt
-    {
-        Key key;
-
-        const VoidKey getKey() const { return {}; }
-        VoidMapped getMapped() const { return {}; }
-        const value_type & getValue() const { return key; }
-        void update(Key && key_, FixedHashTableCell *) { key = key_; }
-    };
-};
-
-
+    /// that the iterator-based API always provides a reference to contiguous memory
+    /// containing the Key. As a result, we have to instantiate a real Key field.
+    /// All methods that return a mutable reference to the Key field are named with
+    /// -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
+    /// tables, it's totally fine to discard the Key mutations.
+    struct CellExt
+    {
+        Key key;
+
+        const VoidKey getKey() const { return {}; }
+        VoidMapped getMapped() const { return {}; }
+        const value_type & getValue() const { return key; }
+        void update(Key && key_, FixedHashTableCell *) { key = key_; }
+    };
+};
+
+
/// How to obtain the size of the table.
template <typename Cell>
@@ -88,394 +88,394 @@ struct FixedHashTableCalculatedSize
};

-/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
- * than a HashTable in that keys are not stored in the Cell buf, but inferred
- * inside each iterator. There are a bunch of to make it faster than using
- * HashTable: a) It doesn't have a conflict chain; b) There is no key
 * comparison; c) The number of cycles for checking cell empty is halved; d)
- * Memory layout is tighter, especially the Clearable variants.
- *
- * NOTE: For Set variants this should always be better. For Map variants
- * however, as we need to assemble the real cell inside each iterator, there
- * might be some cases we fall short.
- *
- * TODO: Deprecate the cell API so that end users don't rely on the structure
- * of cell. Instead iterator should be used for operations such as cell
- * transfer, key updates (f.g. StringRef) and serde. This will allow
- * TwoLevelHashSet(Map) to contain different type of sets(maps).
- */
+/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
+ * than a HashTable in that keys are not stored in the Cell buf, but inferred
+ * inside each iterator. There are a bunch of optimizations to make it faster than using
+ * HashTable: a) It doesn't have a conflict chain; b) There is no key
 * comparison; c) The number of cycles for checking cell empty is halved; d)
+ * Memory layout is tighter, especially the Clearable variants.
+ *
+ * NOTE: For Set variants this should always be better. For Map variants
+ * however, as we need to assemble the real cell inside each iterator, there
+ * might be some cases we fall short.
+ *
+ * TODO: Deprecate the cell API so that end users don't rely on the structure
+ * of cell. Instead iterator should be used for operations such as cell
+ * transfer, key updates (e.g. StringRef) and serde. This will allow
+ * TwoLevelHashSet(Map) to contain different types of sets (maps).
+ */
template <typename Key, typename Cell, typename Size, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size
-{
-    static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
-
-protected:
-    friend class const_iterator;
-    friend class iterator;
-    friend class Reader;
-
-    using Self = FixedHashTable;
-
-    Cell * buf; /// A piece of memory for all elements.
- - void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); } - - void free() - { - if (buf) - { - Allocator::free(buf, getBufferSizeInBytes()); - buf = nullptr; - } - } - - void destroyElements() - { - if (!std::is_trivially_destructible_v<Cell>) - for (iterator it = begin(), it_end = end(); it != it_end; ++it) - it.ptr->~Cell(); - } - - - template <typename Derived, bool is_const> - class iterator_base - { - using Container = std::conditional_t<is_const, const Self, Self>; - using cell_type = std::conditional_t<is_const, const Cell, Cell>; - - Container * container; - cell_type * ptr; - - friend class FixedHashTable; - - public: - iterator_base() {} - iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) - { - cell.update(ptr - container->buf, ptr); - } - - bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; } - bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; } - - Derived & operator++() - { - ++ptr; - - /// Skip empty cells in the main buffer. - auto buf_end = container->buf + container->NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*container)) - ++ptr; - - return static_cast<Derived &>(*this); - } - - auto & operator*() - { - if (cell.key != ptr - container->buf) - cell.update(ptr - container->buf, ptr); - return cell; - } - auto * operator-> () - { - if (cell.key != ptr - container->buf) - cell.update(ptr - container->buf, ptr); - return &cell; - } - - auto getPtr() const { return ptr; } - size_t getHash() const { return ptr - container->buf; } - size_t getCollisionChainLength() const { return 0; } - typename cell_type::CellExt cell; - }; - - -public: - using key_type = Key; - using mapped_type = typename Cell::mapped_type; - using value_type = typename Cell::value_type; - using cell_type = Cell; - - using LookupResult = Cell *; - using ConstLookupResult = const Cell *; - - - size_t hash(const Key & x) const { return x; } - - FixedHashTable() { alloc(); } - - FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); } - - ~FixedHashTable() - { - destroyElements(); - free(); - } - - FixedHashTable & operator=(FixedHashTable && rhs) - { - destroyElements(); - free(); - - std::swap(buf, rhs.buf); +{ + static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); + +protected: + friend class const_iterator; + friend class iterator; + friend class Reader; + + using Self = FixedHashTable; + + Cell * buf; /// A piece of memory for all elements. 
+ + void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); } + + void free() + { + if (buf) + { + Allocator::free(buf, getBufferSizeInBytes()); + buf = nullptr; + } + } + + void destroyElements() + { + if (!std::is_trivially_destructible_v<Cell>) + for (iterator it = begin(), it_end = end(); it != it_end; ++it) + it.ptr->~Cell(); + } + + + template <typename Derived, bool is_const> + class iterator_base + { + using Container = std::conditional_t<is_const, const Self, Self>; + using cell_type = std::conditional_t<is_const, const Cell, Cell>; + + Container * container; + cell_type * ptr; + + friend class FixedHashTable; + + public: + iterator_base() {} + iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) + { + cell.update(ptr - container->buf, ptr); + } + + bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; } + bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; } + + Derived & operator++() + { + ++ptr; + + /// Skip empty cells in the main buffer. + auto buf_end = container->buf + container->NUM_CELLS; + while (ptr < buf_end && ptr->isZero(*container)) + ++ptr; + + return static_cast<Derived &>(*this); + } + + auto & operator*() + { + if (cell.key != ptr - container->buf) + cell.update(ptr - container->buf, ptr); + return cell; + } + auto * operator-> () + { + if (cell.key != ptr - container->buf) + cell.update(ptr - container->buf, ptr); + return &cell; + } + + auto getPtr() const { return ptr; } + size_t getHash() const { return ptr - container->buf; } + size_t getCollisionChainLength() const { return 0; } + typename cell_type::CellExt cell; + }; + + +public: + using key_type = Key; + using mapped_type = typename Cell::mapped_type; + using value_type = typename Cell::value_type; + using cell_type = Cell; + + using LookupResult = Cell *; + using ConstLookupResult = const Cell *; + + + size_t hash(const Key & x) const { return x; } + + FixedHashTable() { alloc(); } + + FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); } + + ~FixedHashTable() + { + destroyElements(); + free(); + } + + FixedHashTable & operator=(FixedHashTable && rhs) + { + destroyElements(); + free(); + + std::swap(buf, rhs.buf); this->setSize(rhs.size()); - - Allocator::operator=(std::move(rhs)); - Cell::State::operator=(std::move(rhs)); - - return *this; - } - - class Reader final : private Cell::State - { - public: - Reader(DB::ReadBuffer & in_) : in(in_) {} - - Reader(const Reader &) = delete; - Reader & operator=(const Reader &) = delete; - - bool next() - { - if (!is_initialized) - { - Cell::State::read(in); - DB::readVarUInt(size, in); - is_initialized = true; - } - - if (read_count == size) - { - is_eof = true; - return false; - } - - cell.read(in); - ++read_count; - - return true; - } - - inline const value_type & get() const - { - if (!is_initialized || is_eof) - throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); - - return cell.getValue(); - } - - private: - DB::ReadBuffer & in; - Cell cell; - size_t read_count = 0; + + Allocator::operator=(std::move(rhs)); + Cell::State::operator=(std::move(rhs)); + + return *this; + } + + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) : in(in_) {} + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (!is_initialized) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + is_initialized = true; + } + + if 
(read_count == size) + { + is_eof = true; + return false; + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if (!is_initialized || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return cell.getValue(); + } + + private: + DB::ReadBuffer & in; + Cell cell; + size_t read_count = 0; size_t size = 0; - bool is_eof = false; - bool is_initialized = false; - }; - - - class iterator : public iterator_base<iterator, false> - { - public: - using iterator_base<iterator, false>::iterator_base; - }; - - class const_iterator : public iterator_base<const_iterator, true> - { - public: - using iterator_base<const_iterator, true>::iterator_base; - }; - - - const_iterator begin() const - { - if (!buf) - return end(); - - const Cell * ptr = buf; - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - - return const_iterator(this, ptr); - } - - const_iterator cbegin() const { return begin(); } - - iterator begin() - { - if (!buf) - return end(); - - Cell * ptr = buf; - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - - return iterator(this, ptr); - } - - const_iterator end() const - { - /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - return const_iterator(this, buf ? buf + NUM_CELLS : buf); - } - - const_iterator cend() const - { - return end(); - } - - iterator end() - { - return iterator(this, buf ? buf + NUM_CELLS : buf); - } - - -public: - /// The last parameter is unused but exists for compatibility with HashTable interface. - void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) - { - it = &buf[x]; - - if (!buf[x].isZero(*this)) - { - inserted = false; - return; - } - - new (&buf[x]) Cell(x, *this); - inserted = true; + bool is_eof = false; + bool is_initialized = false; + }; + + + class iterator : public iterator_base<iterator, false> + { + public: + using iterator_base<iterator, false>::iterator_base; + }; + + class const_iterator : public iterator_base<const_iterator, true> + { + public: + using iterator_base<const_iterator, true>::iterator_base; + }; + + + const_iterator begin() const + { + if (!buf) + return end(); + + const Cell * ptr = buf; + auto buf_end = buf + NUM_CELLS; + while (ptr < buf_end && ptr->isZero(*this)) + ++ptr; + + return const_iterator(this, ptr); + } + + const_iterator cbegin() const { return begin(); } + + iterator begin() + { + if (!buf) + return end(); + + Cell * ptr = buf; + auto buf_end = buf + NUM_CELLS; + while (ptr < buf_end && ptr->isZero(*this)) + ++ptr; + + return iterator(this, ptr); + } + + const_iterator end() const + { + /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. + return const_iterator(this, buf ? buf + NUM_CELLS : buf); + } + + const_iterator cend() const + { + return end(); + } + + iterator end() + { + return iterator(this, buf ? buf + NUM_CELLS : buf); + } + + +public: + /// The last parameter is unused but exists for compatibility with HashTable interface. 
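    ///
    /// Because the key doubles as the bucket index, emplace never probes. A
    /// caller-side sketch (hypothetical instantiation, mirroring
    /// FixedHashMap::operator[] above):
    ///
    ///     using Map = FixedHashMap<UInt8, FixedHashMapCell<UInt8, UInt64>>;
    ///     Map counts;
    ///     Map::LookupResult it;
    ///     bool inserted;
    ///     UInt8 key = 42;
    ///     counts.emplace(key, it, inserted);
    ///     if (inserted)
    ///         new (&it->getMapped()) UInt64();  /// construct the mapped value in place
    ///     ++it->getMapped();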
+ void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) + { + it = &buf[x]; + + if (!buf[x].isZero(*this)) + { + inserted = false; + return; + } + + new (&buf[x]) Cell(x, *this); + inserted = true; this->increaseSize(); - } - - std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x) - { - std::pair<LookupResult, bool> res; - emplace(Cell::getKey(x), res.first, res.second); - if (res.second) - insertSetMapped(res.first->getMapped(), x); - - return res; - } - - LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; } - - ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); } - - LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; } - - ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const - { - return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value); - } - - bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); } - bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); } - - void write(DB::WriteBuffer & wb) const - { - Cell::State::write(wb); + } + + std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x) + { + std::pair<LookupResult, bool> res; + emplace(Cell::getKey(x), res.first, res.second); + if (res.second) + insertSetMapped(res.first->getMapped(), x); + + return res; + } + + LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; } + + ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); } + + LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? 
&buf[hash_value] : nullptr; } + + ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const + { + return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value); + } + + bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); } + bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); } + + void write(DB::WriteBuffer & wb) const + { + Cell::State::write(wb); DB::writeVarUInt(size(), wb); - - if (!buf) - return; - - for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) - { - if (!ptr->isZero(*this)) - { - DB::writeVarUInt(ptr - buf); - ptr->write(wb); - } - } - } - - void writeText(DB::WriteBuffer & wb) const - { - Cell::State::writeText(wb); + + if (!buf) + return; + + for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) + { + if (!ptr->isZero(*this)) + { + DB::writeVarUInt(ptr - buf); + ptr->write(wb); + } + } + } + + void writeText(DB::WriteBuffer & wb) const + { + Cell::State::writeText(wb); DB::writeText(size(), wb); - - if (!buf) - return; - - for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) - { - if (!ptr->isZero(*this)) - { - DB::writeChar(',', wb); - DB::writeText(ptr - buf, wb); - DB::writeChar(',', wb); - ptr->writeText(wb); - } - } - } - - void read(DB::ReadBuffer & rb) - { - Cell::State::read(rb); - destroyElements(); + + if (!buf) + return; + + for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) + { + if (!ptr->isZero(*this)) + { + DB::writeChar(',', wb); + DB::writeText(ptr - buf, wb); + DB::writeChar(',', wb); + ptr->writeText(wb); + } + } + } + + void read(DB::ReadBuffer & rb) + { + Cell::State::read(rb); + destroyElements(); size_t m_size; - DB::readVarUInt(m_size, rb); + DB::readVarUInt(m_size, rb); this->setSize(m_size); - free(); - alloc(); - - for (size_t i = 0; i < m_size; ++i) - { - size_t place_value = 0; - DB::readVarUInt(place_value, rb); - Cell x; - x.read(rb); - new (&buf[place_value]) Cell(x, *this); - } - } - - void readText(DB::ReadBuffer & rb) - { - Cell::State::readText(rb); - destroyElements(); + free(); + alloc(); + + for (size_t i = 0; i < m_size; ++i) + { + size_t place_value = 0; + DB::readVarUInt(place_value, rb); + Cell x; + x.read(rb); + new (&buf[place_value]) Cell(x, *this); + } + } + + void readText(DB::ReadBuffer & rb) + { + Cell::State::readText(rb); + destroyElements(); size_t m_size; - DB::readText(m_size, rb); + DB::readText(m_size, rb); this->setSize(m_size); - free(); - alloc(); - - for (size_t i = 0; i < m_size; ++i) - { - size_t place_value = 0; - DB::assertChar(',', rb); - DB::readText(place_value, rb); - Cell x; - DB::assertChar(',', rb); - x.readText(rb); - new (&buf[place_value]) Cell(x, *this); - } - } - + free(); + alloc(); + + for (size_t i = 0; i < m_size; ++i) + { + size_t place_value = 0; + DB::assertChar(',', rb); + DB::readText(place_value, rb); + Cell x; + DB::assertChar(',', rb); + x.readText(rb); + new (&buf[place_value]) Cell(x, *this); + } + } + size_t size() const { return this->getSize(buf, *this, NUM_CELLS); } bool empty() const { return this->isEmpty(buf, *this, NUM_CELLS); } - - void clear() - { - destroyElements(); + + void clear() + { + destroyElements(); this->clearSize(); - - memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf)); - } - - /// After executing this function, the table can only be destroyed, - /// and also you can use the methods `size`, `empty`, `begin`, `end`. 
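    /// (clearAndShrink() below frees buf and nulls it, so a later find() or
    /// emplace() would index through a null buffer; only the methods listed
    /// above remain safe.)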
- void clearAndShrink() - { - destroyElements(); + + memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf)); + } + + /// After executing this function, the table can only be destroyed, + /// and also you can use the methods `size`, `empty`, `begin`, `end`. + void clearAndShrink() + { + destroyElements(); this->clearSize(); - free(); - } - - size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); } - - size_t getBufferSizeInCells() const { return NUM_CELLS; } - + free(); + } + + size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); } + + size_t getBufferSizeInCells() const { return NUM_CELLS; } + /// Return offset for result in internal buffer. /// Result can have value up to `getBufferSizeInCells() + 1` /// because offset for zero value considered to be 0 @@ -490,7 +490,7 @@ public: const Cell * data() const { return buf; } Cell * data() { return buf; } -#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS - size_t getCollisions() const { return 0; } -#endif -}; +#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS + size_t getCollisions() const { return 0; } +#endif +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h index dc922dde2f..298580dc83 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h @@ -1,189 +1,189 @@ -#pragma once - -#include <Common/HashTable/HashMap.h> -#include <Common/HashTable/HashTableAllocator.h> -#include <Common/HashTable/StringHashTable.h> - -template <typename Key, typename TMapped> -struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState> -{ - using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>; - using value_type = typename Base::value_type; - using Base::Base; - static constexpr bool need_zero_value_storage = false; - // external - const StringRef getKey() const { return toStringRef(this->value.first); } - // internal - static const Key & getKey(const value_type & value_) { return value_.first; } -}; - -template <typename TMapped> -struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState> -{ - using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>; - using value_type = typename Base::value_type; - using Base::Base; - static constexpr bool need_zero_value_storage = false; - bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } - - // Zero means unoccupied cells in hash table. Use key with last word = 0 as - // zero keys, because such keys are unrepresentable (no way to encode length). 
+#pragma once + +#include <Common/HashTable/HashMap.h> +#include <Common/HashTable/HashTableAllocator.h> +#include <Common/HashTable/StringHashTable.h> + +template <typename Key, typename TMapped> +struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState> +{ + using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>; + using value_type = typename Base::value_type; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + // external + const StringRef getKey() const { return toStringRef(this->value.first); } + // internal + static const Key & getKey(const value_type & value_) { return value_.first; } +}; + +template <typename TMapped> +struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState> +{ + using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>; + using value_type = typename Base::value_type; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } + + // Zero means unoccupied cells in hash table. Use key with last word = 0 as + // zero keys, because such keys are unrepresentable (no way to encode length). static bool isZero(const StringKey16 & key, const HashTableNoState &) { return key.items[1] == 0; } void setZero() { this->value.first.items[1] = 0; } - - // external - const StringRef getKey() const { return toStringRef(this->value.first); } - // internal - static const StringKey16 & getKey(const value_type & value_) { return value_.first; } -}; - -template <typename TMapped> -struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState> -{ - using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>; - using value_type = typename Base::value_type; - using Base::Base; - static constexpr bool need_zero_value_storage = false; - bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } - - // Zero means unoccupied cells in hash table. Use key with last word = 0 as - // zero keys, because such keys are unrepresentable (no way to encode length). 
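    //
    // Worked example of the length trick (assuming the little-endian layout
    // that the length recovery in StringHashTable.h relies on): a 17..24-byte
    // key always has a nonzero byte in its last word c, e.g. a 17-byte key of
    // all 'a' stores 0x61 in c, and toStringRef() recovers the length as
    // 24 - (__builtin_clzll(0x61) >> 3) = 24 - 7 = 17. Hence c == 0 can never
    // be a live key and safely marks an empty cell.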
- static bool isZero(const StringKey24 & key, const HashTableNoState &) - { return key.c == 0; } - void setZero() { this->value.first.c = 0; } - - // external - const StringRef getKey() const { return toStringRef(this->value.first); } - // internal - static const StringKey24 & getKey(const value_type & value_) { return value_.first; } -}; - -template <typename TMapped> -struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState> -{ - using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>; - using value_type = typename Base::value_type; - using Base::Base; - static constexpr bool need_zero_value_storage = false; - // external - using Base::getKey; - // internal - static const StringRef & getKey(const value_type & value_) { return value_.first; } -}; - -template <typename TMapped, typename Allocator> -struct StringHashMapSubMaps -{ - using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>; - using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; - using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; - using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; - using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; -}; - -template <typename TMapped, typename Allocator = HashTableAllocator> -class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>> -{ -public: - using Key = StringRef; - using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>; - using Self = StringHashMap; - using LookupResult = typename Base::LookupResult; - - using Base::Base; - - /// Merge every cell's value of current map into the destination map. - /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). - /// Each filled cell in current map will invoke func once. If that map doesn't - /// have a key equals to the given cell, a new cell gets emplaced into that map, - /// and func is invoked with the third argument emplaced set to true. Otherwise - /// emplaced is set to false. - template <typename Func> - void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) - { - if (this->m0.hasZero() && that.m0.hasZero()) - func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false); - else if (this->m0.hasZero()) - { - that.m0.setHasZero(); - func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true); - } - this->m1.mergeToViaEmplace(that.m1, func); - this->m2.mergeToViaEmplace(that.m2, func); - this->m3.mergeToViaEmplace(that.m3, func); - this->ms.mergeToViaEmplace(that.ms, func); - } - - /// Merge every cell's value of current map into the destination map via find. - /// Func should have signature void(Mapped & dst, Mapped & src, bool exist). - /// Each filled cell in current map will invoke func once. If that map doesn't - /// have a key equals to the given cell, func is invoked with the third argument - /// exist set to false. Otherwise exist is set to true. 
- template <typename Func> - void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) - { - if (this->m0.size() && that.m0.size()) - func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true); - else if (this->m0.size()) - func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false); - this->m1.mergeToViaFind(that.m1, func); - this->m2.mergeToViaFind(that.m2, func); - this->m3.mergeToViaFind(that.m3, func); - this->ms.mergeToViaFind(that.ms, func); - } - - TMapped & ALWAYS_INLINE operator[](const Key & x) - { - LookupResult it; - bool inserted; - this->emplace(x, it, inserted); - if (inserted) - new (&it->getMapped()) TMapped(); - - return it->getMapped(); - } - - template <typename Func> - void ALWAYS_INLINE forEachValue(Func && func) - { - if (this->m0.size()) - { - func(StringRef{}, this->m0.zeroValue()->getMapped()); - } - - for (auto & v : this->m1) - { - func(v.getKey(), v.getMapped()); - } - - for (auto & v : this->m2) - { - func(v.getKey(), v.getMapped()); - } - - for (auto & v : this->m3) - { - func(v.getKey(), v.getMapped()); - } - - for (auto & v : this->ms) - { - func(v.getKey(), v.getMapped()); - } - } - - template <typename Func> - void ALWAYS_INLINE forEachMapped(Func && func) - { - if (this->m0.size()) - func(this->m0.zeroValue()->getMapped()); - for (auto & v : this->m1) - func(v.getMapped()); - for (auto & v : this->m2) - func(v.getMapped()); - for (auto & v : this->m3) - func(v.getMapped()); - for (auto & v : this->ms) - func(v.getMapped()); - } -}; + + // external + const StringRef getKey() const { return toStringRef(this->value.first); } + // internal + static const StringKey16 & getKey(const value_type & value_) { return value_.first; } +}; + +template <typename TMapped> +struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState> +{ + using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>; + using value_type = typename Base::value_type; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } + + // Zero means unoccupied cells in hash table. Use key with last word = 0 as + // zero keys, because such keys are unrepresentable (no way to encode length). 
+    static bool isZero(const StringKey24 & key, const HashTableNoState &)
+    { return key.c == 0; }
+    void setZero() { this->value.first.c = 0; }
+
+    // external
+    const StringRef getKey() const { return toStringRef(this->value.first); }
+    // internal
+    static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
+};
+
+template <typename TMapped>
+struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
+{
+    using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
+    using value_type = typename Base::value_type;
+    using Base::Base;
+    static constexpr bool need_zero_value_storage = false;
+    // external
+    using Base::getKey;
+    // internal
+    static const StringRef & getKey(const value_type & value_) { return value_.first; }
+};
+
+template <typename TMapped, typename Allocator>
+struct StringHashMapSubMaps
+{
+    using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
+    using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+    using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+    using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+    using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+};
+
+template <typename TMapped, typename Allocator = HashTableAllocator>
+class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
+{
+public:
+    using Key = StringRef;
+    using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
+    using Self = StringHashMap;
+    using LookupResult = typename Base::LookupResult;
+
+    using Base::Base;
+
+    /// Merge every cell's value of current map into the destination map.
+    /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
+    /// Each filled cell in current map will invoke func once. If that map doesn't
+    /// have a key equal to the given cell, a new cell gets emplaced into that map,
+    /// and func is invoked with the third argument emplaced set to true. Otherwise
+    /// emplaced is set to false.
+    template <typename Func>
+    void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
+    {
+        if (this->m0.hasZero() && that.m0.hasZero())
+            func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
+        else if (this->m0.hasZero())
+        {
+            that.m0.setHasZero();
+            func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
+        }
+        this->m1.mergeToViaEmplace(that.m1, func);
+        this->m2.mergeToViaEmplace(that.m2, func);
+        this->m3.mergeToViaEmplace(that.m3, func);
+        this->ms.mergeToViaEmplace(that.ms, func);
+    }
+
+    /// Merge every cell's value of current map into the destination map via find.
+    /// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
+    /// Each filled cell in current map will invoke func once. If that map doesn't
+    /// have a key equal to the given cell, func is invoked with the third argument
+    /// exist set to false. Otherwise exist is set to true.
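    ///
    /// A minimal merge sketch (hypothetical maps with Mapped = UInt64, summing
    /// values for keys present on both sides):
    ///
    ///     StringHashMap<UInt64> dst, src;
    ///     src.mergeToViaFind(dst, [](UInt64 & d, UInt64 & s, bool exist)
    ///     {
    ///         if (exist)
    ///             d += s;  /// key found in dst: combine in place
    ///         /// when !exist, d aliases src's own value and dst stays untouched
    ///     });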
+ template <typename Func> + void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) + { + if (this->m0.size() && that.m0.size()) + func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true); + else if (this->m0.size()) + func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false); + this->m1.mergeToViaFind(that.m1, func); + this->m2.mergeToViaFind(that.m2, func); + this->m3.mergeToViaFind(that.m3, func); + this->ms.mergeToViaFind(that.ms, func); + } + + TMapped & ALWAYS_INLINE operator[](const Key & x) + { + LookupResult it; + bool inserted; + this->emplace(x, it, inserted); + if (inserted) + new (&it->getMapped()) TMapped(); + + return it->getMapped(); + } + + template <typename Func> + void ALWAYS_INLINE forEachValue(Func && func) + { + if (this->m0.size()) + { + func(StringRef{}, this->m0.zeroValue()->getMapped()); + } + + for (auto & v : this->m1) + { + func(v.getKey(), v.getMapped()); + } + + for (auto & v : this->m2) + { + func(v.getKey(), v.getMapped()); + } + + for (auto & v : this->m3) + { + func(v.getKey(), v.getMapped()); + } + + for (auto & v : this->ms) + { + func(v.getKey(), v.getMapped()); + } + } + + template <typename Func> + void ALWAYS_INLINE forEachMapped(Func && func) + { + if (this->m0.size()) + func(this->m0.zeroValue()->getMapped()); + for (auto & v : this->m1) + func(v.getMapped()); + for (auto & v : this->m2) + func(v.getMapped()); + for (auto & v : this->m3) + func(v.getMapped()); + for (auto & v : this->ms) + func(v.getMapped()); + } +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h index 289195267c..d30271d65d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h @@ -1,227 +1,227 @@ -#pragma once - -#include <Common/HashTable/HashMap.h> -#include <Common/HashTable/HashTable.h> - +#pragma once + +#include <Common/HashTable/HashMap.h> +#include <Common/HashTable/HashTable.h> + #include <new> -#include <variant> - - -using StringKey8 = UInt64; -using StringKey16 = DB::UInt128; -struct StringKey24 -{ - UInt64 a; - UInt64 b; - UInt64 c; - - bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } -}; - -inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n) -{ - assert(n != 0); - return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)}; -} -inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n) -{ +#include <variant> + + +using StringKey8 = UInt64; +using StringKey16 = DB::UInt128; +struct StringKey24 +{ + UInt64 a; + UInt64 b; + UInt64 c; + + bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } +}; + +inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n) +{ + assert(n != 0); + return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)}; +} +inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n) +{ assert(n.items[1] != 0); return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.items[1]) >> 3)}; -} -inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) -{ - assert(n.c != 0); - return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; -} - -struct StringHashTableHash -{ -#if defined(__SSE4_2__) - size_t ALWAYS_INLINE operator()(StringKey8 key) 
const - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key); - return res; - } - size_t ALWAYS_INLINE operator()(StringKey16 key) const - { - size_t res = -1ULL; +} +inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) +{ + assert(n.c != 0); + return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; +} + +struct StringHashTableHash +{ +#if defined(__SSE4_2__) + size_t ALWAYS_INLINE operator()(StringKey8 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key); + return res; + } + size_t ALWAYS_INLINE operator()(StringKey16 key) const + { + size_t res = -1ULL; res = _mm_crc32_u64(res, key.items[0]); res = _mm_crc32_u64(res, key.items[1]); - return res; - } - size_t ALWAYS_INLINE operator()(StringKey24 key) const - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key.a); - res = _mm_crc32_u64(res, key.b); - res = _mm_crc32_u64(res, key.c); - return res; - } -#else - size_t ALWAYS_INLINE operator()(StringKey8 key) const - { - return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8); - } - size_t ALWAYS_INLINE operator()(StringKey16 key) const - { - return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16); - } - size_t ALWAYS_INLINE operator()(StringKey24 key) const - { - return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24); - } -#endif - size_t ALWAYS_INLINE operator()(StringRef key) const - { - return StringRefHash()(key); - } -}; - -template <typename Cell> + return res; + } + size_t ALWAYS_INLINE operator()(StringKey24 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.a); + res = _mm_crc32_u64(res, key.b); + res = _mm_crc32_u64(res, key.c); + return res; + } +#else + size_t ALWAYS_INLINE operator()(StringKey8 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8); + } + size_t ALWAYS_INLINE operator()(StringKey16 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16); + } + size_t ALWAYS_INLINE operator()(StringKey24 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24); + } +#endif + size_t ALWAYS_INLINE operator()(StringRef key) const + { + return StringRefHash()(key); + } +}; + +template <typename Cell> struct StringHashTableEmpty //-V730 -{ - using Self = StringHashTableEmpty; - - bool has_zero = false; - std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key. - -public: - bool hasZero() const { return has_zero; } - - void setHasZero() - { - has_zero = true; - new (zeroValue()) Cell(); - } - - void setHasZero(const Cell & other) - { - has_zero = true; - new (zeroValue()) Cell(other); - } - - void clearHasZero() - { - has_zero = false; - if (!std::is_trivially_destructible_v<Cell>) - zeroValue()->~Cell(); - } - +{ + using Self = StringHashTableEmpty; + + bool has_zero = false; + std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key. 
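    /// (The Cell is constructed lazily with placement new in setHasZero() and
    /// read back through std::launder in zeroValue() below, so an empty table
    /// never runs a Cell constructor or destructor.)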
+ +public: + bool hasZero() const { return has_zero; } + + void setHasZero() + { + has_zero = true; + new (zeroValue()) Cell(); + } + + void setHasZero(const Cell & other) + { + has_zero = true; + new (zeroValue()) Cell(other); + } + + void clearHasZero() + { + has_zero = false; + if (!std::is_trivially_destructible_v<Cell>) + zeroValue()->~Cell(); + } + Cell * zeroValue() { return std::launder(reinterpret_cast<Cell *>(&zero_value_storage)); } const Cell * zeroValue() const { return std::launder(reinterpret_cast<const Cell *>(&zero_value_storage)); } - - using LookupResult = Cell *; - using ConstLookupResult = const Cell *; - - template <typename KeyHolder> - void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0) - { - if (!hasZero()) - { - setHasZero(); - inserted = true; - } - else - inserted = false; - it = zeroValue(); - } - - template <typename Key> - LookupResult ALWAYS_INLINE find(const Key &, size_t = 0) - { - return hasZero() ? zeroValue() : nullptr; - } - - template <typename Key> - ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const - { - return hasZero() ? zeroValue() : nullptr; - } - - void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } - void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } - void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } - void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); } - size_t size() const { return hasZero() ? 1 : 0; } - bool empty() const { return !hasZero(); } - size_t getBufferSizeInBytes() const { return sizeof(Cell); } - size_t getCollisions() const { return 0; } -}; - -template <size_t initial_size_degree = 8> -struct StringHashTableGrower : public HashTableGrower<initial_size_degree> -{ - // Smooth growing for string maps - void increaseSize() { this->size_degree += 1; } -}; - -template <typename Mapped> -struct StringHashTableLookupResult -{ - Mapped * mapped_ptr; - StringHashTableLookupResult() {} - StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {} - StringHashTableLookupResult(std::nullptr_t) {} - const VoidKey getKey() const { return {}; } - auto & getMapped() { return *mapped_ptr; } - auto & operator*() { return *this; } - auto & operator*() const { return *this; } - auto * operator->() { return this; } - auto * operator->() const { return this; } - operator bool() const { return mapped_ptr; } - friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; } - friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; } - friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; } - friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; } -}; - -template <typename SubMaps> -class StringHashTable : private boost::noncopyable -{ -protected: - static constexpr size_t NUM_MAPS = 5; - // Map for storing empty string - using T0 = typename SubMaps::T0; - - // Short strings are stored as numbers - using T1 = typename SubMaps::T1; - using T2 = typename SubMaps::T2; - using T3 = typename SubMaps::T3; - - // Long strings are stored as StringRef along with saved hash - using Ts = typename SubMaps::Ts; - using Self = StringHashTable; - - template <typename, typename, size_t> - friend class TwoLevelStringHashTable; - - T0 m0; - T1 m1; - T2 m2; - T3 m3; - Ts ms; - -public: - using Key = StringRef; - using key_type = 
Key; - using mapped_type = typename Ts::mapped_type; - using value_type = typename Ts::value_type; - using cell_type = typename Ts::cell_type; - - using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>; - using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>; - + + using LookupResult = Cell *; + using ConstLookupResult = const Cell *; + + template <typename KeyHolder> + void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0) + { + if (!hasZero()) + { + setHasZero(); + inserted = true; + } + else + inserted = false; + it = zeroValue(); + } + + template <typename Key> + LookupResult ALWAYS_INLINE find(const Key &, size_t = 0) + { + return hasZero() ? zeroValue() : nullptr; + } + + template <typename Key> + ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const + { + return hasZero() ? zeroValue() : nullptr; + } + + void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } + void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } + void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } + void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); } + size_t size() const { return hasZero() ? 1 : 0; } + bool empty() const { return !hasZero(); } + size_t getBufferSizeInBytes() const { return sizeof(Cell); } + size_t getCollisions() const { return 0; } +}; + +template <size_t initial_size_degree = 8> +struct StringHashTableGrower : public HashTableGrower<initial_size_degree> +{ + // Smooth growing for string maps + void increaseSize() { this->size_degree += 1; } +}; + +template <typename Mapped> +struct StringHashTableLookupResult +{ + Mapped * mapped_ptr; + StringHashTableLookupResult() {} + StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {} + StringHashTableLookupResult(std::nullptr_t) {} + const VoidKey getKey() const { return {}; } + auto & getMapped() { return *mapped_ptr; } + auto & operator*() { return *this; } + auto & operator*() const { return *this; } + auto * operator->() { return this; } + auto * operator->() const { return this; } + operator bool() const { return mapped_ptr; } + friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; } + friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; } + friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; } + friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; } +}; + +template <typename SubMaps> +class StringHashTable : private boost::noncopyable +{ +protected: + static constexpr size_t NUM_MAPS = 5; + // Map for storing empty string + using T0 = typename SubMaps::T0; + + // Short strings are stored as numbers + using T1 = typename SubMaps::T1; + using T2 = typename SubMaps::T2; + using T3 = typename SubMaps::T3; + + // Long strings are stored as StringRef along with saved hash + using Ts = typename SubMaps::Ts; + using Self = StringHashTable; + + template <typename, typename, size_t> + friend class TwoLevelStringHashTable; + + T0 m0; + T1 m1; + T2 m2; + T3 m3; + Ts ms; + +public: + using Key = StringRef; + using key_type = Key; + using mapped_type = typename Ts::mapped_type; + using value_type = typename Ts::value_type; + using cell_type = typename Ts::cell_type; + + using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>; 
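    // To recap the five sub-tables declared above (a summary of the
    // class-body comments, not additional machinery):
    //   m0 - the single empty-string cell
    //   m1 - keys of 1..8 bytes, packed into StringKey8
    //   m2 - keys of 9..16 bytes, packed into StringKey16
    //   m3 - keys of 17..24 bytes, packed into StringKey24
    //   ms - keys of 25+ bytes (or any key with a trailing zero byte),
    //        stored as StringRef with a saved hash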
+ using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>; + StringHashTable() = default; - - StringHashTable(size_t reserve_for_num_elements) - : m1{reserve_for_num_elements / 4} - , m2{reserve_for_num_elements / 4} - , m3{reserve_for_num_elements / 4} - , ms{reserve_for_num_elements / 4} - { - } - + + StringHashTable(size_t reserve_for_num_elements) + : m1{reserve_for_num_elements / 4} + , m2{reserve_for_num_elements / 4} + , m3{reserve_for_num_elements / 4} + , ms{reserve_for_num_elements / 4} + { + } + StringHashTable(StringHashTable && rhs) : m1(std::move(rhs.m1)) , m2(std::move(rhs.m2)) @@ -229,207 +229,207 @@ public: , ms(std::move(rhs.ms)) { } - + ~StringHashTable() = default; -public: - // Dispatch is written in a way that maximizes performance: - // 1. Always memcpy 8 bytes at a time - // 2. Use the switch-case extension to generate a fast dispatch table - // 3. Funcs are named callables that can be force_inlined +public: + // Dispatch is written in a way that maximizes performance: + // 1. Always memcpy 8 bytes at a time + // 2. Use the switch-case extension to generate a fast dispatch table + // 3. Funcs are named callables that can be force_inlined // - // NOTE: It relies on Little Endianness // // NOTE: It requires keys padded to 8 bytes (IOW you cannot pass // std::string here, but you can pass e.g. ColumnString::getDataAt()), // since it copies 8 bytes at a time. - template <typename Self, typename KeyHolder, typename Func> - static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) - { - StringHashTableHash hash; - const StringRef & x = keyHolderGetKey(key_holder); - const size_t sz = x.size; - if (sz == 0) - { - keyHolderDiscardKey(key_holder); - return func(self.m0, VoidKey{}, 0); - } - - if (x.data[sz - 1] == 0) - { - // Strings with trailing zeros are not representable as fixed-size - // string keys. Put them into the generic table.
- return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x)); - } - - const char * p = x.data; - // pending bits that needs to be shifted out - const char s = (-sz & 7) * 8; - union - { - StringKey8 k8; - StringKey16 k16; - StringKey24 k24; - UInt64 n[3]; - }; - switch ((sz - 1) >> 3) - { - case 0: // 1..8 bytes - { - // first half page - if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0) - { - memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; - } - else - { - const char * lp = x.data + x.size - 8; - memcpy(&n[0], lp, 8); - n[0] >>= s; - } - keyHolderDiscardKey(key_holder); - return func(self.m1, k8, hash(k8)); - } - case 1: // 9..16 bytes - { - memcpy(&n[0], p, 8); - const char * lp = x.data + x.size - 8; - memcpy(&n[1], lp, 8); - n[1] >>= s; - keyHolderDiscardKey(key_holder); - return func(self.m2, k16, hash(k16)); - } - case 2: // 17..24 bytes - { - memcpy(&n[0], p, 16); - const char * lp = x.data + x.size - 8; - memcpy(&n[2], lp, 8); - n[2] >>= s; - keyHolderDiscardKey(key_holder); - return func(self.m3, k24, hash(k24)); - } - default: // >= 25 bytes - { - return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x)); - } - } - } - - struct EmplaceCallable - { - LookupResult & mapped; - bool & inserted; - - EmplaceCallable(LookupResult & mapped_, bool & inserted_) - : mapped(mapped_), inserted(inserted_) {} - - template <typename Map, typename KeyHolder> - void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) - { - typename Map::LookupResult result; - map.emplace(key_holder, result, inserted, hash); - mapped = &result->getMapped(); - } - }; - - template <typename KeyHolder> - void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) - { - this->dispatch(*this, key_holder, EmplaceCallable(it, inserted)); - } - - struct FindCallable - { - // find() doesn't need any key memory management, so we don't work with - // any key holders here, only with normal keys. The key type is still - // different for every subtable, this is why it is a template parameter. 
- template <typename Submap, typename SubmapKey> - auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash) - { - auto it = map.find(key, hash); - if (!it) - return decltype(&it->getMapped()){}; - else - return &it->getMapped(); - } - }; - - LookupResult ALWAYS_INLINE find(const Key & x) - { - return dispatch(*this, x, FindCallable{}); - } - - ConstLookupResult ALWAYS_INLINE find(const Key & x) const - { - return dispatch(*this, x, FindCallable{}); - } - - bool ALWAYS_INLINE has(const Key & x, size_t = 0) const - { - return dispatch(*this, x, FindCallable{}) != nullptr; - } - - void write(DB::WriteBuffer & wb) const - { - m0.write(wb); - m1.write(wb); - m2.write(wb); - m3.write(wb); - ms.write(wb); - } - - void writeText(DB::WriteBuffer & wb) const - { - m0.writeText(wb); - DB::writeChar(',', wb); - m1.writeText(wb); - DB::writeChar(',', wb); - m2.writeText(wb); - DB::writeChar(',', wb); - m3.writeText(wb); - DB::writeChar(',', wb); - ms.writeText(wb); - } - - void read(DB::ReadBuffer & rb) - { - m0.read(rb); - m1.read(rb); - m2.read(rb); - m3.read(rb); - ms.read(rb); - } - - void readText(DB::ReadBuffer & rb) - { - m0.readText(rb); - DB::assertChar(',', rb); - m1.readText(rb); - DB::assertChar(',', rb); - m2.readText(rb); - DB::assertChar(',', rb); - m3.readText(rb); - DB::assertChar(',', rb); - ms.readText(rb); - } - - size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); } - - bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); } - - size_t getBufferSizeInBytes() const - { - return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes() - + ms.getBufferSizeInBytes(); - } - - void clearAndShrink() - { - m1.clearHasZero(); - m1.clearAndShrink(); - m2.clearAndShrink(); - m3.clearAndShrink(); - ms.clearAndShrink(); - } -}; + template <typename Self, typename KeyHolder, typename Func> + static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) + { + StringHashTableHash hash; + const StringRef & x = keyHolderGetKey(key_holder); + const size_t sz = x.size; + if (sz == 0) + { + keyHolderDiscardKey(key_holder); + return func(self.m0, VoidKey{}, 0); + } + + if (x.data[sz - 1] == 0) + { + // Strings with trailing zeros are not representable as fixed-size + // string keys. Put them to the generic table. 
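    // A concrete illustration of the ambiguity (hypothetical values,
    // little-endian assumed): zero padding makes a key that itself ends
    // in '\0' collide with a shorter key:
    //
    //   StringKey8 k1 = 0, k2 = 0;
    //   memcpy(&k1, "ab", 2);      // k1 == 0x0000000000006261
    //   memcpy(&k2, "ab\0", 3);    // k2 == 0x0000000000006261 as well
    //
    // Both the 2- and the 3-byte string map to the same fixed-size key,
    // so keys whose last byte is 0 are routed to the generic ms table
    // to keep lookups exact.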
+ return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x)); + } + + const char * p = x.data; + // pending bits that need to be shifted out + const char s = (-sz & 7) * 8; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + switch ((sz - 1) >> 3) + { + case 0: // 1..8 bytes + { + // first half of the page: an 8-byte forward read cannot cross the page boundary + if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + n[0] &= -1ul >> s; + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + n[0] >>= s; + } + keyHolderDiscardKey(key_holder); + return func(self.m1, k8, hash(k8)); + } + case 1: // 9..16 bytes + { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + n[1] >>= s; + keyHolderDiscardKey(key_holder); + return func(self.m2, k16, hash(k16)); + } + case 2: // 17..24 bytes + { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + n[2] >>= s; + keyHolderDiscardKey(key_holder); + return func(self.m3, k24, hash(k24)); + } + default: // >= 25 bytes + { + return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x)); + } + } + } + + struct EmplaceCallable + { + LookupResult & mapped; + bool & inserted; + + EmplaceCallable(LookupResult & mapped_, bool & inserted_) + : mapped(mapped_), inserted(inserted_) {} + + template <typename Map, typename KeyHolder> + void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) + { + typename Map::LookupResult result; + map.emplace(key_holder, result, inserted, hash); + mapped = &result->getMapped(); + } + }; + + template <typename KeyHolder> + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + this->dispatch(*this, key_holder, EmplaceCallable(it, inserted)); + } + + struct FindCallable + { + // find() doesn't need any key memory management, so we don't work with + // any key holders here, only with normal keys. The key type is still + // different for every subtable, which is why it is a template parameter.
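    // How the two callables are reached in practice - a usage sketch with
    // a hypothetical instantiation (the mapped value must be constructed
    // via placement new on first insertion, exactly as the emplace
    // contract documented in TwoLevelHashTable below requires):
    //
    //   StringHashMap<UInt64> map;             // hypothetical map type
    //   StringHashMap<UInt64>::LookupResult it;
    //   bool inserted;
    //   map.emplace(key_holder, it, inserted); // dispatch + EmplaceCallable
    //   if (inserted)
    //       new (&it->getMapped()) UInt64(0);
    //   if (auto found = map.find(key))        // dispatch + FindCallable
    //       ++found->getMapped();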
+ template <typename Submap, typename SubmapKey> + auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash) + { + auto it = map.find(key, hash); + if (!it) + return decltype(&it->getMapped()){}; + else + return &it->getMapped(); + } + }; + + LookupResult ALWAYS_INLINE find(const Key & x) + { + return dispatch(*this, x, FindCallable{}); + } + + ConstLookupResult ALWAYS_INLINE find(const Key & x) const + { + return dispatch(*this, x, FindCallable{}); + } + + bool ALWAYS_INLINE has(const Key & x, size_t = 0) const + { + return dispatch(*this, x, FindCallable{}) != nullptr; + } + + void write(DB::WriteBuffer & wb) const + { + m0.write(wb); + m1.write(wb); + m2.write(wb); + m3.write(wb); + ms.write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + m0.writeText(wb); + DB::writeChar(',', wb); + m1.writeText(wb); + DB::writeChar(',', wb); + m2.writeText(wb); + DB::writeChar(',', wb); + m3.writeText(wb); + DB::writeChar(',', wb); + ms.writeText(wb); + } + + void read(DB::ReadBuffer & rb) + { + m0.read(rb); + m1.read(rb); + m2.read(rb); + m3.read(rb); + ms.read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + m0.readText(rb); + DB::assertChar(',', rb); + m1.readText(rb); + DB::assertChar(',', rb); + m2.readText(rb); + DB::assertChar(',', rb); + m3.readText(rb); + DB::assertChar(',', rb); + ms.readText(rb); + } + + size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); } + + bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); } + + size_t getBufferSizeInBytes() const + { + return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes() + + ms.getBufferSizeInBytes(); + } + + void clearAndShrink() + { + m1.clearHasZero(); + m1.clearAndShrink(); + m2.clearAndShrink(); + m3.clearAndShrink(); + ms.clearAndShrink(); + } +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h index bd59ec714b..7bebf0d8af 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h @@ -1,66 +1,66 @@ -#pragma once - -#include <Common/HashTable/TwoLevelHashTable.h> -#include <Common/HashTable/HashMap.h> - - -template -< - typename Key, - typename Cell, - typename Hash = DefaultHash<Key>, - typename Grower = TwoLevelHashTableGrower<>, - typename Allocator = HashTableAllocator, - template <typename ...> typename ImplTable = HashMapTable -> -class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>> -{ -public: - using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>; - using LookupResult = typename Impl::LookupResult; - - using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable; - - template <typename Func> - void ALWAYS_INLINE forEachMapped(Func && func) - { - for (auto i = 0u; i < this->NUM_BUCKETS; ++i) - this->impls[i].forEachMapped(func); - } - - typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) - { - LookupResult it; - bool inserted; - this->emplace(x, it, inserted); - - if (inserted) - new (&it->getMapped()) typename Cell::Mapped(); - - return it->getMapped(); - } -}; - - -template -< - typename Key, - typename Mapped, - typename Hash = 
DefaultHash<Key>, - typename Grower = TwoLevelHashTableGrower<>, - typename Allocator = HashTableAllocator, - template <typename ...> typename ImplTable = HashMapTable -> -using TwoLevelHashMap = TwoLevelHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>; - - -template -< - typename Key, - typename Mapped, - typename Hash = DefaultHash<Key>, - typename Grower = TwoLevelHashTableGrower<>, - typename Allocator = HashTableAllocator, - template <typename ...> typename ImplTable = HashMapTable -> -using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>; +#pragma once + +#include <Common/HashTable/TwoLevelHashTable.h> +#include <Common/HashTable/HashMap.h> + + +template +< + typename Key, + typename Cell, + typename Hash = DefaultHash<Key>, + typename Grower = TwoLevelHashTableGrower<>, + typename Allocator = HashTableAllocator, + template <typename ...> typename ImplTable = HashMapTable +> +class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>> +{ +public: + using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>; + using LookupResult = typename Impl::LookupResult; + + using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable; + + template <typename Func> + void ALWAYS_INLINE forEachMapped(Func && func) + { + for (auto i = 0u; i < this->NUM_BUCKETS; ++i) + this->impls[i].forEachMapped(func); + } + + typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) + { + LookupResult it; + bool inserted; + this->emplace(x, it, inserted); + + if (inserted) + new (&it->getMapped()) typename Cell::Mapped(); + + return it->getMapped(); + } +}; + + +template +< + typename Key, + typename Mapped, + typename Hash = DefaultHash<Key>, + typename Grower = TwoLevelHashTableGrower<>, + typename Allocator = HashTableAllocator, + template <typename ...> typename ImplTable = HashMapTable +> +using TwoLevelHashMap = TwoLevelHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>; + + +template +< + typename Key, + typename Mapped, + typename Hash = DefaultHash<Key>, + typename Grower = TwoLevelHashTableGrower<>, + typename Allocator = HashTableAllocator, + template <typename ...> typename ImplTable = HashMapTable +> +using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h index 2376e08d5f..14afb91c07 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h @@ -1,335 +1,335 @@ -#pragma once - -#include <Common/HashTable/HashTable.h> - - -/** Two-level hash table. - * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level). - * To determine which one to use, one of the bytes of the hash function is taken. - * - * Usually works a little slower than a simple hash table. 
- * However, it has advantages in some cases: - * - if you need to merge two hash tables together, then you can easily parallelize it by buckets; - * - delay during resizes is amortized, since the small hash tables will be resized separately; - * - in theory, resizes are cache-local in a larger range of sizes. - */ - -template <size_t initial_size_degree = 8> -struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree> -{ - /// Increase the size of the hash table. - void increaseSize() - { - this->size_degree += this->size_degree >= 15 ? 1 : 2; - } -}; - -template -< - typename Key, - typename Cell, - typename Hash, - typename Grower, - typename Allocator, - typename ImplTable = HashTable<Key, Cell, Hash, Grower, Allocator>, - size_t BITS_FOR_BUCKET = 8 -> -class TwoLevelHashTable : - private boost::noncopyable, - protected Hash /// empty base optimization -{ -protected: - friend class const_iterator; - friend class iterator; - - using HashValue = size_t; - using Self = TwoLevelHashTable; -public: - using Impl = ImplTable; - - static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; - static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; - - size_t hash(const Key & x) const { return Hash::operator()(x); } - - /// NOTE Bad for hash tables with more than 2^32 cells. - static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } - -protected: - typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket) - { - while (bucket != NUM_BUCKETS && impls[bucket].empty()) - ++bucket; - - if (bucket != NUM_BUCKETS) - return impls[bucket].begin(); - - --bucket; - return impls[MAX_BUCKET].end(); - } - - typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const - { - while (bucket != NUM_BUCKETS && impls[bucket].empty()) - ++bucket; - - if (bucket != NUM_BUCKETS) - return impls[bucket].begin(); - - --bucket; - return impls[MAX_BUCKET].end(); - } - -public: - using key_type = typename Impl::key_type; - using mapped_type = typename Impl::mapped_type; - using value_type = typename Impl::value_type; - using cell_type = typename Impl::cell_type; - - using LookupResult = typename Impl::LookupResult; - using ConstLookupResult = typename Impl::ConstLookupResult; - - Impl impls[NUM_BUCKETS]; - - - TwoLevelHashTable() {} - - /// Copy the data from another (normal) hash table. It should have the same hash function. - template <typename Source> - TwoLevelHashTable(const Source & src) - { - typename Source::const_iterator it = src.begin(); - - /// It is assumed that the zero key (stored separately) is first in iteration order. - if (it != src.end() && it.getPtr()->isZero(src)) - { - insert(it->getValue()); - ++it; - } - - for (; it != src.end(); ++it) - { - const Cell * cell = it.getPtr(); - size_t hash_value = cell->getHash(src); - size_t buck = getBucketFromHash(hash_value); - impls[buck].insertUniqueNonZero(cell, hash_value); - } - } - - - class iterator - { +#pragma once + +#include <Common/HashTable/HashTable.h> + + +/** Two-level hash table. + * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level). + * To determine which one to use, one of the bytes of the hash function is taken. + * + * Usually works a little slower than a simple hash table. 
+ * However, it has advantages in some cases: + * - if you need to merge two hash tables together, then you can easily parallelize it by buckets; + * - delay during resizes is amortized, since the small hash tables will be resized separately; + * - in theory, resizes are cache-local in a larger range of sizes. + */ + +template <size_t initial_size_degree = 8> +struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree> +{ + /// Increase the size of the hash table. + void increaseSize() + { + this->size_degree += this->size_degree >= 15 ? 1 : 2; + } +}; + +template +< + typename Key, + typename Cell, + typename Hash, + typename Grower, + typename Allocator, + typename ImplTable = HashTable<Key, Cell, Hash, Grower, Allocator>, + size_t BITS_FOR_BUCKET = 8 +> +class TwoLevelHashTable : + private boost::noncopyable, + protected Hash /// empty base optimization +{ +protected: + friend class const_iterator; + friend class iterator; + + using HashValue = size_t; + using Self = TwoLevelHashTable; +public: + using Impl = ImplTable; + + static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; + static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + + size_t hash(const Key & x) const { return Hash::operator()(x); } + + /// NOTE Bad for hash tables with more than 2^32 cells. + static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } + +protected: + typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket) + { + while (bucket != NUM_BUCKETS && impls[bucket].empty()) + ++bucket; + + if (bucket != NUM_BUCKETS) + return impls[bucket].begin(); + + --bucket; + return impls[MAX_BUCKET].end(); + } + + typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const + { + while (bucket != NUM_BUCKETS && impls[bucket].empty()) + ++bucket; + + if (bucket != NUM_BUCKETS) + return impls[bucket].begin(); + + --bucket; + return impls[MAX_BUCKET].end(); + } + +public: + using key_type = typename Impl::key_type; + using mapped_type = typename Impl::mapped_type; + using value_type = typename Impl::value_type; + using cell_type = typename Impl::cell_type; + + using LookupResult = typename Impl::LookupResult; + using ConstLookupResult = typename Impl::ConstLookupResult; + + Impl impls[NUM_BUCKETS]; + + + TwoLevelHashTable() {} + + /// Copy the data from another (normal) hash table. It should have the same hash function. + template <typename Source> + TwoLevelHashTable(const Source & src) + { + typename Source::const_iterator it = src.begin(); + + /// It is assumed that the zero key (stored separately) is first in iteration order. 
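    /// (Illustration of the distribution below: a non-zero cell whose
    /// saved hash is 0x00000000AB123456 goes to bucket
    /// (0xAB123456 >> 24) & 255 == 0xAB. Because the bucket depends only
    /// on the hash, two tables split the same keys identically, which is
    /// what makes bucket-by-bucket parallel merging possible.)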
+ if (it != src.end() && it.getPtr()->isZero(src)) + { + insert(it->getValue()); + ++it; + } + + for (; it != src.end(); ++it) + { + const Cell * cell = it.getPtr(); + size_t hash_value = cell->getHash(src); + size_t buck = getBucketFromHash(hash_value); + impls[buck].insertUniqueNonZero(cell, hash_value); + } + } + + + class iterator + { Self * container{}; size_t bucket{}; typename Impl::iterator current_it{}; - - friend class TwoLevelHashTable; - - iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_) - : container(container_), bucket(bucket_), current_it(current_it_) {} - - public: - iterator() {} - - bool operator== (const iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; } - bool operator!= (const iterator & rhs) const { return !(*this == rhs); } - - iterator & operator++() - { - ++current_it; - if (current_it == container->impls[bucket].end()) - { - ++bucket; - current_it = container->beginOfNextNonEmptyBucket(bucket); - } - - return *this; - } - - Cell & operator* () const { return *current_it; } - Cell * operator->() const { return current_it.getPtr(); } - - Cell * getPtr() const { return current_it.getPtr(); } - size_t getHash() const { return current_it.getHash(); } - }; - - - class const_iterator - { + + friend class TwoLevelHashTable; + + iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_) + : container(container_), bucket(bucket_), current_it(current_it_) {} + + public: + iterator() {} + + bool operator== (const iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; } + bool operator!= (const iterator & rhs) const { return !(*this == rhs); } + + iterator & operator++() + { + ++current_it; + if (current_it == container->impls[bucket].end()) + { + ++bucket; + current_it = container->beginOfNextNonEmptyBucket(bucket); + } + + return *this; + } + + Cell & operator* () const { return *current_it; } + Cell * operator->() const { return current_it.getPtr(); } + + Cell * getPtr() const { return current_it.getPtr(); } + size_t getHash() const { return current_it.getHash(); } + }; + + + class const_iterator + { Self * container{}; size_t bucket{}; typename Impl::const_iterator current_it{}; - - friend class TwoLevelHashTable; - - const_iterator(Self * container_, size_t bucket_, typename Impl::const_iterator current_it_) - : container(container_), bucket(bucket_), current_it(current_it_) {} - - public: - const_iterator() {} - const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {} - - bool operator== (const const_iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; } - bool operator!= (const const_iterator & rhs) const { return !(*this == rhs); } - - const_iterator & operator++() - { - ++current_it; - if (current_it == container->impls[bucket].end()) - { - ++bucket; - current_it = container->beginOfNextNonEmptyBucket(bucket); - } - - return *this; - } - - const Cell & operator* () const { return *current_it; } - const Cell * operator->() const { return current_it->getPtr(); } - - const Cell * getPtr() const { return current_it.getPtr(); } - size_t getHash() const { return current_it.getHash(); } - }; - - - const_iterator begin() const - { - size_t buck = 0; - typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck); - return { this, buck, impl_it }; - } - - iterator begin() - { - size_t buck = 0; - typename Impl::iterator impl_it = 
beginOfNextNonEmptyBucket(buck); - return { this, buck, impl_it }; - } - - const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; } - iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; } - - - /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function. - std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x) - { - size_t hash_value = hash(Cell::getKey(x)); - - std::pair<LookupResult, bool> res; - emplace(Cell::getKey(x), res.first, res.second, hash_value); - - if (res.second) - insertSetMapped(res.first->getMapped(), x); - - return res; - } - - - /** Insert the key, - * return an iterator to a position that can be used for `placement new` of value, - * as well as the flag - whether a new key was inserted. - * - * You have to make `placement new` values if you inserted a new key, - * since when destroying a hash table, the destructor will be invoked for it! - * - * Example usage: - * - * Map::iterator it; - * bool inserted; - * map.emplace(key, it, inserted); - * if (inserted) - * new(&it->second) Mapped(value); - */ - template <typename KeyHolder> - void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) - { - size_t hash_value = hash(keyHolderGetKey(key_holder)); - emplace(key_holder, it, inserted, hash_value); - } - - - /// Same, but with a precalculated values of hash function. - template <typename KeyHolder> - void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, - bool & inserted, size_t hash_value) - { - size_t buck = getBucketFromHash(hash_value); - impls[buck].emplace(key_holder, it, inserted, hash_value); - } - - LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) - { - size_t buck = getBucketFromHash(hash_value); - return impls[buck].find(x, hash_value); - } - - ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const - { - return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value); - } - - LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); } - - ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); } - - - void write(DB::WriteBuffer & wb) const - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - impls[i].write(wb); - } - - void writeText(DB::WriteBuffer & wb) const - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - { - if (i != 0) - DB::writeChar(',', wb); - impls[i].writeText(wb); - } - } - - void read(DB::ReadBuffer & rb) - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - impls[i].read(rb); - } - - void readText(DB::ReadBuffer & rb) - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - { - if (i != 0) - DB::assertChar(',', rb); - impls[i].readText(rb); - } - } - - - size_t size() const - { - size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) - res += impls[i].size(); - - return res; - } - - bool empty() const - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - if (!impls[i].empty()) - return false; - - return true; - } - - size_t getBufferSizeInBytes() const - { - size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) - res += impls[i].getBufferSizeInBytes(); - - return res; - } -}; + + friend class TwoLevelHashTable; + + const_iterator(Self * container_, size_t bucket_, typename Impl::const_iterator current_it_) + : container(container_), bucket(bucket_), current_it(current_it_) {} + + public: + const_iterator() {} + const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {} + + bool operator== 
(const const_iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; } + bool operator!= (const const_iterator & rhs) const { return !(*this == rhs); } + + const_iterator & operator++() + { + ++current_it; + if (current_it == container->impls[bucket].end()) + { + ++bucket; + current_it = container->beginOfNextNonEmptyBucket(bucket); + } + + return *this; + } + + const Cell & operator* () const { return *current_it; } + const Cell * operator->() const { return current_it->getPtr(); } + + const Cell * getPtr() const { return current_it.getPtr(); } + size_t getHash() const { return current_it.getHash(); } + }; + + + const_iterator begin() const + { + size_t buck = 0; + typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck); + return { this, buck, impl_it }; + } + + iterator begin() + { + size_t buck = 0; + typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck); + return { this, buck, impl_it }; + } + + const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; } + iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; } + + + /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function. + std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x) + { + size_t hash_value = hash(Cell::getKey(x)); + + std::pair<LookupResult, bool> res; + emplace(Cell::getKey(x), res.first, res.second, hash_value); + + if (res.second) + insertSetMapped(res.first->getMapped(), x); + + return res; + } + + + /** Insert the key, + * return an iterator to a position that can be used for `placement new` of value, + * as well as the flag - whether a new key was inserted. + * + * You have to make `placement new` values if you inserted a new key, + * since when destroying a hash table, the destructor will be invoked for it! + * + * Example usage: + * + * Map::iterator it; + * bool inserted; + * map.emplace(key, it, inserted); + * if (inserted) + * new(&it->second) Mapped(value); + */ + template <typename KeyHolder> + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + size_t hash_value = hash(keyHolderGetKey(key_holder)); + emplace(key_holder, it, inserted, hash_value); + } + + + /// Same, but with a precalculated values of hash function. 
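    /// Hashing once and reusing the value is the point of this overload;
    /// the bucket is derived from the same hash. A sketch with a
    /// hypothetical table and key (BITS_FOR_BUCKET == 8):
    ///
    ///   size_t hash_value = table.hash(key);         // hash exactly once
    ///   size_t buck = getBucketFromHash(hash_value); // (hash_value >> 24) & 255
    ///   table.emplace(key, it, inserted, hash_value);
    ///   auto found = table.find(key, hash_value);    // no re-hashing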
+ template <typename KeyHolder> + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, + bool & inserted, size_t hash_value) + { + size_t buck = getBucketFromHash(hash_value); + impls[buck].emplace(key_holder, it, inserted, hash_value); + } + + LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) + { + size_t buck = getBucketFromHash(hash_value); + return impls[buck].find(x, hash_value); + } + + ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const + { + return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value); + } + + LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); } + + ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); } + + + void write(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::writeChar(',', wb); + impls[i].writeText(wb); + } + } + + void read(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::assertChar(',', rb); + impls[i].readText(rb); + } + } + + + size_t size() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].size(); + + return res; + } + + bool empty() const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + if (!impls[i].empty()) + return false; + + return true; + } + + size_t getBufferSizeInBytes() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].getBufferSizeInBytes(); + + return res; + } +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h index e87535da2f..6bd8f74dbd 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h @@ -1,33 +1,33 @@ -#pragma once - -#include <Common/HashTable/StringHashMap.h> -#include <Common/HashTable/TwoLevelStringHashTable.h> - -template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap> -class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>> -{ -public: - using Key = StringRef; - using Self = TwoLevelStringHashMap; - using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>; - using LookupResult = typename Base::LookupResult; - - using Base::Base; - - template <typename Func> - void ALWAYS_INLINE forEachMapped(Func && func) - { - for (auto i = 0u; i < this->NUM_BUCKETS; ++i) +#pragma once + +#include <Common/HashTable/StringHashMap.h> +#include <Common/HashTable/TwoLevelStringHashTable.h> + +template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap> +class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>> +{ +public: + using Key = StringRef; + using Self = TwoLevelStringHashMap; + using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>; + using LookupResult 
= typename Base::LookupResult; + + using Base::Base; + + template <typename Func> + void ALWAYS_INLINE forEachMapped(Func && func) + { + for (auto i = 0u; i < this->NUM_BUCKETS; ++i) this->impls[i].forEachMapped(func); - } - - TMapped & ALWAYS_INLINE operator[](const Key & x) - { - bool inserted; - LookupResult it; - this->emplace(x, it, inserted); - if (inserted) - new (&it->getMapped()) TMapped(); - return it->getMapped(); - } -}; + } + + TMapped & ALWAYS_INLINE operator[](const Key & x) + { + bool inserted; + LookupResult it; + this->emplace(x, it, inserted); + if (inserted) + new (&it->getMapped()) TMapped(); + return it->getMapped(); + } +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h index ff2a4ab12e..93bbcb2835 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h @@ -1,235 +1,235 @@ -#pragma once - -#include <Common/HashTable/StringHashTable.h> - -template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8> -class TwoLevelStringHashTable : private boost::noncopyable -{ -protected: - using HashValue = size_t; - using Self = TwoLevelStringHashTable; - -public: - using Key = StringRef; - using Impl = ImplTable; - - static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; - static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; - - // TODO: currently hashing contains redundant computations when doing distributed or external aggregations - size_t hash(const Key & x) const - { - return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; }); - } - - size_t operator()(const Key & x) const { return hash(x); } - - /// NOTE Bad for hash tables with more than 2^32 cells. - static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } - -public: - using key_type = typename Impl::key_type; - using mapped_type = typename Impl::mapped_type; - using value_type = typename Impl::value_type; - using cell_type = typename Impl::cell_type; - - using LookupResult = typename Impl::LookupResult; - using ConstLookupResult = typename Impl::ConstLookupResult; - - Impl impls[NUM_BUCKETS]; - - TwoLevelStringHashTable() {} - - template <typename Source> - TwoLevelStringHashTable(const Source & src) - { - if (src.m0.hasZero()) - impls[0].m0.setHasZero(*src.m0.zeroValue()); - - for (auto & v : src.m1) - { - size_t hash_value = v.getHash(src.m1); - size_t buck = getBucketFromHash(hash_value); - impls[buck].m1.insertUniqueNonZero(&v, hash_value); - } - for (auto & v : src.m2) - { - size_t hash_value = v.getHash(src.m2); - size_t buck = getBucketFromHash(hash_value); - impls[buck].m2.insertUniqueNonZero(&v, hash_value); - } - for (auto & v : src.m3) - { - size_t hash_value = v.getHash(src.m3); - size_t buck = getBucketFromHash(hash_value); - impls[buck].m3.insertUniqueNonZero(&v, hash_value); - } - for (auto & v : src.ms) - { - size_t hash_value = v.getHash(src.ms); - size_t buck = getBucketFromHash(hash_value); - impls[buck].ms.insertUniqueNonZero(&v, hash_value); - } - } - - // This function is mostly the same as StringHashTable::dispatch, but with - // added bucket computation. See the comments there. 
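    // A short trace of the bucketed dispatch (illustrative 6-byte key):
    // "abcdef" packs into a StringKey8; res = hash(k8) identifies the
    // cell, buck = getBucketFromHash(res) picks one of the 256 impls,
    // and the callable runs against impls[buck].m1 - the same key always
    // reaches the same bucket and the same sub-table.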
- template <typename Self, typename Func, typename KeyHolder> - static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) - { - StringHashTableHash hash; - const StringRef & x = keyHolderGetKey(key_holder); - const size_t sz = x.size; - if (sz == 0) - { - keyHolderDiscardKey(key_holder); - return func(self.impls[0].m0, VoidKey{}, 0); - } - - if (x.data[x.size - 1] == 0) - { - // Strings with trailing zeros are not representable as fixed-size - // string keys. Put them to the generic table. - auto res = hash(x); - auto buck = getBucketFromHash(res); - return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), - res); - } - - const char * p = x.data; - // pending bits that needs to be shifted out - const char s = (-sz & 7) * 8; - union - { - StringKey8 k8; - StringKey16 k16; - StringKey24 k24; - UInt64 n[3]; - }; - switch ((sz - 1) >> 3) - { - case 0: - { - // first half page - if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0) - { - memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; - } - else - { - const char * lp = x.data + x.size - 8; - memcpy(&n[0], lp, 8); - n[0] >>= s; - } - auto res = hash(k8); - auto buck = getBucketFromHash(res); - keyHolderDiscardKey(key_holder); - return func(self.impls[buck].m1, k8, res); - } - case 1: - { - memcpy(&n[0], p, 8); - const char * lp = x.data + x.size - 8; - memcpy(&n[1], lp, 8); - n[1] >>= s; - auto res = hash(k16); - auto buck = getBucketFromHash(res); - keyHolderDiscardKey(key_holder); - return func(self.impls[buck].m2, k16, res); - } - case 2: - { - memcpy(&n[0], p, 16); - const char * lp = x.data + x.size - 8; - memcpy(&n[2], lp, 8); - n[2] >>= s; - auto res = hash(k24); - auto buck = getBucketFromHash(res); - keyHolderDiscardKey(key_holder); - return func(self.impls[buck].m3, k24, res); - } - default: - { - auto res = hash(x); - auto buck = getBucketFromHash(res); - return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res); - } - } - } - - template <typename KeyHolder> - void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) - { - dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted}); - } - - LookupResult ALWAYS_INLINE find(const Key x) - { - return dispatch(*this, x, typename Impl::FindCallable{}); - } - - ConstLookupResult ALWAYS_INLINE find(const Key x) const - { - return dispatch(*this, x, typename Impl::FindCallable{}); - } - - void write(DB::WriteBuffer & wb) const - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - impls[i].write(wb); - } - - void writeText(DB::WriteBuffer & wb) const - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - { - if (i != 0) - DB::writeChar(',', wb); - impls[i].writeText(wb); - } - } - - void read(DB::ReadBuffer & rb) - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - impls[i].read(rb); - } - - void readText(DB::ReadBuffer & rb) - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - { - if (i != 0) - DB::assertChar(',', rb); - impls[i].readText(rb); - } - } - - size_t size() const - { - size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) - res += impls[i].size(); - - return res; - } - - bool empty() const - { - for (size_t i = 0; i < NUM_BUCKETS; ++i) - if (!impls[i].empty()) - return false; - - return true; - } - - size_t getBufferSizeInBytes() const - { - size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) - res += impls[i].getBufferSizeInBytes(); - - return res; - } -}; +#pragma once + +#include <Common/HashTable/StringHashTable.h> + +template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, 
size_t BITS_FOR_BUCKET = 8> +class TwoLevelStringHashTable : private boost::noncopyable +{ +protected: + using HashValue = size_t; + using Self = TwoLevelStringHashTable; + +public: + using Key = StringRef; + using Impl = ImplTable; + + static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; + static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + + // TODO: currently hashing contains redundant computations when doing distributed or external aggregations + size_t hash(const Key & x) const + { + return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; }); + } + + size_t operator()(const Key & x) const { return hash(x); } + + /// NOTE Bad for hash tables with more than 2^32 cells. + static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } + +public: + using key_type = typename Impl::key_type; + using mapped_type = typename Impl::mapped_type; + using value_type = typename Impl::value_type; + using cell_type = typename Impl::cell_type; + + using LookupResult = typename Impl::LookupResult; + using ConstLookupResult = typename Impl::ConstLookupResult; + + Impl impls[NUM_BUCKETS]; + + TwoLevelStringHashTable() {} + + template <typename Source> + TwoLevelStringHashTable(const Source & src) + { + if (src.m0.hasZero()) + impls[0].m0.setHasZero(*src.m0.zeroValue()); + + for (auto & v : src.m1) + { + size_t hash_value = v.getHash(src.m1); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m1.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.m2) + { + size_t hash_value = v.getHash(src.m2); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m2.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.m3) + { + size_t hash_value = v.getHash(src.m3); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m3.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.ms) + { + size_t hash_value = v.getHash(src.ms); + size_t buck = getBucketFromHash(hash_value); + impls[buck].ms.insertUniqueNonZero(&v, hash_value); + } + } + + // This function is mostly the same as StringHashTable::dispatch, but with + // added bucket computation. See the comments there. + template <typename Self, typename Func, typename KeyHolder> + static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) + { + StringHashTableHash hash; + const StringRef & x = keyHolderGetKey(key_holder); + const size_t sz = x.size; + if (sz == 0) + { + keyHolderDiscardKey(key_holder); + return func(self.impls[0].m0, VoidKey{}, 0); + } + + if (x.data[x.size - 1] == 0) + { + // Strings with trailing zeros are not representable as fixed-size + // string keys. Put them to the generic table. 
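    // Note that such keys are still bucketed by getBucketFromHash over
    // the full-string hash computed just below, so the two-level
    // invariant (one key -> one bucket) holds for them as well.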
+ auto res = hash(x); + auto buck = getBucketFromHash(res); + return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), + res); + } + + const char * p = x.data; + // pending bits that needs to be shifted out + const char s = (-sz & 7) * 8; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + switch ((sz - 1) >> 3) + { + case 0: + { + // first half page + if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + n[0] &= -1ul >> s; + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + n[0] >>= s; + } + auto res = hash(k8); + auto buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(self.impls[buck].m1, k8, res); + } + case 1: + { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + n[1] >>= s; + auto res = hash(k16); + auto buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(self.impls[buck].m2, k16, res); + } + case 2: + { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + n[2] >>= s; + auto res = hash(k24); + auto buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(self.impls[buck].m3, k24, res); + } + default: + { + auto res = hash(x); + auto buck = getBucketFromHash(res); + return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res); + } + } + } + + template <typename KeyHolder> + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted}); + } + + LookupResult ALWAYS_INLINE find(const Key x) + { + return dispatch(*this, x, typename Impl::FindCallable{}); + } + + ConstLookupResult ALWAYS_INLINE find(const Key x) const + { + return dispatch(*this, x, typename Impl::FindCallable{}); + } + + void write(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::writeChar(',', wb); + impls[i].writeText(wb); + } + } + + void read(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::assertChar(',', rb); + impls[i].readText(rb); + } + } + + size_t size() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].size(); + + return res; + } + + bool empty() const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + if (!impls[i].empty()) + return false; + + return true; + } + + size_t getBufferSizeInBytes() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].getBufferSizeInBytes(); + + return res; + } +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp index 7ba91838f2..a8363a46de 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp @@ -1,45 +1,45 @@ -#include "IPv6ToBinary.h" -#include <Poco/Net/IPAddress.h> +#include "IPv6ToBinary.h" +#include <Poco/Net/IPAddress.h> #include <Poco/ByteOrder.h> #include <Common/formatIPv6.h> -#include <cstring> - - -namespace DB -{ - +#include <cstring> + + +namespace DB +{ + /// Result array could 
be indexed with all possible uint8 values without extra check. /// For values greater than 128 we will store same value as for 128 (all bits set). constexpr size_t IPV6_MASKS_COUNT = 256; using RawMaskArrayV6 = std::array<uint8_t, IPV6_BINARY_LENGTH>; void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res) -{ - if (Poco::Net::IPAddress::IPv6 == address.family()) - { +{ + if (Poco::Net::IPAddress::IPv6 == address.family()) + { memcpy(res, address.addr(), 16); - } - else if (Poco::Net::IPAddress::IPv4 == address.family()) - { - /// Convert to IPv6-mapped address. + } + else if (Poco::Net::IPAddress::IPv4 == address.family()) + { + /// Convert to IPv6-mapped address. memset(res, 0, 10); - res[10] = '\xFF'; - res[11] = '\xFF'; - memcpy(&res[12], address.addr(), 4); - } - else + res[10] = '\xFF'; + res[11] = '\xFF'; + memcpy(&res[12], address.addr(), 4); + } + else memset(res, 0, 16); } - + std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address) { std::array<char, 16> res; IPv6ToRawBinary(address, res.data()); - return res; -} - + return res; +} + template <typename RawMaskArrayT> static constexpr RawMaskArrayT generateBitMask(size_t prefix) { @@ -54,7 +54,7 @@ static constexpr RawMaskArrayT generateBitMask(size_t prefix) while (i < arr.size()) arr[i++] = 0x00; return arr; -} +} template <typename RawMaskArrayT, size_t masksCount> static constexpr std::array<RawMaskArrayT, masksCount> generateBitMasks() diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h index 7d432faa00..d766d40835 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h @@ -1,19 +1,19 @@ #pragma once -#include <array> +#include <array> #include <common/types.h> - -namespace Poco { namespace Net { class IPAddress; }} - -namespace DB -{ - + +namespace Poco { namespace Net { class IPAddress; }} + +namespace DB +{ + /// Convert IP address to raw binary with IPv6 data (big endian). If it's an IPv4, map it to IPv6. /// Saves result into the first 16 bytes of `res`. void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res); -/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6. -std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address); - +/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6. +std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address); + /// Returns a reference to 16-byte array containing mask with first `prefix_len` bits set to `1` and `128 - prefix_len` to `0`. /// The reference is valid during all program execution time. /// Values of prefix_len greater than 128 interpreted as 128 exactly. 
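A worked example of the mask layout these helpers produce (an illustration assuming the high-bits-first convention the comment above describes, i.e. the first prefix_len bits set to 1): for prefix_len = 20, the first 16 bits give two 0xFF bytes, the next 4 bits give one partial 0xF0 byte, and the remaining 13 bytes are 0x00.

    // getCIDRMaskIPv6(20) ==
    // { 0xFF, 0xFF, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00,
    //   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }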
@@ -23,4 +23,4 @@ const std::array<uint8_t, 16> & getCIDRMaskIPv6(UInt8 prefix_len); bool matchIPv4Subnet(UInt32 addr, UInt32 cidr_addr, UInt8 prefix); bool matchIPv6Subnet(const uint8_t * addr, const uint8_t * cidr_addr, UInt8 prefix); -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h index 3441d79a37..ff7d7c5c5f 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h @@ -1,22 +1,22 @@ -#pragma once - -#include <Common/Exception.h> - - -namespace DB -{ - -class NetException : public Exception -{ -public: - NetException(const std::string & msg, int code) : Exception(msg, code) {} - - NetException * clone() const override { return new NetException(*this); } - void rethrow() const override { throw *this; } - -private: - const char * name() const throw() override { return "DB::NetException"; } - const char * className() const throw() override { return "DB::NetException"; } -}; - -} +#pragma once + +#include <Common/Exception.h> + + +namespace DB +{ + +class NetException : public Exception +{ +public: + NetException(const std::string & msg, int code) : Exception(msg, code) {} + + NetException * clone() const override { return new NetException(*this); } + void rethrow() const override { throw *this; } + +private: + const char * name() const throw() override { return "DB::NetException"; } + const char * className() const throw() override { return "DB::NetException"; } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp index 0339b46940..bf8ee6eedf 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp @@ -1,13 +1,13 @@ -#if USE_SSL -#include "OpenSSLHelpers.h" +#if USE_SSL +#include "OpenSSLHelpers.h" #include <common/scope_guard.h> -#include <openssl/err.h> -#include <openssl/sha.h> - -namespace DB -{ -#pragma GCC diagnostic warning "-Wold-style-cast" - +#include <openssl/err.h> +#include <openssl/sha.h> + +namespace DB +{ +#pragma GCC diagnostic warning "-Wold-style-cast" + std::string encodeSHA256(const std::string_view & text) { return encodeSHA256(text.data(), text.size()); @@ -19,20 +19,20 @@ std::string encodeSHA256(const void * text, size_t size) encodeSHA256(text, size, reinterpret_cast<unsigned char *>(out.data())); return out; } -void encodeSHA256(const std::string_view & text, unsigned char * out) -{ +void encodeSHA256(const std::string_view & text, unsigned char * out) +{ encodeSHA256(text.data(), text.size(), out); } void encodeSHA256(const void * text, size_t size, unsigned char * out) { - SHA256_CTX ctx; - SHA256_Init(&ctx); + SHA256_CTX ctx; + SHA256_Init(&ctx); SHA256_Update(&ctx, reinterpret_cast<const UInt8 *>(text), size); - SHA256_Final(out, &ctx); -} - -String getOpenSSLErrors() -{ + SHA256_Final(out, &ctx); +} + +String getOpenSSLErrors() +{ String res; ERR_print_errors_cb([](const char * str, size_t len, void * ctx) { @@ -43,7 +43,7 @@ String getOpenSSLErrors() return 1; }, &res); return res; -} - -} -#endif +} + +} +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h index 9b2754ce5e..192631ac6d 
100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h @@ -1,22 +1,22 @@ -#pragma once - - -#if USE_SSL +#pragma once + + +#if USE_SSL # include <common/types.h> - - -namespace DB -{ + + +namespace DB +{ /// Encodes `text` and returns it. std::string encodeSHA256(const std::string_view & text); std::string encodeSHA256(const void * text, size_t size); /// `out` must be at least 32 bytes long. -void encodeSHA256(const std::string_view & text, unsigned char * out); +void encodeSHA256(const std::string_view & text, unsigned char * out); void encodeSHA256(const void * text, size_t size, unsigned char * out); - -/// Returns concatenation of error strings for all errors that OpenSSL has recorded, emptying the error queue. -String getOpenSSLErrors(); - -} -#endif + +/// Returns concatenation of error strings for all errors that OpenSSL has recorded, emptying the error queue. +String getOpenSSLErrors(); + +} +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp index b08c956bd9..a5c21e3d87 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp @@ -1,84 +1,84 @@ -#include <Common/PipeFDs.h> -#include <Common/Exception.h> -#include <Common/formatReadable.h> - -#include <common/logger_useful.h> -#include <common/errnoToString.h> - -#include <unistd.h> -#include <fcntl.h> -#include <string> -#include <algorithm> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_PIPE; - extern const int CANNOT_FCNTL; - extern const int LOGICAL_ERROR; -} - -void LazyPipeFDs::open() -{ - for (int & fd : fds_rw) - if (fd >= 0) - throw Exception("Pipe is already opened", ErrorCodes::LOGICAL_ERROR); - -#ifndef __APPLE__ - if (0 != pipe2(fds_rw, O_CLOEXEC)) - throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE); -#else - if (0 != pipe(fds_rw)) - throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE); - if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC)) - throwFromErrno("Cannot setup auto-close on exec for read end of pipe", ErrorCodes::CANNOT_FCNTL); - if (0 != fcntl(fds_rw[1], F_SETFD, FD_CLOEXEC)) - throwFromErrno("Cannot setup auto-close on exec for write end of pipe", ErrorCodes::CANNOT_FCNTL); -#endif -} - -void LazyPipeFDs::close() -{ - for (int & fd : fds_rw) - { - if (fd < 0) - continue; - if (0 != ::close(fd)) - throwFromErrno("Cannot close pipe", ErrorCodes::CANNOT_PIPE); - fd = -1; - } -} - -PipeFDs::PipeFDs() -{ - open(); -} - -LazyPipeFDs::~LazyPipeFDs() -{ - try - { - close(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - - +#include <Common/PipeFDs.h> +#include <Common/Exception.h> +#include <Common/formatReadable.h> + +#include <common/logger_useful.h> +#include <common/errnoToString.h> + +#include <unistd.h> +#include <fcntl.h> +#include <string> +#include <algorithm> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PIPE; + extern const int CANNOT_FCNTL; + extern const int LOGICAL_ERROR; +} + +void LazyPipeFDs::open() +{ + for (int & fd : fds_rw) + if (fd >= 0) + throw Exception("Pipe is already opened", ErrorCodes::LOGICAL_ERROR); + +#ifndef __APPLE__ + if (0 != pipe2(fds_rw, O_CLOEXEC)) + throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE); +#else + if (0 != pipe(fds_rw)) + throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE); + if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC)) + throwFromErrno("Cannot setup auto-close on exec for read end of pipe", ErrorCodes::CANNOT_FCNTL); + if (0 != fcntl(fds_rw[1], F_SETFD, FD_CLOEXEC)) + throwFromErrno("Cannot setup auto-close on exec for write end of pipe", ErrorCodes::CANNOT_FCNTL); +#endif +} + +void LazyPipeFDs::close() +{ + for (int & fd : fds_rw) + { + if (fd < 0) + continue; + if (0 != ::close(fd)) + throwFromErrno("Cannot close pipe", ErrorCodes::CANNOT_PIPE); + fd = -1; + } +} + +PipeFDs::PipeFDs() +{ + open(); +} + +LazyPipeFDs::~LazyPipeFDs() +{ + try + { + close(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + void LazyPipeFDs::setNonBlockingWrite() -{ - int flags = fcntl(fds_rw[1], F_GETFL, 0); - if (-1 == flags) - throwFromErrno("Cannot get file status flags of pipe", ErrorCodes::CANNOT_FCNTL); - if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK)) - throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); -} - +{ + int flags = fcntl(fds_rw[1], F_GETFL, 0); + if (-1 == flags) + throwFromErrno("Cannot get file status flags of pipe", ErrorCodes::CANNOT_FCNTL); + if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK)) + throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); +} + void LazyPipeFDs::setNonBlockingRead() { int flags = fcntl(fds_rw[0], F_GETFL, 0); @@ -94,35 +94,35 @@ void LazyPipeFDs::setNonBlockingReadWrite() setNonBlockingWrite(); } -void LazyPipeFDs::tryIncreaseSize(int desired_size) -{ -#if defined(OS_LINUX) - Poco::Logger * log = &Poco::Logger::get("Pipe"); - - /** Increase pipe size to avoid slowdown during fine-grained trace collection. - */ - int pipe_size = fcntl(fds_rw[1], F_GETPIPE_SZ); - if (-1 == pipe_size) - { - if (errno == EINVAL) - { +void LazyPipeFDs::tryIncreaseSize(int desired_size) +{ +#if defined(OS_LINUX) + Poco::Logger * log = &Poco::Logger::get("Pipe"); + + /** Increase pipe size to avoid slowdown during fine-grained trace collection. + */ + int pipe_size = fcntl(fds_rw[1], F_GETPIPE_SZ); + if (-1 == pipe_size) + { + if (errno == EINVAL) + { LOG_INFO(log, "Cannot get pipe capacity, {}. Very old Linux kernels have no support for this fcntl.", errnoToString(ErrorCodes::CANNOT_FCNTL)); - /// It will work nevertheless. 
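The #ifndef __APPLE__ split in LazyPipeFDs::open() above exists because pipe2(O_CLOEXEC) sets close-on-exec atomically, while the pipe-then-fcntl fallback leaves a window in which a concurrent fork+exec could leak the descriptors. A minimal sketch of that pattern, assuming a POSIX system (openPipeCloexec is a hypothetical name):

#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

static int openPipeCloexec(int fds[2])
{
#if defined(__linux__)
    // Atomic: no window where the fds exist without FD_CLOEXEC.
    return pipe2(fds, O_CLOEXEC);
#else
    // Fallback for platforms without pipe2 (e.g. macOS): set the flag afterwards.
    if (0 != pipe(fds))
        return -1;
    if (0 != fcntl(fds[0], F_SETFD, FD_CLOEXEC) || 0 != fcntl(fds[1], F_SETFD, FD_CLOEXEC))
        return -1;
    return 0;
#endif
}

int main()
{
    int fds[2];
    if (0 != openPipeCloexec(fds))
    {
        perror("pipe");
        return 1;
    }
    printf("pipe fds: read=%d write=%d\n", fds[0], fds[1]);
    close(fds[0]);
    close(fds[1]);
}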
- } - else - throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL); - } - else - { - for (errno = 0; errno != EPERM && pipe_size < desired_size; pipe_size *= 2) - if (-1 == fcntl(fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM) - throwFromErrno("Cannot increase pipe capacity to " + std::to_string(pipe_size * 2), ErrorCodes::CANNOT_FCNTL); - - LOG_TRACE(log, "Pipe capacity is {}", ReadableSize(std::min(pipe_size, desired_size))); - } -#else - (void)desired_size; -#endif -} - -} + /// It will work nevertheless. + } + else + throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL); + } + else + { + for (errno = 0; errno != EPERM && pipe_size < desired_size; pipe_size *= 2) + if (-1 == fcntl(fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM) + throwFromErrno("Cannot increase pipe capacity to " + std::to_string(pipe_size * 2), ErrorCodes::CANNOT_FCNTL); + + LOG_TRACE(log, "Pipe capacity is {}", ReadableSize(std::min(pipe_size, desired_size))); + } +#else + (void)desired_size; +#endif +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h index 8f72bf9e54..20bd847c07 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h @@ -1,40 +1,40 @@ -#pragma once - -#include <cstddef> - - -namespace DB -{ - -/** Struct containing a pipe with lazy initialization. - * Use `open` and `close` methods to manipulate pipe and `fds_rw` field to access - * pipe's file descriptors. - */ -struct LazyPipeFDs -{ - int fds_rw[2] = {-1, -1}; - - void open(); - void close(); - +#pragma once + +#include <cstddef> + + +namespace DB +{ + +/** Struct containing a pipe with lazy initialization. + * Use `open` and `close` methods to manipulate pipe and `fds_rw` field to access + * pipe's file descriptors. + */ +struct LazyPipeFDs +{ + int fds_rw[2] = {-1, -1}; + + void open(); + void close(); + /// Set O_NONBLOCK to different ends of pipe preserving existing flags. /// Throws an exception if fcntl was not successful. void setNonBlockingWrite(); void setNonBlockingRead(); void setNonBlockingReadWrite(); - void tryIncreaseSize(int desired_size); - - ~LazyPipeFDs(); -}; - - -/** Struct which opens new pipe on creation and closes it on destruction. - * Use `fds_rw` field to access pipe's file descriptors. - */ -struct PipeFDs : public LazyPipeFDs -{ - PipeFDs(); -}; - -} + void tryIncreaseSize(int desired_size); + + ~LazyPipeFDs(); +}; + + +/** Struct which opens new pipe on creation and closes it on destruction. + * Use `fds_rw` field to access pipe's file descriptors. 
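tryIncreaseSize() above grows the pipe buffer by doubling with F_SETPIPE_SZ until either the target is reached or the kernel answers EPERM (unprivileged processes are capped by /proc/sys/fs/pipe-max-size). A Linux-only standalone sketch of the same loop; the 1 MiB target is an arbitrary example value:

#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <cstdio>

int main()
{
    int fds[2];
    if (0 != pipe(fds))
        return 1;

    const int desired = 1 << 20; // 1 MiB, example target
    int size = fcntl(fds[1], F_GETPIPE_SZ);
    for (errno = 0; size > 0 && errno != EPERM && size < desired; size *= 2)
        if (-1 == fcntl(fds[1], F_SETPIPE_SZ, size * 2) && errno != EPERM)
        {
            perror("F_SETPIPE_SZ");
            break;
        }
    printf("pipe capacity now: %d bytes\n", fcntl(fds[1], F_GETPIPE_SZ));
    close(fds[0]);
    close(fds[1]);
}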
+ */ +struct PipeFDs : public LazyPipeFDs +{ + PipeFDs(); +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h index 7f14a4b0d4..e84e249d17 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h @@ -1,98 +1,98 @@ -#pragma once - -#include <time.h> -#include <cstdlib> -#include <climits> -#include <random> -#include <functional> -#include <common/types.h> +#pragma once + +#include <time.h> +#include <cstdlib> +#include <climits> +#include <random> +#include <functional> +#include <common/types.h> #include <common/scope_guard.h> -#include <Common/PoolBase.h> -#include <Common/ProfileEvents.h> -#include <Common/NetException.h> -#include <Common/Exception.h> -#include <Common/randomSeed.h> - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int ALL_CONNECTION_TRIES_FAILED; - extern const int ALL_REPLICAS_ARE_STALE; - extern const int LOGICAL_ERROR; -} -} - -namespace ProfileEvents -{ - extern const Event DistributedConnectionFailTry; - extern const Event DistributedConnectionFailAtAll; -} - -/// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB. -/// Initialized by several PoolBase objects. -/// When a connection is requested, tries to create or choose an alive connection from one of the nested pools. -/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples. -/// Number of tries for a single pool is limited by max_tries parameter. -/// The client can set nested pool priority by passing a GetPriority functor. -/// -/// NOTE: if one of the nested pools blocks because it is empty, this pool will also block. -/// -/// The client must provide a TryGetEntryFunc functor, which should perform a single try to get a connection from a nested pool. -/// This functor can also check if the connection satisfies some eligibility criterion (e.g. check if -/// the replica is up-to-date). - -template <typename TNestedPool> -class PoolWithFailoverBase : private boost::noncopyable -{ -public: - using NestedPool = TNestedPool; - using NestedPoolPtr = std::shared_ptr<NestedPool>; - using Entry = typename NestedPool::Entry; - using NestedPools = std::vector<NestedPoolPtr>; - - PoolWithFailoverBase( - NestedPools nested_pools_, - time_t decrease_error_period_, - size_t max_error_cap_, - Poco::Logger * log_) - : nested_pools(std::move(nested_pools_)) - , decrease_error_period(decrease_error_period_) - , max_error_cap(max_error_cap_) - , shared_pool_states(nested_pools.size()) - , log(log_) - { - for (size_t i = 0;i < nested_pools.size(); ++i) - shared_pool_states[i].config_priority = nested_pools[i]->getPriority(); - } - - struct TryResult - { - TryResult() = default; - - explicit TryResult(Entry entry_) - : entry(std::move(entry_)) - , is_usable(true) - , is_up_to_date(true) - { - } - - void reset() - { - entry = Entry(); - is_usable = false; - is_up_to_date = false; - staleness = 0.0; - } - - Entry entry; - bool is_usable = false; /// If false, the entry is unusable for current request - /// (but may be usable for other requests, so error counts are not incremented) - bool is_up_to_date = false; /// If true, the entry is a connection to up-to-date replica. 
- double staleness = 0.0; /// Helps choosing the "least stale" option when all replicas are stale. - }; - +#include <Common/PoolBase.h> +#include <Common/ProfileEvents.h> +#include <Common/NetException.h> +#include <Common/Exception.h> +#include <Common/randomSeed.h> + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ALL_CONNECTION_TRIES_FAILED; + extern const int ALL_REPLICAS_ARE_STALE; + extern const int LOGICAL_ERROR; +} +} + +namespace ProfileEvents +{ + extern const Event DistributedConnectionFailTry; + extern const Event DistributedConnectionFailAtAll; +} + +/// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB. +/// Initialized by several PoolBase objects. +/// When a connection is requested, tries to create or choose an alive connection from one of the nested pools. +/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples. +/// Number of tries for a single pool is limited by max_tries parameter. +/// The client can set nested pool priority by passing a GetPriority functor. +/// +/// NOTE: if one of the nested pools blocks because it is empty, this pool will also block. +/// +/// The client must provide a TryGetEntryFunc functor, which should perform a single try to get a connection from a nested pool. +/// This functor can also check if the connection satisfies some eligibility criterion (e.g. check if +/// the replica is up-to-date). + +template <typename TNestedPool> +class PoolWithFailoverBase : private boost::noncopyable +{ +public: + using NestedPool = TNestedPool; + using NestedPoolPtr = std::shared_ptr<NestedPool>; + using Entry = typename NestedPool::Entry; + using NestedPools = std::vector<NestedPoolPtr>; + + PoolWithFailoverBase( + NestedPools nested_pools_, + time_t decrease_error_period_, + size_t max_error_cap_, + Poco::Logger * log_) + : nested_pools(std::move(nested_pools_)) + , decrease_error_period(decrease_error_period_) + , max_error_cap(max_error_cap_) + , shared_pool_states(nested_pools.size()) + , log(log_) + { + for (size_t i = 0;i < nested_pools.size(); ++i) + shared_pool_states[i].config_priority = nested_pools[i]->getPriority(); + } + + struct TryResult + { + TryResult() = default; + + explicit TryResult(Entry entry_) + : entry(std::move(entry_)) + , is_usable(true) + , is_up_to_date(true) + { + } + + void reset() + { + entry = Entry(); + is_usable = false; + is_up_to_date = false; + staleness = 0.0; + } + + Entry entry; + bool is_usable = false; /// If false, the entry is unusable for current request + /// (but may be usable for other requests, so error counts are not incremented) + bool is_up_to_date = false; /// If true, the entry is a connection to up-to-date replica. + double staleness = 0.0; /// Helps choosing the "least stale" option when all replicas are stale. + }; + struct PoolState; using PoolStates = std::vector<PoolState>; @@ -106,34 +106,34 @@ public: size_t slowdown_count = 0; }; - /// This functor must be provided by a client. It must perform a single try that takes a connection - /// from the provided pool and checks that it is good. - using TryGetEntryFunc = std::function<TryResult(NestedPool & pool, std::string & fail_message)>; - - /// The client can provide this functor to affect load balancing - the index of a pool is passed to - /// this functor. The pools with lower result value will be tried first. 
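The two client hooks documented here are plain std::function typedefs; a hedged sketch of their shapes with a stub pool type (StubPool and StubResult are hypothetical stand-ins, not types from this tree):

#include <cstdio>
#include <functional>
#include <string>

struct StubPool { bool healthy = true; };
struct StubResult { bool ok = false; };

// Mirrors TryGetEntryFunc: one attempt against a nested pool, fail_message
// filled on failure.
using TryGetEntry = std::function<StubResult(StubPool &, std::string &)>;
// Mirrors GetPriorityFunc: lower return value = tried first.
using GetPriority = std::function<size_t(size_t index)>;

int main()
{
    TryGetEntry try_get = [](StubPool & pool, std::string & fail_message)
    {
        if (!pool.healthy)
        {
            fail_message = "replica unreachable";
            return StubResult{false};
        }
        return StubResult{true};
    };
    GetPriority get_priority = [](size_t index) { return index; }; // prefer pool 0

    StubPool pool;
    std::string msg;
    StubResult r = try_get(pool, msg);
    printf("ok=%d priority(1)=%zu\n", (int) r.ok, get_priority(1));
}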
- using GetPriorityFunc = std::function<size_t(size_t index)>; - - - /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool). - /// The method will throw if it is unable to get min_entries alive connections or - /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas. - std::vector<TryResult> getMany( - size_t min_entries, size_t max_entries, size_t max_tries, - size_t max_ignored_errors, - bool fallback_to_stale_replicas, - const TryGetEntryFunc & try_get_entry, - const GetPriorityFunc & get_priority = GetPriorityFunc()); - -protected: - - /// Returns a single connection. - Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas, - const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc()); - - /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states. - PoolStates updatePoolStates(size_t max_ignored_errors); - + /// This functor must be provided by a client. It must perform a single try that takes a connection + /// from the provided pool and checks that it is good. + using TryGetEntryFunc = std::function<TryResult(NestedPool & pool, std::string & fail_message)>; + + /// The client can provide this functor to affect load balancing - the index of a pool is passed to + /// this functor. The pools with lower result value will be tried first. + using GetPriorityFunc = std::function<size_t(size_t index)>; + + + /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool). + /// The method will throw if it is unable to get min_entries alive connections or + /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas. + std::vector<TryResult> getMany( + size_t min_entries, size_t max_entries, size_t max_tries, + size_t max_ignored_errors, + bool fallback_to_stale_replicas, + const TryGetEntryFunc & try_get_entry, + const GetPriorityFunc & get_priority = GetPriorityFunc()); + +protected: + + /// Returns a single connection. + Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas, + const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc()); + + /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states. + PoolStates updatePoolStates(size_t max_ignored_errors); + void updateErrorCounts(PoolStates & states, time_t & last_decrease_time) const; std::vector<ShuffledPool> getShuffledPools(size_t max_ignored_errors, const GetPriorityFunc & get_priority); @@ -146,21 +146,21 @@ protected: return std::make_tuple(shared_pool_states, nested_pools, last_error_decrease_time); } - NestedPools nested_pools; - - const time_t decrease_error_period; - const size_t max_error_cap; - - mutable std::mutex pool_states_mutex; - PoolStates shared_pool_states; - /// The time when error counts were last decreased. - time_t last_error_decrease_time = 0; - - Poco::Logger * log; -}; - - -template <typename TNestedPool> + NestedPools nested_pools; + + const time_t decrease_error_period; + const size_t max_error_cap; + + mutable std::mutex pool_states_mutex; + PoolStates shared_pool_states; + /// The time when error counts were last decreased. 
+ time_t last_error_decrease_time = 0; + + Poco::Logger * log; +}; + + +template <typename TNestedPool> std::vector<typename PoolWithFailoverBase<TNestedPool>::ShuffledPool> PoolWithFailoverBase<TNestedPool>::getShuffledPools( size_t max_ignored_errors, const PoolWithFailoverBase::GetPriorityFunc & get_priority) @@ -201,181 +201,181 @@ inline void PoolWithFailoverBase<TNestedPool>::updateSharedErrorCounts(std::vect } template <typename TNestedPool> -typename TNestedPool::Entry -PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_to_stale_replicas, - const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority) -{ - std::vector<TryResult> results = getMany( - 1 /* min entries */, 1 /* max entries */, 1 /* max tries */, - max_ignored_errors, fallback_to_stale_replicas, - try_get_entry, get_priority); - if (results.empty() || results[0].entry.isNull()) - throw DB::Exception( - "PoolWithFailoverBase::getMany() returned less than min_entries entries.", - DB::ErrorCodes::LOGICAL_ERROR); - return results[0].entry; -} - -template <typename TNestedPool> -std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult> -PoolWithFailoverBase<TNestedPool>::getMany( - size_t min_entries, size_t max_entries, size_t max_tries, - size_t max_ignored_errors, - bool fallback_to_stale_replicas, - const TryGetEntryFunc & try_get_entry, - const GetPriorityFunc & get_priority) -{ +typename TNestedPool::Entry +PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_to_stale_replicas, + const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority) +{ + std::vector<TryResult> results = getMany( + 1 /* min entries */, 1 /* max entries */, 1 /* max tries */, + max_ignored_errors, fallback_to_stale_replicas, + try_get_entry, get_priority); + if (results.empty() || results[0].entry.isNull()) + throw DB::Exception( + "PoolWithFailoverBase::getMany() returned less than min_entries entries.", + DB::ErrorCodes::LOGICAL_ERROR); + return results[0].entry; +} + +template <typename TNestedPool> +std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult> +PoolWithFailoverBase<TNestedPool>::getMany( + size_t min_entries, size_t max_entries, size_t max_tries, + size_t max_ignored_errors, + bool fallback_to_stale_replicas, + const TryGetEntryFunc & try_get_entry, + const GetPriorityFunc & get_priority) +{ std::vector<ShuffledPool> shuffled_pools = getShuffledPools(max_ignored_errors, get_priority); - - /// We will try to get a connection from each pool until a connection is produced or max_tries is reached. - std::vector<TryResult> try_results(shuffled_pools.size()); - size_t entries_count = 0; - size_t usable_count = 0; - size_t up_to_date_count = 0; - size_t failed_pools_count = 0; - - /// At exit update shared error counts with error counts occurred during this call. - SCOPE_EXIT( - { + + /// We will try to get a connection from each pool until a connection is produced or max_tries is reached. + std::vector<TryResult> try_results(shuffled_pools.size()); + size_t entries_count = 0; + size_t usable_count = 0; + size_t up_to_date_count = 0; + size_t failed_pools_count = 0; + + /// At exit update shared error counts with error counts occurred during this call. + SCOPE_EXIT( + { updateSharedErrorCounts(shuffled_pools); - }); - - std::string fail_messages; - bool finished = false; - while (!finished) - { - for (size_t i = 0; i < shuffled_pools.size(); ++i) - { - if (up_to_date_count >= max_entries /// Already enough good entries. 
- || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced. - { - finished = true; - break; - } - - ShuffledPool & shuffled_pool = shuffled_pools[i]; - TryResult & result = try_results[i]; + }); + + std::string fail_messages; + bool finished = false; + while (!finished) + { + for (size_t i = 0; i < shuffled_pools.size(); ++i) + { + if (up_to_date_count >= max_entries /// Already enough good entries. + || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced. + { + finished = true; + break; + } + + ShuffledPool & shuffled_pool = shuffled_pools[i]; + TryResult & result = try_results[i]; if (max_tries && (shuffled_pool.error_count >= max_tries || !result.entry.isNull())) - continue; - - std::string fail_message; - result = try_get_entry(*shuffled_pool.pool, fail_message); - - if (!fail_message.empty()) - fail_messages += fail_message + '\n'; - - if (!result.entry.isNull()) - { - ++entries_count; - if (result.is_usable) - { - ++usable_count; - if (result.is_up_to_date) - ++up_to_date_count; - } - } - else - { LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); - - shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); - - if (shuffled_pool.error_count >= max_tries) - { - ++failed_pools_count; - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); - } - } - } - } - - if (usable_count < min_entries) - throw DB::NetException( - "All connection tries failed. Log: \n\n" + fail_messages + "\n", - DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED); - - try_results.erase( - std::remove_if( - try_results.begin(), try_results.end(), - [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; }), - try_results.end()); - - /// Sort so that preferred items are near the beginning. - std::stable_sort( - try_results.begin(), try_results.end(), - [](const TryResult & left, const TryResult & right) - { - return std::forward_as_tuple(!left.is_up_to_date, left.staleness) - < std::forward_as_tuple(!right.is_up_to_date, right.staleness); - }); - - if (fallback_to_stale_replicas) - { - /// There is not enough up-to-date entries but we are allowed to return stale entries. - /// Gather all up-to-date ones and least-bad stale ones. - - size_t size = std::min(try_results.size(), max_entries); - try_results.resize(size); - } - else if (up_to_date_count >= min_entries) - { - /// There is enough up-to-date entries. - try_results.resize(up_to_date_count); - } - else - throw DB::Exception( - "Could not find enough connections to up-to-date replicas. 
Got: " + std::to_string(up_to_date_count) - + ", needed: " + std::to_string(min_entries), - DB::ErrorCodes::ALL_REPLICAS_ARE_STALE); - - return try_results; -} - -template <typename TNestedPool> -struct PoolWithFailoverBase<TNestedPool>::PoolState -{ - UInt64 error_count = 0; + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); + + shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); + + if (shuffled_pool.error_count >= max_tries) + { + ++failed_pools_count; + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); + } + } + } + } + + if (usable_count < min_entries) + throw DB::NetException( + "All connection tries failed. Log: \n\n" + fail_messages + "\n", + DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED); + + try_results.erase( + std::remove_if( + try_results.begin(), try_results.end(), + [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; }), + try_results.end()); + + /// Sort so that preferred items are near the beginning. + std::stable_sort( + try_results.begin(), try_results.end(), + [](const TryResult & left, const TryResult & right) + { + return std::forward_as_tuple(!left.is_up_to_date, left.staleness) + < std::forward_as_tuple(!right.is_up_to_date, right.staleness); + }); + + if (fallback_to_stale_replicas) + { + /// There is not enough up-to-date entries but we are allowed to return stale entries. + /// Gather all up-to-date ones and least-bad stale ones. + + size_t size = std::min(try_results.size(), max_entries); + try_results.resize(size); + } + else if (up_to_date_count >= min_entries) + { + /// There is enough up-to-date entries. + try_results.resize(up_to_date_count); + } + else + throw DB::Exception( + "Could not find enough connections to up-to-date replicas. Got: " + std::to_string(up_to_date_count) + + ", needed: " + std::to_string(min_entries), + DB::ErrorCodes::ALL_REPLICAS_ARE_STALE); + + return try_results; +} + +template <typename TNestedPool> +struct PoolWithFailoverBase<TNestedPool>::PoolState +{ + UInt64 error_count = 0; /// The number of slowdowns that led to changing replica in HedgedRequestsFactory UInt64 slowdown_count = 0; - /// Priority from the <remote_server> configuration. - Int64 config_priority = 1; - /// Priority from the GetPriorityFunc. - Int64 priority = 0; - UInt32 random = 0; - - void randomize() - { - random = rng(); - } - - static bool compare(const PoolState & lhs, const PoolState & rhs) - { + /// Priority from the <remote_server> configuration. + Int64 config_priority = 1; + /// Priority from the GetPriorityFunc. 
+ Int64 priority = 0; + UInt32 random = 0; + + void randomize() + { + random = rng(); + } + + static bool compare(const PoolState & lhs, const PoolState & rhs) + { return std::forward_as_tuple(lhs.error_count, lhs.slowdown_count, lhs.config_priority, lhs.priority, lhs.random) < std::forward_as_tuple(rhs.error_count, rhs.slowdown_count, rhs.config_priority, rhs.priority, rhs.random); - } - -private: - std::minstd_rand rng = std::minstd_rand(randomSeed()); -}; - -template <typename TNestedPool> -typename PoolWithFailoverBase<TNestedPool>::PoolStates -PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors) -{ - PoolStates result; - result.reserve(nested_pools.size()); - - { - std::lock_guard lock(pool_states_mutex); - - for (auto & state : shared_pool_states) - state.randomize(); - + } + +private: + std::minstd_rand rng = std::minstd_rand(randomSeed()); +}; + +template <typename TNestedPool> +typename PoolWithFailoverBase<TNestedPool>::PoolStates +PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors) +{ + PoolStates result; + result.reserve(nested_pools.size()); + + { + std::lock_guard lock(pool_states_mutex); + + for (auto & state : shared_pool_states) + state.randomize(); + updateErrorCounts(shared_pool_states, last_error_decrease_time); result.assign(shared_pool_states.begin(), shared_pool_states.end()); } - + /// distributed_replica_max_ignored_errors for (auto & state : result) state.error_count = std::max<UInt64>(0, state.error_count - max_ignored_errors); @@ -393,7 +393,7 @@ void PoolWithFailoverBase<TNestedPool>::updateErrorCounts(PoolWithFailoverBase<T time_t delta = current_time - last_decrease_time; if (delta >= 0) - { + { const UInt64 MAX_BITS = sizeof(UInt64) * CHAR_BIT; size_t shift_amount = MAX_BITS; /// Divide error counts by 2 every decrease_error_period seconds. @@ -403,25 +403,25 @@ void PoolWithFailoverBase<TNestedPool>::updateErrorCounts(PoolWithFailoverBase<T /// Else if the function is called often enough, error count will never decrease. 
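The decay rule spelled out in updateErrorCounts() amounts to one right shift per elapsed decrease_error_period: halving repeatedly is collapsed into error_count >>= (delta / period), capped at the counter's bit width. A tiny arithmetic sketch with invented numbers:

#include <cstdint>
#include <cstdio>
#include <ctime>

int main()
{
    const uint64_t MAX_BITS = sizeof(uint64_t) * 8;
    uint64_t error_count = 40;
    time_t decrease_error_period = 60; // example: halve once per minute
    time_t delta = 150;                // 2.5 periods elapsed since last decay

    uint64_t shift = (uint64_t)(delta / decrease_error_period); // -> 2 whole periods
    error_count = shift >= MAX_BITS ? 0 : (error_count >> shift);
    printf("decayed error_count: %llu\n", (unsigned long long) error_count); // 10
}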
if (shift_amount) last_decrease_time = current_time; - + if (shift_amount >= MAX_BITS) - { + { for (auto & state : states) - { + { state.error_count = 0; state.slowdown_count = 0; - } + } } else if (shift_amount) { for (auto & state : states) - { + { state.error_count >>= shift_amount; state.slowdown_count >>= shift_amount; - } - } - } - } + } + } + } + } else last_decrease_time = current_time; -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp index 3dce47b34c..fcc4124aa8 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp @@ -1,198 +1,198 @@ -#include "ProcfsMetricsProvider.h" - -#if defined(__linux__) - -#include <Common/Exception.h> -#include <IO/ReadBufferFromMemory.h> -#include <IO/ReadHelpers.h> - -#include <common/find_symbols.h> -#include <common/logger_useful.h> - -#include <cassert> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <linux/taskstats.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; -} - -static constexpr auto thread_schedstat = "/proc/thread-self/schedstat"; -static constexpr auto thread_stat = "/proc/thread-self/stat"; -static constexpr auto thread_io = "/proc/thread-self/io"; - - -namespace -{ -[[noreturn]] inline void throwWithFailedToOpenFile(const std::string & filename) -{ - throwFromErrno( - "Cannot open file " + filename, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); -} - -inline void emitErrorMsgWithFailedToCloseFile(const std::string & filename) -{ - try - { - throwFromErrno( - "File descriptor for \"" + filename + "\" could not be closed. " - "Something seems to have gone wrong. 
Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); - } - catch (const ErrnoException &) - { - DB::tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - -ssize_t readFromFD(const int fd, const char * filename, char * buf, size_t buf_size) -{ - ssize_t res = 0; - - do - { - res = ::pread(fd, buf, buf_size, 0); - - if (-1 == res) - { - if (errno == EINTR) - continue; - - throwFromErrno( - "Cannot read from file " + std::string(filename), - ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - - assert(res >= 0); - break; - } while (true); - - return res; -} -} - - -bool ProcfsMetricsProvider::isAvailable() noexcept -{ - struct stat sb; - int res = ::stat(thread_schedstat, &sb); - - /// Verify that procfs is mounted, one of the stats file exists and is a regular file - return res != -1 && (sb.st_mode & S_IFMT) == S_IFREG; -} - - -ProcfsMetricsProvider::ProcfsMetricsProvider(const pid_t /*tid*/) -{ - thread_schedstat_fd = ::open(thread_schedstat, O_RDONLY | O_CLOEXEC); - if (-1 == thread_schedstat_fd) - { - throwWithFailedToOpenFile(thread_schedstat); - } - thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC); - if (-1 == thread_stat_fd) - { - ::close(thread_schedstat_fd); - throwWithFailedToOpenFile(thread_stat); - } - thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC); - if (-1 != thread_io_fd) - { - stats_version = 3; - } -} - - -ProcfsMetricsProvider::~ProcfsMetricsProvider() -{ - if (stats_version >= 3 && 0 != ::close(thread_io_fd)) - emitErrorMsgWithFailedToCloseFile(thread_io); - if (0 != ::close(thread_stat_fd)) - emitErrorMsgWithFailedToCloseFile(thread_stat); - if (0 != ::close(thread_schedstat_fd)) - emitErrorMsgWithFailedToCloseFile(thread_schedstat); -} - - -void ProcfsMetricsProvider::getTaskStats(::taskstats & out_stats) const -{ - constexpr size_t buf_size = 1024; - char buf[buf_size]; - - out_stats.version = stats_version; - - readParseAndSetThreadCPUStat(out_stats, buf, buf_size); - readParseAndSetThreadBlkIOStat(out_stats, buf, buf_size); - - if (stats_version >= 3) - { - readParseAndSetThreadIOStat(out_stats, buf, buf_size); - } -} - - -void ProcfsMetricsProvider::readParseAndSetThreadCPUStat(::taskstats & out_stats, char * buf, size_t buf_size) const -{ - ssize_t res = readFromFD(thread_schedstat_fd, thread_schedstat, buf, buf_size); - ReadBufferFromMemory in_schedstat(buf, res); - - readIntText(out_stats.cpu_run_virtual_total, in_schedstat); - skipWhitespaceIfAny(in_schedstat); - readIntText(out_stats.cpu_delay_total, in_schedstat); -} - - -void ProcfsMetricsProvider::readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const -{ - ssize_t res = readFromFD(thread_stat_fd, thread_stat, buf, buf_size - 1); - ReadBufferFromMemory in_stat(buf, res); - - /// We need to skip the first 41 fields of the string read from /proc/thread-self/stat. 
- for (int i = 0; i < 41; ++i) - { - in_stat.position() = find_first_symbols<' ', '\t'>(in_stat.position(), in_stat.buffer().end()); - skipWhitespaceIfAny(in_stat); - } - - /// Read field #42 - Aggregated block I/O delays, measured in clock ticks (centiseconds) - readIntText(out_stats.blkio_delay_total, in_stat); - out_stats.blkio_delay_total *= 10000000ul; /// We need to return time in nanoseconds -} - - -void ProcfsMetricsProvider::readParseAndSetThreadIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const -{ - ssize_t res = readFromFD(thread_io_fd, thread_io, buf, buf_size); - ReadBufferFromMemory in_thread_io(buf, res); - - assertString("rchar:", in_thread_io); - skipWhitespaceIfAny(in_thread_io); - readIntText(out_stats.read_char, in_thread_io); - skipWhitespaceIfAny(in_thread_io); - assertString("wchar:", in_thread_io); - skipWhitespaceIfAny(in_thread_io); - readIntText(out_stats.write_char, in_thread_io); - skipWhitespaceIfAny(in_thread_io); - skipToNextLineOrEOF(in_thread_io); - skipToNextLineOrEOF(in_thread_io); - assertString("read_bytes:", in_thread_io); - skipWhitespaceIfAny(in_thread_io); - readIntText(out_stats.read_bytes, in_thread_io); - skipWhitespaceIfAny(in_thread_io); - assertString("write_bytes:", in_thread_io); - skipWhitespaceIfAny(in_thread_io); - readIntText(out_stats.write_bytes, in_thread_io); -} -} - -#endif +#include "ProcfsMetricsProvider.h" + +#if defined(__linux__) + +#include <Common/Exception.h> +#include <IO/ReadBufferFromMemory.h> +#include <IO/ReadHelpers.h> + +#include <common/find_symbols.h> +#include <common/logger_useful.h> + +#include <cassert> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/taskstats.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; +} + +static constexpr auto thread_schedstat = "/proc/thread-self/schedstat"; +static constexpr auto thread_stat = "/proc/thread-self/stat"; +static constexpr auto thread_io = "/proc/thread-self/io"; + + +namespace +{ +[[noreturn]] inline void throwWithFailedToOpenFile(const std::string & filename) +{ + throwFromErrno( + "Cannot open file " + filename, + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); +} + +inline void emitErrorMsgWithFailedToCloseFile(const std::string & filename) +{ + try + { + throwFromErrno( + "File descriptor for \"" + filename + "\" could not be closed. " + "Something seems to have gone wrong. 
Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); + } + catch (const ErrnoException &) + { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +ssize_t readFromFD(const int fd, const char * filename, char * buf, size_t buf_size) +{ + ssize_t res = 0; + + do + { + res = ::pread(fd, buf, buf_size, 0); + + if (-1 == res) + { + if (errno == EINTR) + continue; + + throwFromErrno( + "Cannot read from file " + std::string(filename), + ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + } + + assert(res >= 0); + break; + } while (true); + + return res; +} +} + + +bool ProcfsMetricsProvider::isAvailable() noexcept +{ + struct stat sb; + int res = ::stat(thread_schedstat, &sb); + + /// Verify that procfs is mounted, one of the stats file exists and is a regular file + return res != -1 && (sb.st_mode & S_IFMT) == S_IFREG; +} + + +ProcfsMetricsProvider::ProcfsMetricsProvider(const pid_t /*tid*/) +{ + thread_schedstat_fd = ::open(thread_schedstat, O_RDONLY | O_CLOEXEC); + if (-1 == thread_schedstat_fd) + { + throwWithFailedToOpenFile(thread_schedstat); + } + thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC); + if (-1 == thread_stat_fd) + { + ::close(thread_schedstat_fd); + throwWithFailedToOpenFile(thread_stat); + } + thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC); + if (-1 != thread_io_fd) + { + stats_version = 3; + } +} + + +ProcfsMetricsProvider::~ProcfsMetricsProvider() +{ + if (stats_version >= 3 && 0 != ::close(thread_io_fd)) + emitErrorMsgWithFailedToCloseFile(thread_io); + if (0 != ::close(thread_stat_fd)) + emitErrorMsgWithFailedToCloseFile(thread_stat); + if (0 != ::close(thread_schedstat_fd)) + emitErrorMsgWithFailedToCloseFile(thread_schedstat); +} + + +void ProcfsMetricsProvider::getTaskStats(::taskstats & out_stats) const +{ + constexpr size_t buf_size = 1024; + char buf[buf_size]; + + out_stats.version = stats_version; + + readParseAndSetThreadCPUStat(out_stats, buf, buf_size); + readParseAndSetThreadBlkIOStat(out_stats, buf, buf_size); + + if (stats_version >= 3) + { + readParseAndSetThreadIOStat(out_stats, buf, buf_size); + } +} + + +void ProcfsMetricsProvider::readParseAndSetThreadCPUStat(::taskstats & out_stats, char * buf, size_t buf_size) const +{ + ssize_t res = readFromFD(thread_schedstat_fd, thread_schedstat, buf, buf_size); + ReadBufferFromMemory in_schedstat(buf, res); + + readIntText(out_stats.cpu_run_virtual_total, in_schedstat); + skipWhitespaceIfAny(in_schedstat); + readIntText(out_stats.cpu_delay_total, in_schedstat); +} + + +void ProcfsMetricsProvider::readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const +{ + ssize_t res = readFromFD(thread_stat_fd, thread_stat, buf, buf_size - 1); + ReadBufferFromMemory in_stat(buf, res); + + /// We need to skip the first 41 fields of the string read from /proc/thread-self/stat. 
+ for (int i = 0; i < 41; ++i) + { + in_stat.position() = find_first_symbols<' ', '\t'>(in_stat.position(), in_stat.buffer().end()); + skipWhitespaceIfAny(in_stat); + } + + /// Read field #42 - Aggregated block I/O delays, measured in clock ticks (centiseconds) + readIntText(out_stats.blkio_delay_total, in_stat); + out_stats.blkio_delay_total *= 10000000ul; /// We need to return time in nanoseconds +} + + +void ProcfsMetricsProvider::readParseAndSetThreadIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const +{ + ssize_t res = readFromFD(thread_io_fd, thread_io, buf, buf_size); + ReadBufferFromMemory in_thread_io(buf, res); + + assertString("rchar:", in_thread_io); + skipWhitespaceIfAny(in_thread_io); + readIntText(out_stats.read_char, in_thread_io); + skipWhitespaceIfAny(in_thread_io); + assertString("wchar:", in_thread_io); + skipWhitespaceIfAny(in_thread_io); + readIntText(out_stats.write_char, in_thread_io); + skipWhitespaceIfAny(in_thread_io); + skipToNextLineOrEOF(in_thread_io); + skipToNextLineOrEOF(in_thread_io); + assertString("read_bytes:", in_thread_io); + skipWhitespaceIfAny(in_thread_io); + readIntText(out_stats.read_bytes, in_thread_io); + skipWhitespaceIfAny(in_thread_io); + assertString("write_bytes:", in_thread_io); + skipWhitespaceIfAny(in_thread_io); + readIntText(out_stats.write_bytes, in_thread_io); +} +} + +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h index 475a16af5e..60eb94bfcc 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h @@ -1,44 +1,44 @@ -#pragma once - -#include <sys/types.h> -#include <boost/noncopyable.hpp> - - -#if defined(__linux__) -struct taskstats; - -namespace DB -{ -/// Provides several essential per-task metrics by reading data from Procfs (when available). -class ProcfsMetricsProvider : private boost::noncopyable -{ -public: - ProcfsMetricsProvider(const pid_t /*tid*/); - ~ProcfsMetricsProvider(); - - /// Updates only a part of taskstats struct's fields: - /// - cpu_run_virtual_total, cpu_delay_total (when /proc/thread-self/schedstat is available) - /// - blkio_delay_total (when /proc/thread-self/stat is available) - /// - rchar, wchar, read_bytes, write_bytes (when /prod/thread-self/io is available) - /// See: man procfs - void getTaskStats(::taskstats & out_stats) const; - - /// Tells whether this metrics (via Procfs) is provided on the current platform - static bool isAvailable() noexcept; - -private: - void readParseAndSetThreadCPUStat(::taskstats & out_stats, char *, size_t) const; - void readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char *, size_t) const; - void readParseAndSetThreadIOStat(::taskstats & out_stats, char *, size_t) const; - -private: - int thread_schedstat_fd = -1; - int thread_stat_fd = -1; - int thread_io_fd = -1; - - /// This field is used for compatibility with TasksStatsCounters::incrementProfileEvents() - unsigned short stats_version = 1; -}; - -} -#endif +#pragma once + +#include <sys/types.h> +#include <boost/noncopyable.hpp> + + +#if defined(__linux__) +struct taskstats; + +namespace DB +{ +/// Provides several essential per-task metrics by reading data from Procfs (when available). 
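A hedged usage sketch for this provider (it builds only inside this source tree and only on Linux); the tid constructor argument is unused by the implementation above, so 0 is passed:

#include <Common/ProcfsMetricsProvider.h>
#include <linux/taskstats.h>
#include <cstdio>

int main()
{
    // Probe first: procfs may be absent, or the schedstat file missing.
    if (!DB::ProcfsMetricsProvider::isAvailable())
    {
        printf("procfs metrics not available\n");
        return 0;
    }

    DB::ProcfsMetricsProvider provider(0); // tid is ignored by the constructor
    ::taskstats stats{};
    provider.getTaskStats(stats);
    printf("cpu delay: %llu ns, blkio delay: %llu ns\n",
           (unsigned long long) stats.cpu_delay_total,
           (unsigned long long) stats.blkio_delay_total);
}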
+class ProcfsMetricsProvider : private boost::noncopyable +{ +public: + ProcfsMetricsProvider(const pid_t /*tid*/); + ~ProcfsMetricsProvider(); + + /// Updates only a part of taskstats struct's fields: + /// - cpu_run_virtual_total, cpu_delay_total (when /proc/thread-self/schedstat is available) + /// - blkio_delay_total (when /proc/thread-self/stat is available) + /// - rchar, wchar, read_bytes, write_bytes (when /prod/thread-self/io is available) + /// See: man procfs + void getTaskStats(::taskstats & out_stats) const; + + /// Tells whether this metrics (via Procfs) is provided on the current platform + static bool isAvailable() noexcept; + +private: + void readParseAndSetThreadCPUStat(::taskstats & out_stats, char *, size_t) const; + void readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char *, size_t) const; + void readParseAndSetThreadIOStat(::taskstats & out_stats, char *, size_t) const; + +private: + int thread_schedstat_fd = -1; + int thread_stat_fd = -1; + int thread_io_fd = -1; + + /// This field is used for compatibility with TasksStatsCounters::incrementProfileEvents() + unsigned short stats_version = 1; +}; + +} +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h index b370272302..8830945957 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h @@ -1,72 +1,72 @@ -#pragma once - -#include <memory> -#include <vector> -#include <cstdint> - -namespace Poco -{ -namespace Util -{ - class AbstractConfiguration; -} -} - -/// SensitiveDataMasker allows to remove sensitive data from queries using set of regexp-based rules - -/// It's used as a singleton via getInstance method - -/// Initially it's empty (nullptr) and after manual initialization -/// (one-time, done by setInstance call) it takes the proper value which -/// is stored in unique_ptr. - -/// It looks like the singleton is the best option here, as -/// two users of that object (OwnSplitChannel & Interpreters/executeQuery) +#pragma once + +#include <memory> +#include <vector> +#include <cstdint> + +namespace Poco +{ +namespace Util +{ + class AbstractConfiguration; +} +} + +/// SensitiveDataMasker allows to remove sensitive data from queries using set of regexp-based rules + +/// It's used as a singleton via getInstance method + +/// Initially it's empty (nullptr) and after manual initialization +/// (one-time, done by setInstance call) it takes the proper value which +/// is stored in unique_ptr. + +/// It looks like the singleton is the best option here, as +/// two users of that object (OwnSplitChannel & Interpreters/executeQuery) /// can't own/share that Masker properly without synchronization & locks, -/// and we can't afford setting global locks for each logged line. - -/// I've considered singleton alternatives, but it's unclear who should own the object, -/// and it introduce unnecessary complexity in implementation (passing references back and forward): -/// -/// context can't own, as Context is destroyed before logger, -/// and logger lives longer and logging can still happen after Context destruction. -/// resetting masker in the logger at the moment of -/// context destruction can't be done w/o synchronization / locks in a safe manner. 
-/// -/// logger is Poco derived and i didn't want to brake it's interface, -/// also logger can be dynamically reconfigured without server restart, -/// and it actually recreates OwnSplitChannel when reconfiguration happen, -/// so that makes it's quite tricky. So it a bad candidate for owning masker too. - -namespace DB -{ -class SensitiveDataMasker -{ -private: - class MaskingRule; - std::vector<std::unique_ptr<MaskingRule>> all_masking_rules; - static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker; - -public: - SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); - ~SensitiveDataMasker(); - - /// Returns the number of matched rules. - size_t wipeSensitiveData(std::string & data) const; - - /// setInstance is not thread-safe and should be called once in single-thread mode. - /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367 - static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_); - static SensitiveDataMasker * getInstance(); - - /// Used in tests. - void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string); - -#ifndef NDEBUG - void printStats(); -#endif - - size_t rulesCount() const; -}; - -}; +/// and we can't afford setting global locks for each logged line. + +/// I've considered singleton alternatives, but it's unclear who should own the object, +/// and it introduce unnecessary complexity in implementation (passing references back and forward): +/// +/// context can't own, as Context is destroyed before logger, +/// and logger lives longer and logging can still happen after Context destruction. +/// resetting masker in the logger at the moment of +/// context destruction can't be done w/o synchronization / locks in a safe manner. +/// +/// logger is Poco derived and i didn't want to brake it's interface, +/// also logger can be dynamically reconfigured without server restart, +/// and it actually recreates OwnSplitChannel when reconfiguration happen, +/// so that makes it's quite tricky. So it a bad candidate for owning masker too. + +namespace DB +{ +class SensitiveDataMasker +{ +private: + class MaskingRule; + std::vector<std::unique_ptr<MaskingRule>> all_masking_rules; + static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker; + +public: + SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); + ~SensitiveDataMasker(); + + /// Returns the number of matched rules. + size_t wipeSensitiveData(std::string & data) const; + + /// setInstance is not thread-safe and should be called once in single-thread mode. + /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367 + static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_); + static SensitiveDataMasker * getInstance(); + + /// Used in tests. 
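A hedged usage sketch for the masker (builds only inside this source tree): construct it over an empty Poco configuration, register a rule through the test hook declared just below, and wipe matches in place. The regexp and replacement are invented examples:

#include <Common/SensitiveDataMasker.h>
#include <Poco/AutoPtr.h>
#include <Poco/Util/MapConfiguration.h>
#include <iostream>
#include <string>

int main()
{
    // Empty configuration: no rules are loaded from config, one is added by hand.
    Poco::AutoPtr<Poco::Util::MapConfiguration> config(new Poco::Util::MapConfiguration);
    DB::SensitiveDataMasker masker(*config, "query_masking_rules");
    masker.addMaskingRule("hide emails", "[a-z0-9._%+-]+@[a-z0-9.-]+", "***@***");

    std::string query = "INSERT INTO users VALUES ('john.doe@example.com')";
    size_t matched = masker.wipeSensitiveData(query);
    std::cout << matched << " rule(s) matched: " << query << "\n";
}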
+ void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string); + +#ifndef NDEBUG + void printStats(); +#endif + + size_t rulesCount() const; +}; + +}; diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp index 6b09a9741a..92978a0ad8 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp @@ -1,319 +1,319 @@ -#include "TaskStatsInfoGetter.h" -#include <Common/Exception.h> +#include "TaskStatsInfoGetter.h" +#include <Common/Exception.h> #include <common/types.h> - -#include <unistd.h> - -#if defined(OS_LINUX) - -#include "hasLinuxCapability.h" -#include <common/unaligned.h> - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/socket.h> -#include <linux/genetlink.h> -#include <linux/netlink.h> -#include <linux/taskstats.h> -#include <linux/capability.h> - -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#endif - -/// Basic idea is motivated by "iotop" tool. -/// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NETLINK_ERROR; -} - -// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed. -static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len) -{ - return len >= static_cast<ssize_t>(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast<size_t>(len) >= nlh->nlmsg_len; -} - -namespace -{ - - -/** The message contains: - * - Netlink protocol header; - * - Generic Netlink (is a sub-protocol of Netlink that we use) protocol header; - * - Payload - * -- that itself is a list of "Attributes" (sub-messages), each of them contains length (including header), type, and its own payload. - * -- and attribute payload may be represented by the list of embedded attributes. - */ -struct NetlinkMessage -{ - static size_t constexpr MAX_MSG_SIZE = 1024; - - alignas(NLMSG_ALIGNTO) ::nlmsghdr header; - - struct Attribute - { - ::nlattr header; - - alignas(NLMSG_ALIGNTO) char payload[0]; - - const Attribute * next() const - { - return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + NLA_ALIGN(header.nla_len)); - } - }; - - union alignas(NLMSG_ALIGNTO) - { - struct - { - ::genlmsghdr generic_header; - - union alignas(NLMSG_ALIGNTO) - { - char buf[MAX_MSG_SIZE]; - Attribute attribute; /// First attribute. There may be more. 
- } payload; - }; - - ::nlmsgerr error; - }; - - const Attribute * end() const - { - return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + header.nlmsg_len); - } - - void send(int fd) const - { - const char * request_buf = reinterpret_cast<const char *>(this); - ssize_t request_size = header.nlmsg_len; - - union - { - ::sockaddr_nl nladdr{}; - ::sockaddr sockaddr; - }; - - nladdr.nl_family = AF_NETLINK; - - while (true) - { - ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, &sockaddr, sizeof(nladdr)); - - if (bytes_sent <= 0) - { - if (errno == EAGAIN) - continue; - else - throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); - } - - if (bytes_sent > request_size) - throw Exception("Wrong result of sendto system call: bytes_sent is greater than request size", ErrorCodes::NETLINK_ERROR); - - if (bytes_sent == request_size) - break; - - request_buf += bytes_sent; - request_size -= bytes_sent; - } - } - - void receive(int fd) - { - ssize_t bytes_received = ::recv(fd, this, sizeof(*this), 0); - - if (header.nlmsg_type == NLMSG_ERROR) - throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR); - - if (!is_nlmsg_ok(&header, bytes_received)) - throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR); - } -}; - - -NetlinkMessage query( - int fd, - UInt16 type, - UInt32 pid, - UInt8 command, - UInt16 attribute_type, - const void * attribute_data, - int attribute_size) -{ - NetlinkMessage request{}; - - request.header.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); /// Length of both headers. - request.header.nlmsg_type = type; - request.header.nlmsg_flags = NLM_F_REQUEST; /// A request. - request.header.nlmsg_seq = 0; - request.header.nlmsg_pid = pid; - - request.generic_header.cmd = command; - request.generic_header.version = 1; - - request.payload.attribute.header.nla_type = attribute_type; - request.payload.attribute.header.nla_len = attribute_size + NLA_HDRLEN; - - memcpy(&request.payload.attribute.payload, attribute_data, attribute_size); - - request.header.nlmsg_len += NLMSG_ALIGN(request.payload.attribute.header.nla_len); - - request.send(fd); - - NetlinkMessage response; - response.receive(fd); - - return response; -} - - -UInt16 getFamilyIdImpl(int fd) -{ - NetlinkMessage answer = query(fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, TASKSTATS_GENL_NAME, strlen(TASKSTATS_GENL_NAME) + 1); - - /// NOTE Why the relevant info is located in the second attribute? - const NetlinkMessage::Attribute * attr = answer.payload.attribute.next(); - - if (attr->header.nla_type != CTRL_ATTR_FAMILY_ID) - throw Exception("Received wrong attribute as an answer to GET_FAMILY Netlink command", ErrorCodes::NETLINK_ERROR); - - return unalignedLoad<UInt16>(attr->payload); -} - - -bool checkPermissionsImpl() -{ - static bool res = hasLinuxCapability(CAP_NET_ADMIN); - if (!res) - return false; - - /// Check that we can successfully initialize TaskStatsInfoGetter. - /// It will ask about family id through Netlink. - /// On some LXC containers we have capability but we still cannot use Netlink. - - try - { - TaskStatsInfoGetter(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - return false; - } - - return true; -} - - -UInt16 getFamilyId(int fd) -{ - /// It is thread and exception safe since C++11 and even before. 
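The length bookkeeping query() performs when it assembles a request follows the kernel's own macros: total length = netlink header + generic netlink header + one aligned attribute. A Linux-only sketch of just that arithmetic, using the TASKSTATS family name as the attribute payload:

#include <linux/genetlink.h>
#include <linux/netlink.h>
#include <linux/taskstats.h>
#include <cstdio>

int main()
{
    const char payload[] = TASKSTATS_GENL_NAME;   // "TASKSTATS", NUL included
    int attr_payload_len = (int) sizeof(payload); // strlen + 1, as in getFamilyIdImpl

    int nla_len = NLA_HDRLEN + attr_payload_len;  // attribute header + payload
    int nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN)     // netlink + genl headers
                  + NLMSG_ALIGN(nla_len);         // attribute, 4-byte aligned

    printf("attribute len: %d, total message len: %d\n", nla_len, nlmsg_len);
}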
- static UInt16 res = getFamilyIdImpl(fd); - return res; -} - -} - - -bool TaskStatsInfoGetter::checkPermissions() -{ - static bool res = checkPermissionsImpl(); - return res; -} - - -TaskStatsInfoGetter::TaskStatsInfoGetter() -{ - netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); - if (netlink_socket_fd < 0) - throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); - - /// On some containerized environments, operation on Netlink socket could hang forever. - /// We set reasonably small timeout to overcome this issue. - - struct timeval tv; - tv.tv_sec = 0; - tv.tv_usec = 50000; - - if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char *>(&tv), sizeof(tv))) - throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); - - union - { - ::sockaddr_nl addr{}; - ::sockaddr sockaddr; - }; - addr.nl_family = AF_NETLINK; - - if (::bind(netlink_socket_fd, &sockaddr, sizeof(addr)) < 0) - throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); - - taskstats_family_id = getFamilyId(netlink_socket_fd); -} - - -void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const -{ - NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid)); - - for (const NetlinkMessage::Attribute * attr = &answer.payload.attribute; - attr < answer.end(); - attr = attr->next()) - { - if (attr->header.nla_type == TASKSTATS_TYPE_AGGR_TGID || attr->header.nla_type == TASKSTATS_TYPE_AGGR_PID) - { - for (const NetlinkMessage::Attribute * nested_attr = reinterpret_cast<const NetlinkMessage::Attribute *>(attr->payload); - nested_attr < attr->next(); - nested_attr = nested_attr->next()) - { - if (nested_attr->header.nla_type == TASKSTATS_TYPE_STATS) - { - out_stats = unalignedLoad<::taskstats>(nested_attr->payload); - return; - } - } - } - } - - throw Exception("There is no TASKSTATS_TYPE_STATS attribute in the Netlink response", ErrorCodes::NETLINK_ERROR); -} - - -TaskStatsInfoGetter::~TaskStatsInfoGetter() -{ - if (netlink_socket_fd >= 0) - close(netlink_socket_fd); -} - -} - - -#else - -namespace DB -{ - -bool TaskStatsInfoGetter::checkPermissions() -{ - return false; -} - -TaskStatsInfoGetter::TaskStatsInfoGetter() = default; -TaskStatsInfoGetter::~TaskStatsInfoGetter() = default; - -void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const -{ -} - -} - -#endif + +#include <unistd.h> + +#if defined(OS_LINUX) + +#include "hasLinuxCapability.h" +#include <common/unaligned.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <linux/genetlink.h> +#include <linux/netlink.h> +#include <linux/taskstats.h> +#include <linux/capability.h> + +#if defined(__clang__) + #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#endif + +/// Basic idea is motivated by "iotop" tool. +/// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NETLINK_ERROR; +} + +// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed. 
+static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len) +{ + return len >= static_cast<ssize_t>(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast<size_t>(len) >= nlh->nlmsg_len; +} + +namespace +{ + + +/** The message contains: + * - Netlink protocol header; + * - Generic Netlink (is a sub-protocol of Netlink that we use) protocol header; + * - Payload + * -- that itself is a list of "Attributes" (sub-messages), each of them contains length (including header), type, and its own payload. + * -- and attribute payload may be represented by the list of embedded attributes. + */ +struct NetlinkMessage +{ + static size_t constexpr MAX_MSG_SIZE = 1024; + + alignas(NLMSG_ALIGNTO) ::nlmsghdr header; + + struct Attribute + { + ::nlattr header; + + alignas(NLMSG_ALIGNTO) char payload[0]; + + const Attribute * next() const + { + return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + NLA_ALIGN(header.nla_len)); + } + }; + + union alignas(NLMSG_ALIGNTO) + { + struct + { + ::genlmsghdr generic_header; + + union alignas(NLMSG_ALIGNTO) + { + char buf[MAX_MSG_SIZE]; + Attribute attribute; /// First attribute. There may be more. + } payload; + }; + + ::nlmsgerr error; + }; + + const Attribute * end() const + { + return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + header.nlmsg_len); + } + + void send(int fd) const + { + const char * request_buf = reinterpret_cast<const char *>(this); + ssize_t request_size = header.nlmsg_len; + + union + { + ::sockaddr_nl nladdr{}; + ::sockaddr sockaddr; + }; + + nladdr.nl_family = AF_NETLINK; + + while (true) + { + ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, &sockaddr, sizeof(nladdr)); + + if (bytes_sent <= 0) + { + if (errno == EAGAIN) + continue; + else + throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); + } + + if (bytes_sent > request_size) + throw Exception("Wrong result of sendto system call: bytes_sent is greater than request size", ErrorCodes::NETLINK_ERROR); + + if (bytes_sent == request_size) + break; + + request_buf += bytes_sent; + request_size -= bytes_sent; + } + } + + void receive(int fd) + { + ssize_t bytes_received = ::recv(fd, this, sizeof(*this), 0); + + if (header.nlmsg_type == NLMSG_ERROR) + throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR); + + if (!is_nlmsg_ok(&header, bytes_received)) + throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR); + } +}; + + +NetlinkMessage query( + int fd, + UInt16 type, + UInt32 pid, + UInt8 command, + UInt16 attribute_type, + const void * attribute_data, + int attribute_size) +{ + NetlinkMessage request{}; + + request.header.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); /// Length of both headers. + request.header.nlmsg_type = type; + request.header.nlmsg_flags = NLM_F_REQUEST; /// A request. 
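// Editor's aside, not from the original patch: the length bookkeeping in this
// function follows the kernel's alignment macros. NLMSG_LENGTH(GENL_HDRLEN)
// covers the netlink header plus the generic-netlink header; each attribute
// then contributes its own header (NLA_HDRLEN) plus its payload, rounded up to
// 4-byte alignment. For the 10-byte family name "TASKSTATS\0" used below:
#include <linux/netlink.h>
#include <linux/genetlink.h>
#include <cstring>
#include <cassert>

int main()
{
    assert(NLMSG_LENGTH(GENL_HDRLEN) == NLMSG_HDRLEN + GENL_HDRLEN);

    const size_t payload_size = strlen("TASKSTATS") + 1; // 10 bytes including the NUL
    const size_t nla_len = payload_size + NLA_HDRLEN;    // 14: attribute header + payload
    assert(NLMSG_ALIGN(nla_len) == 16);                  // padded to the 4-byte boundary
}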
+ request.header.nlmsg_seq = 0; + request.header.nlmsg_pid = pid; + + request.generic_header.cmd = command; + request.generic_header.version = 1; + + request.payload.attribute.header.nla_type = attribute_type; + request.payload.attribute.header.nla_len = attribute_size + NLA_HDRLEN; + + memcpy(&request.payload.attribute.payload, attribute_data, attribute_size); + + request.header.nlmsg_len += NLMSG_ALIGN(request.payload.attribute.header.nla_len); + + request.send(fd); + + NetlinkMessage response; + response.receive(fd); + + return response; +} + + +UInt16 getFamilyIdImpl(int fd) +{ + NetlinkMessage answer = query(fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, TASKSTATS_GENL_NAME, strlen(TASKSTATS_GENL_NAME) + 1); + + /// NOTE Why the relevant info is located in the second attribute? + const NetlinkMessage::Attribute * attr = answer.payload.attribute.next(); + + if (attr->header.nla_type != CTRL_ATTR_FAMILY_ID) + throw Exception("Received wrong attribute as an answer to GET_FAMILY Netlink command", ErrorCodes::NETLINK_ERROR); + + return unalignedLoad<UInt16>(attr->payload); +} + + +bool checkPermissionsImpl() +{ + static bool res = hasLinuxCapability(CAP_NET_ADMIN); + if (!res) + return false; + + /// Check that we can successfully initialize TaskStatsInfoGetter. + /// It will ask about family id through Netlink. + /// On some LXC containers we have capability but we still cannot use Netlink. + + try + { + TaskStatsInfoGetter(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + return false; + } + + return true; +} + + +UInt16 getFamilyId(int fd) +{ + /// It is thread and exception safe since C++11 and even before. + static UInt16 res = getFamilyIdImpl(fd); + return res; +} + +} + + +bool TaskStatsInfoGetter::checkPermissions() +{ + static bool res = checkPermissionsImpl(); + return res; +} + + +TaskStatsInfoGetter::TaskStatsInfoGetter() +{ + netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (netlink_socket_fd < 0) + throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + + /// On some containerized environments, operation on Netlink socket could hang forever. + /// We set reasonably small timeout to overcome this issue. 
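// Editor's aside, not from the original patch: with SO_RCVTIMEO set, recv() on
// this socket stops blocking once the timeout expires and returns -1 with
// errno set to EAGAIN/EWOULDBLOCK; receive() above then fails its length check
// and throws a NETLINK_ERROR exception instead of hanging. A standalone
// illustration of setting the same 50 ms timeout:
#include <sys/socket.h>
#include <sys/time.h>

static bool setRecvTimeout50ms(int fd)
{
    ::timeval tv{};
    tv.tv_sec = 0;
    tv.tv_usec = 50000; // 50 ms, the same value the constructor uses below
    return 0 == ::setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO,
                             reinterpret_cast<const char *>(&tv), sizeof(tv));
}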
+ + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 50000; + + if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char *>(&tv), sizeof(tv))) + throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + + union + { + ::sockaddr_nl addr{}; + ::sockaddr sockaddr; + }; + addr.nl_family = AF_NETLINK; + + if (::bind(netlink_socket_fd, &sockaddr, sizeof(addr)) < 0) + throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + + taskstats_family_id = getFamilyId(netlink_socket_fd); +} + + +void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const +{ + NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid)); + + for (const NetlinkMessage::Attribute * attr = &answer.payload.attribute; + attr < answer.end(); + attr = attr->next()) + { + if (attr->header.nla_type == TASKSTATS_TYPE_AGGR_TGID || attr->header.nla_type == TASKSTATS_TYPE_AGGR_PID) + { + for (const NetlinkMessage::Attribute * nested_attr = reinterpret_cast<const NetlinkMessage::Attribute *>(attr->payload); + nested_attr < attr->next(); + nested_attr = nested_attr->next()) + { + if (nested_attr->header.nla_type == TASKSTATS_TYPE_STATS) + { + out_stats = unalignedLoad<::taskstats>(nested_attr->payload); + return; + } + } + } + } + + throw Exception("There is no TASKSTATS_TYPE_STATS attribute in the Netlink response", ErrorCodes::NETLINK_ERROR); +} + + +TaskStatsInfoGetter::~TaskStatsInfoGetter() +{ + if (netlink_socket_fd >= 0) + close(netlink_socket_fd); +} + +} + + +#else + +namespace DB +{ + +bool TaskStatsInfoGetter::checkPermissions() +{ + return false; +} + +TaskStatsInfoGetter::TaskStatsInfoGetter() = default; +TaskStatsInfoGetter::~TaskStatsInfoGetter() = default; + +void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const +{ +} + +} + +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h index 2141ec63f0..00ecf91c47 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h @@ -1,31 +1,31 @@ -#pragma once - -#include <sys/types.h> +#pragma once + +#include <sys/types.h> #include <common/types.h> -#include <boost/noncopyable.hpp> - -struct taskstats; - -namespace DB -{ - -/// Get taskstat info from OS kernel via Netlink protocol. -class TaskStatsInfoGetter : private boost::noncopyable -{ -public: - TaskStatsInfoGetter(); - ~TaskStatsInfoGetter(); - - void getStat(::taskstats & out_stats, pid_t tid) const; - +#include <boost/noncopyable.hpp> + +struct taskstats; + +namespace DB +{ + +/// Get taskstat info from OS kernel via Netlink protocol. 
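// Editor's illustrative usage sketch, not part of the original change: reading
// the current thread's kernel delay accounting through the class declared
// below. Field names come from <linux/taskstats.h>; the thread id is fetched
// via syscall() since not every libc exposes gettid().
#include <linux/taskstats.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>

void printDelaysForCurrentThread()
{
    if (!DB::TaskStatsInfoGetter::checkPermissions())
        return; // no CAP_NET_ADMIN, or Netlink unusable (e.g. some LXC setups)

    DB::TaskStatsInfoGetter getter;
    ::taskstats stats{};
    getter.getStat(stats, static_cast<pid_t>(::syscall(SYS_gettid)));

    std::printf("cpu delay:   %llu ns\n", (unsigned long long) stats.cpu_delay_total);
    std::printf("blkio delay: %llu ns\n", (unsigned long long) stats.blkio_delay_total);
}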
+class TaskStatsInfoGetter : private boost::noncopyable +{ +public: + TaskStatsInfoGetter(); + ~TaskStatsInfoGetter(); + + void getStat(::taskstats & out_stats, pid_t tid) const; + /// Whether the current process has permissions (sudo or cap_net_admin capabilities) to get taskstats info - static bool checkPermissions(); - -#if defined(OS_LINUX) -private: - int netlink_socket_fd = -1; - UInt16 taskstats_family_id = 0; -#endif -}; - -} + static bool checkPermissions(); + +#if defined(OS_LINUX) +private: + int netlink_socket_fd = -1; + UInt16 taskstats_family_id = 0; +#endif +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp index dba6d3b057..7b69bf766d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp @@ -1,576 +1,576 @@ -#include "ThreadProfileEvents.h" - -#if defined(__linux__) - -#include "TaskStatsInfoGetter.h" -#include "ProcfsMetricsProvider.h" -#include "hasLinuxCapability.h" - -#include <filesystem> -#include <fstream> -#include <optional> -#include <sstream> -#include <unordered_set> - -#include <fcntl.h> -#include <unistd.h> -#include <linux/perf_event.h> -#include <syscall.h> -#include <sys/ioctl.h> -#include <cerrno> -#include <sys/types.h> -#include <dirent.h> - +#include "ThreadProfileEvents.h" + +#if defined(__linux__) + +#include "TaskStatsInfoGetter.h" +#include "ProcfsMetricsProvider.h" +#include "hasLinuxCapability.h" + +#include <filesystem> +#include <fstream> +#include <optional> +#include <sstream> +#include <unordered_set> + +#include <fcntl.h> +#include <unistd.h> +#include <linux/perf_event.h> +#include <syscall.h> +#include <sys/ioctl.h> +#include <cerrno> +#include <sys/types.h> +#include <dirent.h> + #include <common/errnoToString.h> -namespace DB -{ - -bool TasksStatsCounters::checkIfAvailable() -{ - return findBestAvailableProvider() != MetricsProvider::None; -} - -std::unique_ptr<TasksStatsCounters> TasksStatsCounters::create(const UInt64 tid) -{ - std::unique_ptr<TasksStatsCounters> instance; - if (checkIfAvailable()) - instance.reset(new TasksStatsCounters(tid, findBestAvailableProvider())); - return instance; -} - -TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvider() -{ - /// This initialization is thread-safe and executed once since C++11 - static std::optional<MetricsProvider> provider = - []() -> MetricsProvider - { - if (TaskStatsInfoGetter::checkPermissions()) - { - return MetricsProvider::Netlink; - } - else if (ProcfsMetricsProvider::isAvailable()) - { - return MetricsProvider::Procfs; - } - return MetricsProvider::None; - }(); - - return *provider; -} - - -TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider provider) -{ - switch (provider) - { - case MetricsProvider::Netlink: - stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]() - { +namespace DB +{ + +bool TasksStatsCounters::checkIfAvailable() +{ + return findBestAvailableProvider() != MetricsProvider::None; +} + +std::unique_ptr<TasksStatsCounters> TasksStatsCounters::create(const UInt64 tid) +{ + std::unique_ptr<TasksStatsCounters> instance; + if (checkIfAvailable()) + instance.reset(new TasksStatsCounters(tid, findBestAvailableProvider())); + return instance; +} + +TasksStatsCounters::MetricsProvider 
TasksStatsCounters::findBestAvailableProvider() +{ + /// This initialization is thread-safe and executed once since C++11 + static std::optional<MetricsProvider> provider = + []() -> MetricsProvider + { + if (TaskStatsInfoGetter::checkPermissions()) + { + return MetricsProvider::Netlink; + } + else if (ProcfsMetricsProvider::isAvailable()) + { + return MetricsProvider::Procfs; + } + return MetricsProvider::None; + }(); + + return *provider; +} + + +TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider provider) +{ + switch (provider) + { + case MetricsProvider::Netlink: + stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]() + { ::taskstats result{}; - metrics_provider->getStat(result, tid); - return result; - }; - break; - case MetricsProvider::Procfs: - stats_getter = [metrics_provider = std::make_shared<ProcfsMetricsProvider>(tid)]() - { + metrics_provider->getStat(result, tid); + return result; + }; + break; + case MetricsProvider::Procfs: + stats_getter = [metrics_provider = std::make_shared<ProcfsMetricsProvider>(tid)]() + { ::taskstats result{}; - metrics_provider->getTaskStats(result); - return result; - }; - break; - case MetricsProvider::None: - ; - } -} - -void TasksStatsCounters::reset() -{ - if (stats_getter) - stats = stats_getter(); -} - -void TasksStatsCounters::updateCounters(ProfileEvents::Counters & profile_events) -{ - if (!stats_getter) - return; - - const auto new_stats = stats_getter(); - incrementProfileEvents(stats, new_stats, profile_events); - stats = new_stats; -} - -void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events) -{ - profile_events.increment(ProfileEvents::OSCPUWaitMicroseconds, - safeDiff(prev.cpu_delay_total, curr.cpu_delay_total) / 1000U); - profile_events.increment(ProfileEvents::OSIOWaitMicroseconds, - safeDiff(prev.blkio_delay_total, curr.blkio_delay_total) / 1000U); - profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds, - safeDiff(prev.cpu_run_virtual_total, curr.cpu_run_virtual_total) / 1000U); - - /// Since TASKSTATS_VERSION = 3 extended accounting and IO accounting is available. 
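// Editor's aside, not from the original patch: taskstats replies report the
// struct version the kernel actually filled in, and the I/O accounting fields
// (read_char, write_char, read_bytes, write_bytes) are only meaningful from
// version 3 on, hence the early return below. The same guard in standalone form:
#include <linux/taskstats.h>

static bool hasExtendedIoAccounting(const ::taskstats & ts)
{
    return ts.version >= 3; // older kernels do not fill the I/O counters
}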
- if (curr.version < 3) - return; - - profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.read_char, curr.read_char)); - profile_events.increment(ProfileEvents::OSWriteChars, safeDiff(prev.write_char, curr.write_char)); - profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.read_bytes, curr.read_bytes)); - profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.write_bytes, curr.write_bytes)); -} - -} - -#endif - + metrics_provider->getTaskStats(result); + return result; + }; + break; + case MetricsProvider::None: + ; + } +} + +void TasksStatsCounters::reset() +{ + if (stats_getter) + stats = stats_getter(); +} + +void TasksStatsCounters::updateCounters(ProfileEvents::Counters & profile_events) +{ + if (!stats_getter) + return; + + const auto new_stats = stats_getter(); + incrementProfileEvents(stats, new_stats, profile_events); + stats = new_stats; +} + +void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events) +{ + profile_events.increment(ProfileEvents::OSCPUWaitMicroseconds, + safeDiff(prev.cpu_delay_total, curr.cpu_delay_total) / 1000U); + profile_events.increment(ProfileEvents::OSIOWaitMicroseconds, + safeDiff(prev.blkio_delay_total, curr.blkio_delay_total) / 1000U); + profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds, + safeDiff(prev.cpu_run_virtual_total, curr.cpu_run_virtual_total) / 1000U); + + /// Since TASKSTATS_VERSION = 3 extended accounting and IO accounting is available. + if (curr.version < 3) + return; + + profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.read_char, curr.read_char)); + profile_events.increment(ProfileEvents::OSWriteChars, safeDiff(prev.write_char, curr.write_char)); + profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.read_bytes, curr.read_bytes)); + profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.write_bytes, curr.write_bytes)); +} + +} + +#endif + #if defined(__linux__) - -namespace DB -{ - -thread_local PerfEventsCounters current_thread_counters; - -#define SOFTWARE_EVENT(PERF_NAME, LOCAL_NAME) \ - PerfEventInfo \ - { \ - .event_type = perf_type_id::PERF_TYPE_SOFTWARE, \ - .event_config = (PERF_NAME), \ - .profile_event = ProfileEvents::LOCAL_NAME, \ - .settings_name = #LOCAL_NAME \ - } - -#define HARDWARE_EVENT(PERF_NAME, LOCAL_NAME) \ - PerfEventInfo \ - { \ - .event_type = perf_type_id::PERF_TYPE_HARDWARE, \ - .event_config = (PERF_NAME), \ - .profile_event = ProfileEvents::LOCAL_NAME, \ - .settings_name = #LOCAL_NAME \ - } - -// One event for cache accesses and one for cache misses. 
-// Type is ACCESS or MISS -#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \ - PerfEventInfo \ - { \ - .event_type = perf_type_id::PERF_TYPE_HW_CACHE, \ - .event_config = (PERF_NAME) \ - | (PERF_COUNT_HW_CACHE_OP_READ << 8) \ - | (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \ - .profile_event = ProfileEvents::LOCAL_NAME, \ - .settings_name = #LOCAL_NAME \ - } - -// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html -static const PerfEventInfo raw_events_info[] = { - HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles), - HARDWARE_EVENT(PERF_COUNT_HW_INSTRUCTIONS, PerfInstructions), - HARDWARE_EVENT(PERF_COUNT_HW_CACHE_REFERENCES, PerfCacheReferences), - HARDWARE_EVENT(PERF_COUNT_HW_CACHE_MISSES, PerfCacheMisses), - HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PerfBranchInstructions), - HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_MISSES, PerfBranchMisses), - HARDWARE_EVENT(PERF_COUNT_HW_BUS_CYCLES, PerfBusCycles), - HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, PerfStalledCyclesFrontend), - HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, PerfStalledCyclesBackend), - HARDWARE_EVENT(PERF_COUNT_HW_REF_CPU_CYCLES, PerfRefCpuCycles), - - // `cpu-clock` is a bit broken according to this: https://stackoverflow.com/a/56967896 - SOFTWARE_EVENT(PERF_COUNT_SW_CPU_CLOCK, PerfCpuClock), - SOFTWARE_EVENT(PERF_COUNT_SW_TASK_CLOCK, PerfTaskClock), - SOFTWARE_EVENT(PERF_COUNT_SW_CONTEXT_SWITCHES, PerfContextSwitches), - SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations), - SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults), - SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults), - - // Don't add them -- they are the same as SoftPageFaults and HardPageFaults, - // match well numerically. - // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), - // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor), - - CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), - CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), - - // Apparently it doesn't make sense to treat these values as relative: - // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses - CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS), - CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS), + +namespace DB +{ + +thread_local PerfEventsCounters current_thread_counters; + +#define SOFTWARE_EVENT(PERF_NAME, LOCAL_NAME) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_SOFTWARE, \ + .event_config = (PERF_NAME), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + +#define HARDWARE_EVENT(PERF_NAME, LOCAL_NAME) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_HARDWARE, \ + .event_config = (PERF_NAME), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + +// One event for cache accesses and one for cache misses. 
+// Type is ACCESS or MISS +#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_HW_CACHE, \ + .event_config = (PERF_NAME) \ + | (PERF_COUNT_HW_CACHE_OP_READ << 8) \ + | (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + +// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html +static const PerfEventInfo raw_events_info[] = { + HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles), + HARDWARE_EVENT(PERF_COUNT_HW_INSTRUCTIONS, PerfInstructions), + HARDWARE_EVENT(PERF_COUNT_HW_CACHE_REFERENCES, PerfCacheReferences), + HARDWARE_EVENT(PERF_COUNT_HW_CACHE_MISSES, PerfCacheMisses), + HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PerfBranchInstructions), + HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_MISSES, PerfBranchMisses), + HARDWARE_EVENT(PERF_COUNT_HW_BUS_CYCLES, PerfBusCycles), + HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, PerfStalledCyclesFrontend), + HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, PerfStalledCyclesBackend), + HARDWARE_EVENT(PERF_COUNT_HW_REF_CPU_CYCLES, PerfRefCpuCycles), + + // `cpu-clock` is a bit broken according to this: https://stackoverflow.com/a/56967896 + SOFTWARE_EVENT(PERF_COUNT_SW_CPU_CLOCK, PerfCpuClock), + SOFTWARE_EVENT(PERF_COUNT_SW_TASK_CLOCK, PerfTaskClock), + SOFTWARE_EVENT(PERF_COUNT_SW_CONTEXT_SWITCHES, PerfContextSwitches), + SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations), + SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults), + SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults), + + // Don't add them -- they are the same as SoftPageFaults and HardPageFaults, + // match well numerically. + // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), + // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor), + + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), + + // Apparently it doesn't make sense to treat these values as relative: + // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses + CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS), CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, PerfLocalMemoryReferences, ACCESS), CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, PerfLocalMemoryMisses, MISS), -}; - -static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS); - -#undef HARDWARE_EVENT -#undef SOFTWARE_EVENT +}; + +static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS); + +#undef HARDWARE_EVENT +#undef SOFTWARE_EVENT #undef CACHE_EVENT - -// A map of event name -> event index, to parse event list in settings. 
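// Editor's aside, not from the original patch: a standalone miniature of the
// name-to-index lookup built below, showing how one token of a comma-separated
// settings string selects an event. The two entries are a hypothetical subset
// of the real table.
#include <string>
#include <unordered_map>
#include <cassert>

int main()
{
    const std::unordered_map<std::string, size_t> name_to_index =
        {{"PerfCpuCycles", 0}, {"PerfCacheMisses", 3}};

    // A token parsed out of a list such as "PerfCpuCycles, PerfCacheMisses":
    const auto it = name_to_index.find("PerfCacheMisses");
    assert(it != name_to_index.end() && it->second == 3);
}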
-static std::unordered_map<std::string, size_t> populateEventMap() -{ - std::unordered_map<std::string, size_t> name_to_index; - name_to_index.reserve(NUMBER_OF_RAW_EVENTS); - - for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) - { - name_to_index.emplace(raw_events_info[i].settings_name, i); - } - - return name_to_index; -} - -static const auto event_name_to_index = populateEventMap(); - -static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, UInt64 flags) -{ - return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags)); -} - -static int openPerfEventDisabled(Int32 perf_event_paranoid, bool has_cap_sys_admin, UInt32 perf_event_type, UInt64 perf_event_config) -{ - perf_event_attr pe{}; - pe.type = perf_event_type; - pe.size = sizeof(struct perf_event_attr); - pe.config = perf_event_config; - // disable by default to add as little extra time as possible - pe.disabled = 1; - // can record kernel only when `perf_event_paranoid` <= 1 or have CAP_SYS_ADMIN - pe.exclude_kernel = perf_event_paranoid >= 2 && !has_cap_sys_admin; - pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; - - return openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0); -} - -static void enablePerfEvent(int event_fd) -{ - if (ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0)) - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Can't enable perf event with file descriptor {}: '{}' ({})", + +// A map of event name -> event index, to parse event list in settings. +static std::unordered_map<std::string, size_t> populateEventMap() +{ + std::unordered_map<std::string, size_t> name_to_index; + name_to_index.reserve(NUMBER_OF_RAW_EVENTS); + + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + name_to_index.emplace(raw_events_info[i].settings_name, i); + } + + return name_to_index; +} + +static const auto event_name_to_index = populateEventMap(); + +static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, UInt64 flags) +{ + return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags)); +} + +static int openPerfEventDisabled(Int32 perf_event_paranoid, bool has_cap_sys_admin, UInt32 perf_event_type, UInt64 perf_event_config) +{ + perf_event_attr pe{}; + pe.type = perf_event_type; + pe.size = sizeof(struct perf_event_attr); + pe.config = perf_event_config; + // disable by default to add as little extra time as possible + pe.disabled = 1; + // can record kernel only when `perf_event_paranoid` <= 1 or have CAP_SYS_ADMIN + pe.exclude_kernel = perf_event_paranoid >= 2 && !has_cap_sys_admin; + pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; + + return openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0); +} + +static void enablePerfEvent(int event_fd) +{ + if (ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0)) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't enable perf event with file descriptor {}: '{}' ({})", event_fd, errnoToString(errno), errno); - } -} - -static void disablePerfEvent(int event_fd) -{ - if (ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0)) - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Can't disable perf event with file descriptor {}: '{}' ({})", + } +} + +static void disablePerfEvent(int event_fd) +{ + if (ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0)) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't disable perf event with file descriptor {}: '{}' ({})", 
event_fd, errnoToString(errno), errno); - } -} - -static void releasePerfEvent(int event_fd) -{ - if (close(event_fd)) - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Can't close perf event file descriptor {}: {} ({})", + } +} + +static void releasePerfEvent(int event_fd) +{ + if (close(event_fd)) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't close perf event file descriptor {}: {} ({})", event_fd, errnoToString(errno), errno); - } -} - -static bool validatePerfEventDescriptor(int & fd) -{ - if (fcntl(fd, F_GETFL) != -1) - return true; - - if (errno == EBADF) - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Event descriptor {} was closed from the outside; reopening", fd); - } - else - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Error while checking availability of event descriptor {}: {} ({})", + } +} + +static bool validatePerfEventDescriptor(int & fd) +{ + if (fcntl(fd, F_GETFL) != -1) + return true; + + if (errno == EBADF) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Event descriptor {} was closed from the outside; reopening", fd); + } + else + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Error while checking availability of event descriptor {}: {} ({})", fd, errnoToString(errno), errno); - - disablePerfEvent(fd); - releasePerfEvent(fd); - } - - fd = -1; - return false; -} - -bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_events_list) -{ - const auto valid_event_indices = eventIndicesFromString(needed_events_list); - - // find state changes (if there are any) - bool old_state[NUMBER_OF_RAW_EVENTS]; - for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) - old_state[i] = thread_events_descriptors_holder.descriptors[i] != -1; - - bool new_state[NUMBER_OF_RAW_EVENTS]; - std::fill_n(new_state, NUMBER_OF_RAW_EVENTS, false); - for (size_t opened_index : valid_event_indices) - new_state[opened_index] = true; - - std::vector<size_t> events_to_open; - std::vector<size_t> events_to_release; - for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) - { - bool old_one = old_state[i]; - bool new_one = new_state[i]; - - if (old_one == new_one) - { - if (old_one - && !validatePerfEventDescriptor( - thread_events_descriptors_holder.descriptors[i])) - { - events_to_open.push_back(i); - } - continue; - } - - if (new_one) - events_to_open.push_back(i); - else - events_to_release.push_back(i); - } - - // release unused descriptors - for (size_t i : events_to_release) - { - int & fd = thread_events_descriptors_holder.descriptors[i]; - disablePerfEvent(fd); - releasePerfEvent(fd); - fd = -1; - } - - if (events_to_open.empty()) - { - return true; - } - - // check permissions - // cat /proc/sys/kernel/perf_event_paranoid - // -1: Allow use of (almost) all events by all users - // >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK - // >=1: Disallow CPU event access by users without CAP_SYS_ADMIN - // >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN - // >=3: Disallow all event access by users without CAP_SYS_ADMIN - Int32 perf_event_paranoid = 0; - std::ifstream paranoid_file("/proc/sys/kernel/perf_event_paranoid"); - paranoid_file >> perf_event_paranoid; - - bool has_cap_sys_admin = hasLinuxCapability(CAP_SYS_ADMIN); - if (perf_event_paranoid >= 3 && !has_cap_sys_admin) - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Not enough permissions to record perf events: " - "perf_event_paranoid = {} and CAP_SYS_ADMIN = 0", - perf_event_paranoid); - return false; - } - - // Open descriptors for new events. 
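// Editor's aside, not from the original patch: the smallest end-to-end use of
// perf_event_open as wrapped above, counting retired instructions for a piece
// of work on the calling thread. Error handling is reduced to early returns.
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>

int main()
{
    ::perf_event_attr pe{};
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(pe);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    pe.disabled = 1;       // start stopped, enable explicitly below
    pe.exclude_kernel = 1; // stay within common perf_event_paranoid limits

    const int fd = static_cast<int>(::syscall(SYS_perf_event_open, &pe, 0, -1, -1, 0));
    if (fd == -1)
        return 1; // event unsupported or not enough permissions

    ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
    volatile unsigned long long sink = 0;
    for (int i = 0; i < 1000000; ++i)
        sink += i; // the measured work
    ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

    unsigned long long instructions = 0;
    if (::read(fd, &instructions, sizeof(instructions)) == sizeof(instructions))
        std::printf("instructions: %llu\n", instructions);
    ::close(fd);
    return 0;
}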
- // Theoretically, we can run out of file descriptors. Threads go up to 10k, - // and there might be a dozen perf events per thread, so we're looking at - // 100k open files. In practice, this is not likely -- perf events are - // mostly used in performance tests or other kinds of testing, and the - // number of threads stays below hundred. - // We used to check the number of open files by enumerating /proc/self/fd, - // but listing all open files before opening more files is obviously - // quadratic, and quadraticity never ends well. - for (size_t i : events_to_open) - { - const PerfEventInfo & event_info = raw_events_info[i]; - int & fd = thread_events_descriptors_holder.descriptors[i]; - // disable by default to add as little extra time as possible - fd = openPerfEventDisabled(perf_event_paranoid, has_cap_sys_admin, event_info.event_type, event_info.event_config); - - if (fd == -1 && errno != ENOENT) - { - // ENOENT means that the event is not supported. Don't log it, because - // this is called for each thread and would be too verbose. Log other - // error codes because they might signify an error. - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Failed to open perf event {} (event_type={}, event_config={}): " - "'{}' ({})", event_info.settings_name, event_info.event_type, + + disablePerfEvent(fd); + releasePerfEvent(fd); + } + + fd = -1; + return false; +} + +bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_events_list) +{ + const auto valid_event_indices = eventIndicesFromString(needed_events_list); + + // find state changes (if there are any) + bool old_state[NUMBER_OF_RAW_EVENTS]; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + old_state[i] = thread_events_descriptors_holder.descriptors[i] != -1; + + bool new_state[NUMBER_OF_RAW_EVENTS]; + std::fill_n(new_state, NUMBER_OF_RAW_EVENTS, false); + for (size_t opened_index : valid_event_indices) + new_state[opened_index] = true; + + std::vector<size_t> events_to_open; + std::vector<size_t> events_to_release; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + bool old_one = old_state[i]; + bool new_one = new_state[i]; + + if (old_one == new_one) + { + if (old_one + && !validatePerfEventDescriptor( + thread_events_descriptors_holder.descriptors[i])) + { + events_to_open.push_back(i); + } + continue; + } + + if (new_one) + events_to_open.push_back(i); + else + events_to_release.push_back(i); + } + + // release unused descriptors + for (size_t i : events_to_release) + { + int & fd = thread_events_descriptors_holder.descriptors[i]; + disablePerfEvent(fd); + releasePerfEvent(fd); + fd = -1; + } + + if (events_to_open.empty()) + { + return true; + } + + // check permissions + // cat /proc/sys/kernel/perf_event_paranoid + // -1: Allow use of (almost) all events by all users + // >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK + // >=1: Disallow CPU event access by users without CAP_SYS_ADMIN + // >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN + // >=3: Disallow all event access by users without CAP_SYS_ADMIN + Int32 perf_event_paranoid = 0; + std::ifstream paranoid_file("/proc/sys/kernel/perf_event_paranoid"); + paranoid_file >> perf_event_paranoid; + + bool has_cap_sys_admin = hasLinuxCapability(CAP_SYS_ADMIN); + if (perf_event_paranoid >= 3 && !has_cap_sys_admin) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Not enough permissions to record perf events: " + "perf_event_paranoid = {} and CAP_SYS_ADMIN = 0", + perf_event_paranoid); + return false; + } + + // 
Open descriptors for new events. + // Theoretically, we can run out of file descriptors. Threads go up to 10k, + // and there might be a dozen perf events per thread, so we're looking at + // 100k open files. In practice, this is not likely -- perf events are + // mostly used in performance tests or other kinds of testing, and the + // number of threads stays below hundred. + // We used to check the number of open files by enumerating /proc/self/fd, + // but listing all open files before opening more files is obviously + // quadratic, and quadraticity never ends well. + for (size_t i : events_to_open) + { + const PerfEventInfo & event_info = raw_events_info[i]; + int & fd = thread_events_descriptors_holder.descriptors[i]; + // disable by default to add as little extra time as possible + fd = openPerfEventDisabled(perf_event_paranoid, has_cap_sys_admin, event_info.event_type, event_info.event_config); + + if (fd == -1 && errno != ENOENT) + { + // ENOENT means that the event is not supported. Don't log it, because + // this is called for each thread and would be too verbose. Log other + // error codes because they might signify an error. + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Failed to open perf event {} (event_type={}, event_config={}): " + "'{}' ({})", event_info.settings_name, event_info.event_type, event_info.event_config, errnoToString(errno), errno); - } - } - - return true; -} - + } + } + + return true; +} + // Parse comma-separated list of event names. Empty means all available events. -std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string & events_list) -{ - std::vector<size_t> result; - result.reserve(NUMBER_OF_RAW_EVENTS); - - if (events_list.empty()) - { - for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) - { - result.push_back(i); - } - return result; - } - +std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string & events_list) +{ + std::vector<size_t> result; + result.reserve(NUMBER_OF_RAW_EVENTS); + + if (events_list.empty()) + { + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + result.push_back(i); + } + return result; + } + std::istringstream iss(events_list); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - std::string event_name; - while (std::getline(iss, event_name, ',')) - { + std::string event_name; + while (std::getline(iss, event_name, ',')) + { // Allow spaces at the beginning of the token, so that you can write 'a, b'. - event_name.erase(0, event_name.find_first_not_of(' ')); - - auto entry = event_name_to_index.find(event_name); - if (entry != event_name_to_index.end()) - { - result.push_back(entry->second); - } - else - { - LOG_ERROR(&Poco::Logger::get("PerfEvents"), - "Unknown perf event name '{}' specified in settings", event_name); - } - } - - return result; -} - -void PerfEventsCounters::initializeProfileEvents(const std::string & events_list) -{ - if (!processThreadLocalChanges(events_list)) - return; - - for (int fd : thread_events_descriptors_holder.descriptors) - { - if (fd == -1) - continue; - - // We don't reset the event, because the time_running and time_enabled - // can't be reset anyway and we have to calculate deltas. - enablePerfEvent(fd); - } -} - -void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile_events) -{ - // Disable all perf events. - for (auto fd : thread_events_descriptors_holder.descriptors) - { - if (fd == -1) - continue; - disablePerfEvent(fd); - } - - // Read the counter values. 
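// Editor's aside, not from the original patch: because the events were opened
// with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING
// (see openPerfEventDisabled above), each read() yields exactly three u64
// values, which is the layout PerfEventValue mirrors:
#include <cstdint>

struct PerfEventValueLayout
{
    uint64_t value;        // the counter itself
    uint64_t time_enabled; // ns the event was scheduled to count
    uint64_t time_running; // ns it actually counted; differs under multiplexing
};
static_assert(sizeof(PerfEventValueLayout) == 3 * sizeof(uint64_t),
              "read() of a PerfEventValue expects a packed triple of u64");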
- PerfEventValue current_values[NUMBER_OF_RAW_EVENTS]; - for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) - { - int fd = thread_events_descriptors_holder.descriptors[i]; - if (fd == -1) - continue; - - constexpr ssize_t bytes_to_read = sizeof(current_values[0]); - const int bytes_read = read(fd, &current_values[i], bytes_to_read); - - if (bytes_read != bytes_to_read) - { - LOG_WARNING(&Poco::Logger::get("PerfEvents"), - "Can't read event value from file descriptor {}: '{}' ({})", fd, errnoToString(errno), errno); - current_values[i] = {}; - } - } - - // Actually process counters' values. Track the minimal time that a performance - // counter was enabled, and the corresponding running time, to give some idea - // about the amount of counter multiplexing. - UInt64 min_enabled_time = -1; - UInt64 running_time_for_min_enabled_time = 0; - - for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) - { - int fd = thread_events_descriptors_holder.descriptors[i]; - if (fd == -1) - continue; - - const PerfEventInfo & info = raw_events_info[i]; - const PerfEventValue & previous_value = previous_values[i]; - const PerfEventValue & current_value = current_values[i]; - - // Account for counter multiplexing. time_running and time_enabled are - // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate - // deltas from old values. - const auto enabled = current_value.time_enabled - previous_value.time_enabled; - const auto running = current_value.time_running - previous_value.time_running; - const UInt64 delta = (current_value.value - previous_value.value) - * enabled / std::max(1.f, float(running)); - - if (min_enabled_time > enabled) - { - min_enabled_time = enabled; - running_time_for_min_enabled_time = running; - } - - profile_events.increment(info.profile_event, delta); - } - - // If we had at least one enabled event, also show multiplexing-related - // statistics. 
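// Editor's aside, not from the original patch: a worked instance of the
// extrapolation above, with numbers picked purely for illustration. If the
// event was enabled for 10 ms but only counted for 5 ms, an observed increment
// of 1000 is scaled up to the full enabled window.
#include <algorithm>
#include <cassert>

int main()
{
    const unsigned long long counter_diff = 1000;
    const unsigned long long enabled_ns = 10'000'000; // 10 ms scheduled
    const unsigned long long running_ns = 5'000'000;  // 5 ms actually counting
    const unsigned long long delta =
        counter_diff * enabled_ns / std::max(1.f, float(running_ns));
    assert(delta == 2000); // counter assumed to tick at the observed rate throughout
}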
- if (min_enabled_time != UInt64(-1)) - { - profile_events.increment(ProfileEvents::PerfMinEnabledTime, - min_enabled_time); - profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime, - running_time_for_min_enabled_time); - } - - // Store current counter values for the next profiling period. - memcpy(previous_values, current_values, sizeof(current_values)); -} - -void PerfEventsCounters::closeEventDescriptors() -{ - thread_events_descriptors_holder.releaseResources(); -} - -PerfDescriptorsHolder::PerfDescriptorsHolder() -{ - for (int & descriptor : descriptors) - descriptor = -1; -} - -PerfDescriptorsHolder::~PerfDescriptorsHolder() -{ - releaseResources(); -} - -void PerfDescriptorsHolder::releaseResources() -{ - for (int & descriptor : descriptors) - { - if (descriptor == -1) - continue; - - disablePerfEvent(descriptor); - releasePerfEvent(descriptor); - descriptor = -1; - } -} - -} - -#else - -namespace DB -{ - + current_values[i] = {}; + } + } + + // Actually process counters' values. Track the minimal time that a performance + // counter was enabled, and the corresponding running time, to give some idea + // about the amount of counter multiplexing. + UInt64 min_enabled_time = -1; + UInt64 running_time_for_min_enabled_time = 0; + + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + int fd = thread_events_descriptors_holder.descriptors[i]; + if (fd == -1) + continue; + + const PerfEventInfo & info = raw_events_info[i]; + const PerfEventValue & previous_value = previous_values[i]; + const PerfEventValue & current_value = current_values[i]; + + // Account for counter multiplexing. time_running and time_enabled are + // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate + // deltas from old values. + const auto enabled = current_value.time_enabled - previous_value.time_enabled; + const auto running = current_value.time_running - previous_value.time_running; + const UInt64 delta = (current_value.value - previous_value.value) + * enabled / std::max(1.f, float(running)); + + if (min_enabled_time > enabled) + { + min_enabled_time = enabled; + running_time_for_min_enabled_time = running; + } + + profile_events.increment(info.profile_event, delta); + } + + // If we had at least one enabled event, also show multiplexing-related + // statistics. + if (min_enabled_time != UInt64(-1)) + { + profile_events.increment(ProfileEvents::PerfMinEnabledTime, + min_enabled_time); + profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime, + running_time_for_min_enabled_time); + } + + // Store current counter values for the next profiling period. + memcpy(previous_values, current_values, sizeof(current_values)); +} + +void PerfEventsCounters::closeEventDescriptors() +{ + thread_events_descriptors_holder.releaseResources(); +} + +PerfDescriptorsHolder::PerfDescriptorsHolder() +{ + for (int & descriptor : descriptors) + descriptor = -1; +} + +PerfDescriptorsHolder::~PerfDescriptorsHolder() +{ + releaseResources(); +} + +void PerfDescriptorsHolder::releaseResources() +{ + for (int & descriptor : descriptors) + { + if (descriptor == -1) + continue; + + disablePerfEvent(descriptor); + releasePerfEvent(descriptor); + descriptor = -1; + } +} + +} + +#else + +namespace DB +{ + // the functionality is disabled when we are not running on Linux. 
-PerfEventsCounters current_thread_counters; - -} - -#endif +PerfEventsCounters current_thread_counters; + +} + +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h index 416f512687..cf5196f363 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h @@ -1,135 +1,135 @@ -#pragma once - +#pragma once + #include <common/types.h> -#include <Common/ProfileEvents.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <pthread.h> -#include <common/logger_useful.h> - - -#if defined(__linux__) -#include <linux/taskstats.h> -#else -struct taskstats {}; -#endif - -/** Implement ProfileEvents with statistics about resource consumption of the current thread. - */ - -namespace ProfileEvents -{ - extern const Event RealTimeMicroseconds; - extern const Event UserTimeMicroseconds; - extern const Event SystemTimeMicroseconds; - extern const Event SoftPageFaults; - extern const Event HardPageFaults; - extern const Event VoluntaryContextSwitches; - extern const Event InvoluntaryContextSwitches; - -#if defined(__linux__) - extern const Event OSIOWaitMicroseconds; - extern const Event OSCPUWaitMicroseconds; - extern const Event OSCPUVirtualTimeMicroseconds; - extern const Event OSReadChars; - extern const Event OSWriteChars; - extern const Event OSReadBytes; - extern const Event OSWriteBytes; - - extern const Event PerfCpuCycles; - extern const Event PerfInstructions; - extern const Event PerfCacheReferences; - extern const Event PerfCacheMisses; - extern const Event PerfBranchInstructions; - extern const Event PerfBranchMisses; - extern const Event PerfBusCycles; - extern const Event PerfStalledCyclesFrontend; - extern const Event PerfStalledCyclesBackend; - extern const Event PerfRefCpuCycles; - - extern const Event PerfCpuClock; - extern const Event PerfTaskClock; - extern const Event PerfContextSwitches; - extern const Event PerfCpuMigrations; - extern const Event PerfAlignmentFaults; - extern const Event PerfEmulationFaults; - extern const Event PerfMinEnabledTime; - extern const Event PerfMinEnabledRunningTime; - extern const Event PerfDataTLBReferences; - extern const Event PerfDataTLBMisses; - extern const Event PerfInstructionTLBReferences; - extern const Event PerfInstructionTLBMisses; +#include <Common/ProfileEvents.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <pthread.h> +#include <common/logger_useful.h> + + +#if defined(__linux__) +#include <linux/taskstats.h> +#else +struct taskstats {}; +#endif + +/** Implement ProfileEvents with statistics about resource consumption of the current thread. 
+ */ + +namespace ProfileEvents +{ + extern const Event RealTimeMicroseconds; + extern const Event UserTimeMicroseconds; + extern const Event SystemTimeMicroseconds; + extern const Event SoftPageFaults; + extern const Event HardPageFaults; + extern const Event VoluntaryContextSwitches; + extern const Event InvoluntaryContextSwitches; + +#if defined(__linux__) + extern const Event OSIOWaitMicroseconds; + extern const Event OSCPUWaitMicroseconds; + extern const Event OSCPUVirtualTimeMicroseconds; + extern const Event OSReadChars; + extern const Event OSWriteChars; + extern const Event OSReadBytes; + extern const Event OSWriteBytes; + + extern const Event PerfCpuCycles; + extern const Event PerfInstructions; + extern const Event PerfCacheReferences; + extern const Event PerfCacheMisses; + extern const Event PerfBranchInstructions; + extern const Event PerfBranchMisses; + extern const Event PerfBusCycles; + extern const Event PerfStalledCyclesFrontend; + extern const Event PerfStalledCyclesBackend; + extern const Event PerfRefCpuCycles; + + extern const Event PerfCpuClock; + extern const Event PerfTaskClock; + extern const Event PerfContextSwitches; + extern const Event PerfCpuMigrations; + extern const Event PerfAlignmentFaults; + extern const Event PerfEmulationFaults; + extern const Event PerfMinEnabledTime; + extern const Event PerfMinEnabledRunningTime; + extern const Event PerfDataTLBReferences; + extern const Event PerfDataTLBMisses; + extern const Event PerfInstructionTLBReferences; + extern const Event PerfInstructionTLBMisses; extern const Event PerfLocalMemoryReferences; extern const Event PerfLocalMemoryMisses; -#endif -} - -namespace DB -{ - -/// Handles overflow -template <typename TUInt> -inline TUInt safeDiff(TUInt prev, TUInt curr) -{ - return curr >= prev ? curr - prev : 0; -} - - -struct RUsageCounters -{ - /// In nanoseconds - UInt64 real_time = 0; - UInt64 user_time = 0; - UInt64 sys_time = 0; - - UInt64 soft_page_faults = 0; - UInt64 hard_page_faults = 0; - - RUsageCounters() = default; - RUsageCounters(const ::rusage & rusage_, UInt64 real_time_) - { - set(rusage_, real_time_); - } - - void set(const ::rusage & rusage, UInt64 real_time_) - { - real_time = real_time_; - user_time = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL; - sys_time = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL; - - soft_page_faults = static_cast<UInt64>(rusage.ru_minflt); - hard_page_faults = static_cast<UInt64>(rusage.ru_majflt); - } - +#endif +} + +namespace DB +{ + +/// Handles overflow +template <typename TUInt> +inline TUInt safeDiff(TUInt prev, TUInt curr) +{ + return curr >= prev ? 
curr - prev : 0; +} + + +struct RUsageCounters +{ + /// In nanoseconds + UInt64 real_time = 0; + UInt64 user_time = 0; + UInt64 sys_time = 0; + + UInt64 soft_page_faults = 0; + UInt64 hard_page_faults = 0; + + RUsageCounters() = default; + RUsageCounters(const ::rusage & rusage_, UInt64 real_time_) + { + set(rusage_, real_time_); + } + + void set(const ::rusage & rusage, UInt64 real_time_) + { + real_time = real_time_; + user_time = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL; + sys_time = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL; + + soft_page_faults = static_cast<UInt64>(rusage.ru_minflt); + hard_page_faults = static_cast<UInt64>(rusage.ru_majflt); + } + static RUsageCounters current() - { - ::rusage rusage {}; -#if !defined(__APPLE__) + { + ::rusage rusage {}; +#if !defined(__APPLE__) #if defined(OS_SUNOS) ::getrusage(RUSAGE_LWP, &rusage); #else - ::getrusage(RUSAGE_THREAD, &rusage); + ::getrusage(RUSAGE_THREAD, &rusage); #endif // OS_SUNOS #endif // __APPLE return RUsageCounters(rusage, getClockMonotonic()); - } - - static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events) - { - profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U); - profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U); - profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U); - - profile_events.increment(ProfileEvents::SoftPageFaults, curr.soft_page_faults - prev.soft_page_faults); - profile_events.increment(ProfileEvents::HardPageFaults, curr.hard_page_faults - prev.hard_page_faults); - } - - static void updateProfileEvents(RUsageCounters & last_counters, ProfileEvents::Counters & profile_events) - { - auto current_counters = current(); - incrementProfileEvents(last_counters, current_counters, profile_events); - last_counters = current_counters; - } + } + + static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events) + { + profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U); + profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U); + profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U); + + profile_events.increment(ProfileEvents::SoftPageFaults, curr.soft_page_faults - prev.soft_page_faults); + profile_events.increment(ProfileEvents::HardPageFaults, curr.hard_page_faults - prev.hard_page_faults); + } + + static void updateProfileEvents(RUsageCounters & last_counters, ProfileEvents::Counters & profile_events) + { + auto current_counters = current(); + incrementProfileEvents(last_counters, current_counters, profile_events); + last_counters = current_counters; + } private: static inline UInt64 getClockMonotonic() @@ -138,121 +138,121 @@ private: clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } -}; - +}; + #if defined(__linux__) - -struct PerfEventInfo -{ - // see perf_event.h/perf_type_id enum - int event_type; - // see configs in perf_event.h - int event_config; - ProfileEvents::Event profile_event; - std::string settings_name; -}; - -struct PerfEventValue -{ - UInt64 value = 0; - UInt64 time_enabled = 0; - UInt64 time_running = 0; -}; - + +struct PerfEventInfo +{ + // 
see perf_event.h/perf_type_id enum + int event_type; + // see configs in perf_event.h + int event_config; + ProfileEvents::Event profile_event; + std::string settings_name; +}; + +struct PerfEventValue +{ + UInt64 value = 0; + UInt64 time_enabled = 0; + UInt64 time_running = 0; +}; + static constexpr size_t NUMBER_OF_RAW_EVENTS = 22; - -struct PerfDescriptorsHolder : boost::noncopyable -{ - int descriptors[NUMBER_OF_RAW_EVENTS]{}; - - PerfDescriptorsHolder(); - - ~PerfDescriptorsHolder(); - - void releaseResources(); -}; - -struct PerfEventsCounters -{ - PerfDescriptorsHolder thread_events_descriptors_holder; - - // time_enabled and time_running can't be reset, so we have to store the - // data from the previous profiling period and calculate deltas to them, - // to be able to properly account for counter multiplexing. - PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{}; - - - void initializeProfileEvents(const std::string & events_list); - void finalizeProfileEvents(ProfileEvents::Counters & profile_events); - void closeEventDescriptors(); - bool processThreadLocalChanges(const std::string & needed_events_list); - - - static std::vector<size_t> eventIndicesFromString(const std::string & events_list); -}; - -// Perf event creation is moderately heavy, so we create them once per thread and -// then reuse. -extern thread_local PerfEventsCounters current_thread_counters; - -#else - + +struct PerfDescriptorsHolder : boost::noncopyable +{ + int descriptors[NUMBER_OF_RAW_EVENTS]{}; + + PerfDescriptorsHolder(); + + ~PerfDescriptorsHolder(); + + void releaseResources(); +}; + +struct PerfEventsCounters +{ + PerfDescriptorsHolder thread_events_descriptors_holder; + + // time_enabled and time_running can't be reset, so we have to store the + // data from the previous profiling period and calculate deltas to them, + // to be able to properly account for counter multiplexing. + PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{}; + + + void initializeProfileEvents(const std::string & events_list); + void finalizeProfileEvents(ProfileEvents::Counters & profile_events); + void closeEventDescriptors(); + bool processThreadLocalChanges(const std::string & needed_events_list); + + + static std::vector<size_t> eventIndicesFromString(const std::string & events_list); +}; + +// Perf event creation is moderately heavy, so we create them once per thread and +// then reuse. +extern thread_local PerfEventsCounters current_thread_counters; + +#else + // the functionality is disabled when we are not running on Linux. 
-struct PerfEventsCounters -{ - void initializeProfileEvents(const std::string & /* events_list */) {} - void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {} - void closeEventDescriptors() {} -}; - -extern PerfEventsCounters current_thread_counters; - -#endif - -#if defined(__linux__) - -class TasksStatsCounters -{ -public: - static bool checkIfAvailable(); - static std::unique_ptr<TasksStatsCounters> create(const UInt64 tid); - - void reset(); - void updateCounters(ProfileEvents::Counters & profile_events); - -private: - ::taskstats stats; //-V730_NOINIT - std::function<::taskstats()> stats_getter; - - enum class MetricsProvider - { - None, - Procfs, - Netlink - }; - -private: - explicit TasksStatsCounters(const UInt64 tid, const MetricsProvider provider); - - static MetricsProvider findBestAvailableProvider(); - static void incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events); -}; - -#else - -class TasksStatsCounters -{ -public: - static bool checkIfAvailable() { return false; } - static std::unique_ptr<TasksStatsCounters> create(const UInt64 /*tid*/) { return {}; } - - void reset() {} - void updateCounters(ProfileEvents::Counters &) {} - -private: - TasksStatsCounters(const UInt64 /*tid*/) {} -}; - -#endif - -} +struct PerfEventsCounters +{ + void initializeProfileEvents(const std::string & /* events_list */) {} + void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {} + void closeEventDescriptors() {} +}; + +extern PerfEventsCounters current_thread_counters; + +#endif + +#if defined(__linux__) + +class TasksStatsCounters +{ +public: + static bool checkIfAvailable(); + static std::unique_ptr<TasksStatsCounters> create(const UInt64 tid); + + void reset(); + void updateCounters(ProfileEvents::Counters & profile_events); + +private: + ::taskstats stats; //-V730_NOINIT + std::function<::taskstats()> stats_getter; + + enum class MetricsProvider + { + None, + Procfs, + Netlink + }; + +private: + explicit TasksStatsCounters(const UInt64 tid, const MetricsProvider provider); + + static MetricsProvider findBestAvailableProvider(); + static void incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events); +}; + +#else + +class TasksStatsCounters +{ +public: + static bool checkIfAvailable() { return false; } + static std::unique_ptr<TasksStatsCounters> create(const UInt64 /*tid*/) { return {}; } + + void reset() {} + void updateCounters(ProfileEvents::Counters &) {} + +private: + TasksStatsCounters(const UInt64 /*tid*/) {} +}; + +#endif + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h index cae14b7e96..33be285306 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h @@ -1,37 +1,37 @@ -#pragma once - -#include <common/types.h> -#include <future> -#include <memory> -#include <vector> -#include <Common/ZooKeeper/IKeeper.h> -#include <Poco/Event.h> - - -namespace zkutil -{ - -using Strings = std::vector<std::string>; - - -namespace CreateMode -{ - extern const int Persistent; - extern const int Ephemeral; - extern const int EphemeralSequential; - extern const int PersistentSequential; -} - -using EventPtr = std::shared_ptr<Poco::Event>; - -/// Gets multiple asynchronous results -/// Each pair, the first is path, the second is response eg. 
CreateResponse, RemoveResponse -template <typename R> -using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>; - -Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode); -Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version); -Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version); -Coordination::RequestPtr makeCheckRequest(const std::string & path, int version); - -} +#pragma once + +#include <common/types.h> +#include <future> +#include <memory> +#include <vector> +#include <Common/ZooKeeper/IKeeper.h> +#include <Poco/Event.h> + + +namespace zkutil +{ + +using Strings = std::vector<std::string>; + + +namespace CreateMode +{ + extern const int Persistent; + extern const int Ephemeral; + extern const int EphemeralSequential; + extern const int PersistentSequential; +} + +using EventPtr = std::shared_ptr<Poco::Event>; + +/// Gets multiple asynchronous results +/// Each pair, the first is path, the second is response eg. CreateResponse, RemoveResponse +template <typename R> +using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>; + +Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode); +Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version); +Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version); +Coordination::RequestPtr makeCheckRequest(const std::string & path, int version); + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp index 08305276a8..4f0c5efe68 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp @@ -1,164 +1,164 @@ -#include <Common/ProfileEvents.h> -#include <Common/ZooKeeper/IKeeper.h> - - -namespace DB -{ - namespace ErrorCodes - { - extern const int KEEPER_EXCEPTION; - } -} - -namespace ProfileEvents -{ - extern const Event ZooKeeperUserExceptions; - extern const Event ZooKeeperHardwareExceptions; - extern const Event ZooKeeperOtherExceptions; -} - - -namespace Coordination -{ - -Exception::Exception(const std::string & msg, const Error code_, int) - : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION), code(code_) -{ - if (Coordination::isUserError(code)) - ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions); - else if (Coordination::isHardwareError(code)) - ProfileEvents::increment(ProfileEvents::ZooKeeperHardwareExceptions); - else - ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions); -} - -Exception::Exception(const std::string & msg, const Error code_) - : Exception(msg + " (" + errorMessage(code_) + ")", code_, 0) -{ -} - -Exception::Exception(const Error code_) - : Exception(errorMessage(code_), code_, 0) -{ -} - -Exception::Exception(const Error code_, const std::string & path) - : Exception(std::string{errorMessage(code_)} + ", path: " + path, code_, 0) -{ -} - -Exception::Exception(const Exception & exc) = default; - - -using namespace DB; - - -static void addRootPath(String & path, const String & root_path) -{ - if (path.empty()) - throw Exception("Path cannot be empty", Error::ZBADARGUMENTS); - - if (path[0] != '/') +#include <Common/ProfileEvents.h> +#include 
<Common/ZooKeeper/IKeeper.h> + + +namespace DB +{ + namespace ErrorCodes + { + extern const int KEEPER_EXCEPTION; + } +} + +namespace ProfileEvents +{ + extern const Event ZooKeeperUserExceptions; + extern const Event ZooKeeperHardwareExceptions; + extern const Event ZooKeeperOtherExceptions; +} + + +namespace Coordination +{ + +Exception::Exception(const std::string & msg, const Error code_, int) + : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION), code(code_) +{ + if (Coordination::isUserError(code)) + ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions); + else if (Coordination::isHardwareError(code)) + ProfileEvents::increment(ProfileEvents::ZooKeeperHardwareExceptions); + else + ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions); +} + +Exception::Exception(const std::string & msg, const Error code_) + : Exception(msg + " (" + errorMessage(code_) + ")", code_, 0) +{ +} + +Exception::Exception(const Error code_) + : Exception(errorMessage(code_), code_, 0) +{ +} + +Exception::Exception(const Error code_, const std::string & path) + : Exception(std::string{errorMessage(code_)} + ", path: " + path, code_, 0) +{ +} + +Exception::Exception(const Exception & exc) = default; + + +using namespace DB; + + +static void addRootPath(String & path, const String & root_path) +{ + if (path.empty()) + throw Exception("Path cannot be empty", Error::ZBADARGUMENTS); + + if (path[0] != '/') throw Exception("Path must begin with /, got " + path, Error::ZBADARGUMENTS); - - if (root_path.empty()) - return; - - if (path.size() == 1) /// "/" - path = root_path; - else - path = root_path + path; -} - -static void removeRootPath(String & path, const String & root_path) -{ - if (root_path.empty()) - return; - - if (path.size() <= root_path.size()) - throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY); - - path = path.substr(root_path.size()); -} - - -const char * errorMessage(Error code) -{ - switch (code) - { - case Error::ZOK: return "Ok"; - case Error::ZSYSTEMERROR: return "System error"; - case Error::ZRUNTIMEINCONSISTENCY: return "Run time inconsistency"; - case Error::ZDATAINCONSISTENCY: return "Data inconsistency"; - case Error::ZCONNECTIONLOSS: return "Connection loss"; - case Error::ZMARSHALLINGERROR: return "Marshalling error"; - case Error::ZUNIMPLEMENTED: return "Unimplemented"; - case Error::ZOPERATIONTIMEOUT: return "Operation timeout"; - case Error::ZBADARGUMENTS: return "Bad arguments"; - case Error::ZINVALIDSTATE: return "Invalid zhandle state"; - case Error::ZAPIERROR: return "API error"; - case Error::ZNONODE: return "No node"; - case Error::ZNOAUTH: return "Not authenticated"; - case Error::ZBADVERSION: return "Bad version"; - case Error::ZNOCHILDRENFOREPHEMERALS: return "No children for ephemerals"; - case Error::ZNODEEXISTS: return "Node exists"; - case Error::ZNOTEMPTY: return "Not empty"; - case Error::ZSESSIONEXPIRED: return "Session expired"; - case Error::ZINVALIDCALLBACK: return "Invalid callback"; - case Error::ZINVALIDACL: return "Invalid ACL"; - case Error::ZAUTHFAILED: return "Authentication failed"; - case Error::ZCLOSING: return "ZooKeeper is closing"; - case Error::ZNOTHING: return "(not error) no server responses to process"; - case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; - } - - __builtin_unreachable(); -} - -bool isHardwareError(Error zk_return_code) -{ - return zk_return_code == Error::ZINVALIDSTATE - || zk_return_code == Error::ZSESSIONEXPIRED - || 
zk_return_code == Error::ZSESSIONMOVED - || zk_return_code == Error::ZCONNECTIONLOSS - || zk_return_code == Error::ZMARSHALLINGERROR - || zk_return_code == Error::ZOPERATIONTIMEOUT; -} - -bool isUserError(Error zk_return_code) -{ - return zk_return_code == Error::ZNONODE - || zk_return_code == Error::ZBADVERSION - || zk_return_code == Error::ZNOCHILDRENFOREPHEMERALS - || zk_return_code == Error::ZNODEEXISTS - || zk_return_code == Error::ZNOTEMPTY; -} - - -void CreateRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } -void RemoveRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } -void ExistsRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } -void GetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } -void SetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } -void ListRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } -void CheckRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } + + if (root_path.empty()) + return; + + if (path.size() == 1) /// "/" + path = root_path; + else + path = root_path + path; +} + +static void removeRootPath(String & path, const String & root_path) +{ + if (root_path.empty()) + return; + + if (path.size() <= root_path.size()) + throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY); + + path = path.substr(root_path.size()); +} + + +const char * errorMessage(Error code) +{ + switch (code) + { + case Error::ZOK: return "Ok"; + case Error::ZSYSTEMERROR: return "System error"; + case Error::ZRUNTIMEINCONSISTENCY: return "Run time inconsistency"; + case Error::ZDATAINCONSISTENCY: return "Data inconsistency"; + case Error::ZCONNECTIONLOSS: return "Connection loss"; + case Error::ZMARSHALLINGERROR: return "Marshalling error"; + case Error::ZUNIMPLEMENTED: return "Unimplemented"; + case Error::ZOPERATIONTIMEOUT: return "Operation timeout"; + case Error::ZBADARGUMENTS: return "Bad arguments"; + case Error::ZINVALIDSTATE: return "Invalid zhandle state"; + case Error::ZAPIERROR: return "API error"; + case Error::ZNONODE: return "No node"; + case Error::ZNOAUTH: return "Not authenticated"; + case Error::ZBADVERSION: return "Bad version"; + case Error::ZNOCHILDRENFOREPHEMERALS: return "No children for ephemerals"; + case Error::ZNODEEXISTS: return "Node exists"; + case Error::ZNOTEMPTY: return "Not empty"; + case Error::ZSESSIONEXPIRED: return "Session expired"; + case Error::ZINVALIDCALLBACK: return "Invalid callback"; + case Error::ZINVALIDACL: return "Invalid ACL"; + case Error::ZAUTHFAILED: return "Authentication failed"; + case Error::ZCLOSING: return "ZooKeeper is closing"; + case Error::ZNOTHING: return "(not error) no server responses to process"; + case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; + } + + __builtin_unreachable(); +} + +bool isHardwareError(Error zk_return_code) +{ + return zk_return_code == Error::ZINVALIDSTATE + || zk_return_code == Error::ZSESSIONEXPIRED + || zk_return_code == Error::ZSESSIONMOVED + || zk_return_code == Error::ZCONNECTIONLOSS + || zk_return_code == Error::ZMARSHALLINGERROR + || zk_return_code == Error::ZOPERATIONTIMEOUT; +} + +bool isUserError(Error zk_return_code) +{ + return zk_return_code == Error::ZNONODE + || zk_return_code == Error::ZBADVERSION + || 
zk_return_code == Error::ZNOCHILDRENFOREPHEMERALS + || zk_return_code == Error::ZNODEEXISTS + || zk_return_code == Error::ZNOTEMPTY; +} + + +void CreateRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } +void RemoveRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } +void ExistsRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } +void GetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } +void SetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } +void ListRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } +void CheckRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } void SetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } void GetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } - -void MultiRequest::addRootPath(const String & root_path) -{ - for (auto & request : requests) - request->addRootPath(root_path); -} - -void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); } -void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); } - -void MultiResponse::removeRootPath(const String & root_path) -{ - for (auto & response : responses) - response->removeRootPath(root_path); -} - -} - + +void MultiRequest::addRootPath(const String & root_path) +{ + for (auto & request : requests) + request->addRootPath(root_path); +} + +void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); } +void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); } + +void MultiResponse::removeRootPath(const String & root_path) +{ + for (auto & response : responses) + response->removeRootPath(root_path); +} + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h index 1a49a78caf..30d816aad1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h @@ -1,153 +1,153 @@ -#pragma once - +#pragma once + #include <common/types.h> -#include <Common/Exception.h> - -#include <vector> -#include <memory> -#include <cstdint> -#include <functional> - -/** Generic interface for ZooKeeper-like services. - * Possible examples are: - * - ZooKeeper client itself; - * - fake ZooKeeper client for testing; - * - ZooKeeper emulation layer on top of Etcd, FoundationDB, whatever. 
- */ - - -namespace Coordination -{ - -using namespace DB; - - -struct ACL -{ - static constexpr int32_t Read = 1; - static constexpr int32_t Write = 2; - static constexpr int32_t Create = 4; - static constexpr int32_t Delete = 8; - static constexpr int32_t Admin = 16; - static constexpr int32_t All = 0x1F; - - int32_t permissions; - String scheme; - String id; -}; - -using ACLs = std::vector<ACL>; - -struct Stat -{ - int64_t czxid; - int64_t mzxid; - int64_t ctime; - int64_t mtime; - int32_t version; - int32_t cversion; - int32_t aversion; - int64_t ephemeralOwner; - int32_t dataLength; - int32_t numChildren; - int64_t pzxid; -}; - -enum class Error : int32_t -{ - ZOK = 0, - - /** System and server-side errors. - * This is never thrown by the server, it shouldn't be used other than - * to indicate a range. Specifically error codes greater than this - * value, but lesser than ZAPIERROR, are system errors. - */ - ZSYSTEMERROR = -1, - - ZRUNTIMEINCONSISTENCY = -2, /// A runtime inconsistency was found - ZDATAINCONSISTENCY = -3, /// A data inconsistency was found - ZCONNECTIONLOSS = -4, /// Connection to the server has been lost - ZMARSHALLINGERROR = -5, /// Error while marshalling or unmarshalling data - ZUNIMPLEMENTED = -6, /// Operation is unimplemented - ZOPERATIONTIMEOUT = -7, /// Operation timeout - ZBADARGUMENTS = -8, /// Invalid arguments - ZINVALIDSTATE = -9, /// Invliad zhandle state - - /** API errors. - * This is never thrown by the server, it shouldn't be used other than - * to indicate a range. Specifically error codes greater than this - * value are API errors. - */ - ZAPIERROR = -100, - - ZNONODE = -101, /// Node does not exist - ZNOAUTH = -102, /// Not authenticated - ZBADVERSION = -103, /// Version conflict - ZNOCHILDRENFOREPHEMERALS = -108, /// Ephemeral nodes may not have children - ZNODEEXISTS = -110, /// The node already exists - ZNOTEMPTY = -111, /// The node has children - ZSESSIONEXPIRED = -112, /// The session has been expired by the server - ZINVALIDCALLBACK = -113, /// Invalid callback specified - ZINVALIDACL = -114, /// Invalid ACL specified - ZAUTHFAILED = -115, /// Client authentication failed - ZCLOSING = -116, /// ZooKeeper is closing - ZNOTHING = -117, /// (not error) no server responses to process - ZSESSIONMOVED = -118 /// Session moved to another server, so operation is ignored -}; - -/// Network errors and similar. You should reinitialize ZooKeeper session in case of these errors -bool isHardwareError(Error code); - -/// Valid errors sent from the server about database state (like "no node"). Logical and authentication errors (like "bad arguments") are not here. -bool isUserError(Error code); - -const char * errorMessage(Error code); - - -struct Request; -using RequestPtr = std::shared_ptr<Request>; -using Requests = std::vector<RequestPtr>; - -struct Request -{ - Request() = default; - Request(const Request &) = default; - Request & operator=(const Request &) = default; - virtual ~Request() = default; - virtual String getPath() const = 0; - virtual void addRootPath(const String & /* root_path */) {} +#include <Common/Exception.h> + +#include <vector> +#include <memory> +#include <cstdint> +#include <functional> + +/** Generic interface for ZooKeeper-like services. + * Possible examples are: + * - ZooKeeper client itself; + * - fake ZooKeeper client for testing; + * - ZooKeeper emulation layer on top of Etcd, FoundationDB, whatever. 
+ */ + + +namespace Coordination +{ + +using namespace DB; + + +struct ACL +{ + static constexpr int32_t Read = 1; + static constexpr int32_t Write = 2; + static constexpr int32_t Create = 4; + static constexpr int32_t Delete = 8; + static constexpr int32_t Admin = 16; + static constexpr int32_t All = 0x1F; + + int32_t permissions; + String scheme; + String id; +}; + +using ACLs = std::vector<ACL>; + +struct Stat +{ + int64_t czxid; + int64_t mzxid; + int64_t ctime; + int64_t mtime; + int32_t version; + int32_t cversion; + int32_t aversion; + int64_t ephemeralOwner; + int32_t dataLength; + int32_t numChildren; + int64_t pzxid; +}; + +enum class Error : int32_t +{ + ZOK = 0, + + /** System and server-side errors. + * This is never thrown by the server, it shouldn't be used other than + * to indicate a range. Specifically error codes greater than this + * value, but lesser than ZAPIERROR, are system errors. + */ + ZSYSTEMERROR = -1, + + ZRUNTIMEINCONSISTENCY = -2, /// A runtime inconsistency was found + ZDATAINCONSISTENCY = -3, /// A data inconsistency was found + ZCONNECTIONLOSS = -4, /// Connection to the server has been lost + ZMARSHALLINGERROR = -5, /// Error while marshalling or unmarshalling data + ZUNIMPLEMENTED = -6, /// Operation is unimplemented + ZOPERATIONTIMEOUT = -7, /// Operation timeout + ZBADARGUMENTS = -8, /// Invalid arguments + ZINVALIDSTATE = -9, /// Invliad zhandle state + + /** API errors. + * This is never thrown by the server, it shouldn't be used other than + * to indicate a range. Specifically error codes greater than this + * value are API errors. + */ + ZAPIERROR = -100, + + ZNONODE = -101, /// Node does not exist + ZNOAUTH = -102, /// Not authenticated + ZBADVERSION = -103, /// Version conflict + ZNOCHILDRENFOREPHEMERALS = -108, /// Ephemeral nodes may not have children + ZNODEEXISTS = -110, /// The node already exists + ZNOTEMPTY = -111, /// The node has children + ZSESSIONEXPIRED = -112, /// The session has been expired by the server + ZINVALIDCALLBACK = -113, /// Invalid callback specified + ZINVALIDACL = -114, /// Invalid ACL specified + ZAUTHFAILED = -115, /// Client authentication failed + ZCLOSING = -116, /// ZooKeeper is closing + ZNOTHING = -117, /// (not error) no server responses to process + ZSESSIONMOVED = -118 /// Session moved to another server, so operation is ignored +}; + +/// Network errors and similar. You should reinitialize ZooKeeper session in case of these errors +bool isHardwareError(Error code); + +/// Valid errors sent from the server about database state (like "no node"). Logical and authentication errors (like "bad arguments") are not here. 
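[Editor's note: the two classifiers documented above split Error codes into session-fatal and logical conditions. A hedged sketch of the caller-side policy those comments imply; the two handler functions are assumed helpers and not part of this header.]

#include <Common/ZooKeeper/IKeeper.h>

void recreateZooKeeperSession();                // assumed helper
void handleLogicalError(Coordination::Error);   // assumed helper

void onKeeperError(Coordination::Error code)
{
    if (Coordination::isHardwareError(code))
    {
        // ZSESSIONEXPIRED, ZCONNECTIONLOSS, ...: the session is unusable,
        // reinitialize it as the comment above advises.
        recreateZooKeeperSession();
    }
    else if (Coordination::isUserError(code))
    {
        // ZNONODE, ZNODEEXISTS, ...: a database-state answer from the server;
        // handle it in application logic, the session itself is still healthy.
        handleLogicalError(code);
    }
    else
    {
        // Everything else (bad arguments, marshalling errors, ...) indicates
        // a bug or misuse on the caller's side.
        throw Coordination::Exception(code);
    }
}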
+bool isUserError(Error code); + +const char * errorMessage(Error code); + + +struct Request; +using RequestPtr = std::shared_ptr<Request>; +using Requests = std::vector<RequestPtr>; + +struct Request +{ + Request() = default; + Request(const Request &) = default; + Request & operator=(const Request &) = default; + virtual ~Request() = default; + virtual String getPath() const = 0; + virtual void addRootPath(const String & /* root_path */) {} virtual size_t bytesSize() const { return 0; } -}; - -struct Response; -using ResponsePtr = std::shared_ptr<Response>; -using Responses = std::vector<ResponsePtr>; -using ResponseCallback = std::function<void(const Response &)>; - -struct Response -{ - Error error = Error::ZOK; - Response() = default; - Response(const Response &) = default; - Response & operator=(const Response &) = default; - virtual ~Response() = default; - virtual void removeRootPath(const String & /* root_path */) {} +}; + +struct Response; +using ResponsePtr = std::shared_ptr<Response>; +using Responses = std::vector<ResponsePtr>; +using ResponseCallback = std::function<void(const Response &)>; + +struct Response +{ + Error error = Error::ZOK; + Response() = default; + Response(const Response &) = default; + Response & operator=(const Response &) = default; + virtual ~Response() = default; + virtual void removeRootPath(const String & /* root_path */) {} virtual size_t bytesSize() const { return 0; } -}; - -struct WatchResponse : virtual Response -{ - int32_t type = 0; - int32_t state = 0; - String path; - - void removeRootPath(const String & root_path) override; +}; + +struct WatchResponse : virtual Response +{ + int32_t type = 0; + int32_t state = 0; + String path; + + void removeRootPath(const String & root_path) override; size_t bytesSize() const override { return path.size() + sizeof(type) + sizeof(state); } -}; - -using WatchCallback = std::function<void(const WatchResponse &)>; - +}; + +using WatchCallback = std::function<void(const WatchResponse &)>; + struct SetACLRequest : virtual Request { String path; @@ -182,113 +182,113 @@ struct GetACLResponse : virtual Response size_t bytesSize() const override { return sizeof(Stat) + acl.size() * sizeof(ACL); } }; -struct CreateRequest : virtual Request -{ - String path; - String data; - bool is_ephemeral = false; - bool is_sequential = false; - ACLs acls; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +struct CreateRequest : virtual Request +{ + String path; + String data; + bool is_ephemeral = false; + bool is_sequential = false; + ACLs acls; + + void addRootPath(const String & root_path) override; + String getPath() const override { return path; } size_t bytesSize() const override { return path.size() + data.size() + sizeof(is_ephemeral) + sizeof(is_sequential) + acls.size() * sizeof(ACL); } -}; - -struct CreateResponse : virtual Response -{ - String path_created; - - void removeRootPath(const String & root_path) override; +}; + +struct CreateResponse : virtual Response +{ + String path_created; + + void removeRootPath(const String & root_path) override; size_t bytesSize() const override { return path_created.size(); } -}; - -struct RemoveRequest : virtual Request -{ - String path; - int32_t version = -1; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +}; + +struct RemoveRequest : virtual Request +{ + String path; + int32_t version = -1; + + void addRootPath(const String & root_path) override; + String 
getPath() const override { return path; } size_t bytesSize() const override { return path.size() + sizeof(version); } -}; - -struct RemoveResponse : virtual Response -{ -}; - -struct ExistsRequest : virtual Request -{ - String path; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +}; + +struct RemoveResponse : virtual Response +{ +}; + +struct ExistsRequest : virtual Request +{ + String path; + + void addRootPath(const String & root_path) override; + String getPath() const override { return path; } size_t bytesSize() const override { return path.size(); } -}; - -struct ExistsResponse : virtual Response -{ - Stat stat; +}; + +struct ExistsResponse : virtual Response +{ + Stat stat; size_t bytesSize() const override { return sizeof(Stat); } -}; - -struct GetRequest : virtual Request -{ - String path; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +}; + +struct GetRequest : virtual Request +{ + String path; + + void addRootPath(const String & root_path) override; + String getPath() const override { return path; } size_t bytesSize() const override { return path.size(); } -}; - -struct GetResponse : virtual Response -{ - String data; - Stat stat; +}; + +struct GetResponse : virtual Response +{ + String data; + Stat stat; size_t bytesSize() const override { return data.size() + sizeof(stat); } -}; - -struct SetRequest : virtual Request -{ - String path; - String data; - int32_t version = -1; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +}; + +struct SetRequest : virtual Request +{ + String path; + String data; + int32_t version = -1; + + void addRootPath(const String & root_path) override; + String getPath() const override { return path; } size_t bytesSize() const override { return data.size() + data.size() + sizeof(version); } -}; - -struct SetResponse : virtual Response -{ - Stat stat; +}; + +struct SetResponse : virtual Response +{ + Stat stat; size_t bytesSize() const override { return sizeof(stat); } -}; - -struct ListRequest : virtual Request -{ - String path; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +}; + +struct ListRequest : virtual Request +{ + String path; + + void addRootPath(const String & root_path) override; + String getPath() const override { return path; } size_t bytesSize() const override { return path.size(); } -}; - -struct ListResponse : virtual Response -{ - std::vector<String> names; - Stat stat; +}; + +struct ListResponse : virtual Response +{ + std::vector<String> names; + Stat stat; size_t bytesSize() const override { @@ -297,29 +297,29 @@ struct ListResponse : virtual Response size += name.size(); return size; } -}; - -struct CheckRequest : virtual Request -{ - String path; - int32_t version = -1; - - void addRootPath(const String & root_path) override; - String getPath() const override { return path; } +}; + +struct CheckRequest : virtual Request +{ + String path; + int32_t version = -1; + + void addRootPath(const String & root_path) override; + String getPath() const override { return path; } size_t bytesSize() const override { return path.size() + sizeof(version); } -}; - -struct CheckResponse : virtual Response -{ -}; - -struct MultiRequest : virtual Request -{ - Requests requests; - - void addRootPath(const String & root_path) override; - String getPath() const override { return {}; } +}; + +struct CheckResponse : 
virtual Response +{ +}; + +struct MultiRequest : virtual Request +{ + Requests requests; + + void addRootPath(const String & root_path) override; + String getPath() const override { return {}; } size_t bytesSize() const override { @@ -328,13 +328,13 @@ struct MultiRequest : virtual Request size += request->bytesSize(); return size; } -}; - -struct MultiResponse : virtual Response -{ - Responses responses; - - void removeRootPath(const String & root_path) override; +}; + +struct MultiResponse : virtual Response +{ + Responses responses; + + void removeRootPath(const String & root_path) override; size_t bytesSize() const override { @@ -343,145 +343,145 @@ struct MultiResponse : virtual Response size += response->bytesSize(); return size; } -}; - -/// This response may be received only as an element of responses in MultiResponse. -struct ErrorResponse : virtual Response -{ -}; - - -using CreateCallback = std::function<void(const CreateResponse &)>; -using RemoveCallback = std::function<void(const RemoveResponse &)>; -using ExistsCallback = std::function<void(const ExistsResponse &)>; -using GetCallback = std::function<void(const GetResponse &)>; -using SetCallback = std::function<void(const SetResponse &)>; -using ListCallback = std::function<void(const ListResponse &)>; -using CheckCallback = std::function<void(const CheckResponse &)>; -using MultiCallback = std::function<void(const MultiResponse &)>; - - -/// For watches. -enum State -{ - EXPIRED_SESSION = -112, - AUTH_FAILED = -113, - CONNECTING = 1, - ASSOCIATING = 2, - CONNECTED = 3, - NOTCONNECTED = 999 -}; - -enum Event -{ - CREATED = 1, - DELETED = 2, - CHANGED = 3, - CHILD = 4, - SESSION = -1, - NOTWATCHING = -2 -}; - - -class Exception : public DB::Exception -{ -private: - /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution. - Exception(const std::string & msg, const Error code_, int); - -public: - explicit Exception(const Error code_); - Exception(const std::string & msg, const Error code_); - Exception(const Error code_, const std::string & path); - Exception(const Exception & exc); - - const char * name() const throw() override { return "Coordination::Exception"; } - const char * className() const throw() override { return "Coordination::Exception"; } - Exception * clone() const override { return new Exception(*this); } - - const Error code; -}; - - -/** Usage scenario: - * - create an object and issue commands; - * - you provide callbacks for your commands; callbacks are invoked in internal thread and must be cheap: - * for example, just signal a condvar / fulfull a promise. - * - you also may provide callbacks for watches; they are also invoked in internal thread and must be cheap. - * - whenever you receive exception with ZSESSIONEXPIRED code or method isExpired returns true, - * the ZooKeeper instance is no longer usable - you may only destroy it and probably create another. - * - whenever session is expired or ZooKeeper instance is destroying, all callbacks are notified with special event. +}; + +/// This response may be received only as an element of responses in MultiResponse. 
+struct ErrorResponse : virtual Response +{ +}; + + +using CreateCallback = std::function<void(const CreateResponse &)>; +using RemoveCallback = std::function<void(const RemoveResponse &)>; +using ExistsCallback = std::function<void(const ExistsResponse &)>; +using GetCallback = std::function<void(const GetResponse &)>; +using SetCallback = std::function<void(const SetResponse &)>; +using ListCallback = std::function<void(const ListResponse &)>; +using CheckCallback = std::function<void(const CheckResponse &)>; +using MultiCallback = std::function<void(const MultiResponse &)>; + + +/// For watches. +enum State +{ + EXPIRED_SESSION = -112, + AUTH_FAILED = -113, + CONNECTING = 1, + ASSOCIATING = 2, + CONNECTED = 3, + NOTCONNECTED = 999 +}; + +enum Event +{ + CREATED = 1, + DELETED = 2, + CHANGED = 3, + CHILD = 4, + SESSION = -1, + NOTWATCHING = -2 +}; + + +class Exception : public DB::Exception +{ +private: + /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution. + Exception(const std::string & msg, const Error code_, int); + +public: + explicit Exception(const Error code_); + Exception(const std::string & msg, const Error code_); + Exception(const Error code_, const std::string & path); + Exception(const Exception & exc); + + const char * name() const throw() override { return "Coordination::Exception"; } + const char * className() const throw() override { return "Coordination::Exception"; } + Exception * clone() const override { return new Exception(*this); } + + const Error code; +}; + + +/** Usage scenario: + * - create an object and issue commands; + * - you provide callbacks for your commands; callbacks are invoked in internal thread and must be cheap: + * for example, just signal a condvar / fulfull a promise. + * - you also may provide callbacks for watches; they are also invoked in internal thread and must be cheap. + * - whenever you receive exception with ZSESSIONEXPIRED code or method isExpired returns true, + * the ZooKeeper instance is no longer usable - you may only destroy it and probably create another. + * - whenever session is expired or ZooKeeper instance is destroying, all callbacks are notified with special event. * - data for callbacks must be alive when ZooKeeper instance is alive, so try to avoid capturing references in callbacks, it's error-prone. - */ -class IKeeper -{ -public: + */ +class IKeeper +{ +public: virtual ~IKeeper() = default; - - /// If expired, you can only destroy the object. All other methods will throw exception. - virtual bool isExpired() const = 0; - - /// Useful to check owner of ephemeral node. - virtual int64_t getSessionID() const = 0; - - /// If the method will throw an exception, callbacks won't be called. - /// - /// After the method is executed successfully, you must wait for callbacks - /// (don't destroy callback data before it will be called). + + /// If expired, you can only destroy the object. All other methods will throw exception. + virtual bool isExpired() const = 0; + + /// Useful to check owner of ephemeral node. + virtual int64_t getSessionID() const = 0; + + /// If the method will throw an exception, callbacks won't be called. + /// + /// After the method is executed successfully, you must wait for callbacks + /// (don't destroy callback data before it will be called). /// TODO: The above line is the description of an error-prone interface. It's better /// to replace callbacks with std::future results, so the caller shouldn't think about /// lifetime of the callback data. 
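[Editor's note: the TODO in the interface comment above already names the fix: wrap the callback in a std::promise so callers receive a std::future instead of managing callback-data lifetime by hand. A hedged sketch against the create() signature declared in this header; the wrapper name asyncCreate is an assumption.]

#include <future>
#include <memory>
#include <Common/ZooKeeper/IKeeper.h>

std::future<Coordination::CreateResponse> asyncCreate(
    Coordination::IKeeper & keeper,
    const DB::String & path,
    const DB::String & data,
    bool is_ephemeral,
    bool is_sequential,
    const Coordination::ACLs & acls)
{
    // shared_ptr keeps the promise alive until the client's internal thread
    // fires the callback, and keeps the lambda copyable for std::function.
    auto promise = std::make_shared<std::promise<Coordination::CreateResponse>>();
    auto future = promise->get_future();

    // Fulfilling a promise is exactly the cheap, non-blocking work the
    // usage-scenario comment permits inside a callback.
    keeper.create(path, data, is_ephemeral, is_sequential, acls,
        [promise](const Coordination::CreateResponse & response)
        {
            promise->set_value(response);
        });

    return future;
}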
- /// - /// All callbacks are executed sequentially (the execution of callbacks is serialized). - /// - /// If an exception is thrown inside the callback, the session will expire, - /// and all other callbacks will be called with "Session expired" error. - - virtual void create( - const String & path, - const String & data, - bool is_ephemeral, - bool is_sequential, - const ACLs & acls, - CreateCallback callback) = 0; - - virtual void remove( - const String & path, - int32_t version, - RemoveCallback callback) = 0; - - virtual void exists( - const String & path, - ExistsCallback callback, - WatchCallback watch) = 0; - - virtual void get( - const String & path, - GetCallback callback, - WatchCallback watch) = 0; - - virtual void set( - const String & path, - const String & data, - int32_t version, - SetCallback callback) = 0; - - virtual void list( - const String & path, - ListCallback callback, - WatchCallback watch) = 0; - - virtual void check( - const String & path, - int32_t version, - CheckCallback callback) = 0; - - virtual void multi( - const Requests & requests, - MultiCallback callback) = 0; + /// + /// All callbacks are executed sequentially (the execution of callbacks is serialized). + /// + /// If an exception is thrown inside the callback, the session will expire, + /// and all other callbacks will be called with "Session expired" error. + + virtual void create( + const String & path, + const String & data, + bool is_ephemeral, + bool is_sequential, + const ACLs & acls, + CreateCallback callback) = 0; + + virtual void remove( + const String & path, + int32_t version, + RemoveCallback callback) = 0; + + virtual void exists( + const String & path, + ExistsCallback callback, + WatchCallback watch) = 0; + + virtual void get( + const String & path, + GetCallback callback, + WatchCallback watch) = 0; + + virtual void set( + const String & path, + const String & data, + int32_t version, + SetCallback callback) = 0; + + virtual void list( + const String & path, + ListCallback callback, + WatchCallback watch) = 0; + + virtual void check( + const String & path, + int32_t version, + CheckCallback callback) = 0; + + virtual void multi( + const Requests & requests, + MultiCallback callback) = 0; /// Expire session and finish all pending requests virtual void finalize() = 0; -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h index cae14b7e96..33be285306 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h @@ -1,37 +1,37 @@ -#pragma once - -#include <common/types.h> -#include <future> -#include <memory> -#include <vector> -#include <Common/ZooKeeper/IKeeper.h> -#include <Poco/Event.h> - - -namespace zkutil -{ - -using Strings = std::vector<std::string>; - - -namespace CreateMode -{ - extern const int Persistent; - extern const int Ephemeral; - extern const int EphemeralSequential; - extern const int PersistentSequential; -} - -using EventPtr = std::shared_ptr<Poco::Event>; - -/// Gets multiple asynchronous results -/// Each pair, the first is path, the second is response eg. 
CreateResponse, RemoveResponse -template <typename R> -using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>; - -Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode); -Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version); -Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version); -Coordination::RequestPtr makeCheckRequest(const std::string & path, int version); - -} +#pragma once + +#include <common/types.h> +#include <future> +#include <memory> +#include <vector> +#include <Common/ZooKeeper/IKeeper.h> +#include <Poco/Event.h> + + +namespace zkutil +{ + +using Strings = std::vector<std::string>; + + +namespace CreateMode +{ + extern const int Persistent; + extern const int Ephemeral; + extern const int EphemeralSequential; + extern const int PersistentSequential; +} + +using EventPtr = std::shared_ptr<Poco::Event>; + +/// Gets multiple asynchronous results +/// Each pair, the first is path, the second is response eg. CreateResponse, RemoveResponse +template <typename R> +using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>; + +Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode); +Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version); +Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version); +Coordination::RequestPtr makeCheckRequest(const std::string & path, int version); + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp index b1042ef23e..5ed82231b2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp @@ -1,45 +1,45 @@ -#include <Common/createHardLink.h> -#include <Common/Exception.h> -#include <errno.h> -#include <unistd.h> -#include <sys/stat.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_STAT; - extern const int CANNOT_LINK; -} - -void createHardLink(const String & source_path, const String & destination_path) -{ - if (0 != link(source_path.c_str(), destination_path.c_str())) - { - if (errno == EEXIST) - { - auto link_errno = errno; - - struct stat source_descr; - struct stat destination_descr; - - if (0 != lstat(source_path.c_str(), &source_descr)) - throwFromErrnoWithPath("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT); - - if (0 != lstat(destination_path.c_str(), &destination_descr)) - throwFromErrnoWithPath("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT); - - if (source_descr.st_ino != destination_descr.st_ino) - throwFromErrnoWithPath( - "Destination file " + destination_path + " is already exist and have different inode.", - destination_path, ErrorCodes::CANNOT_LINK, link_errno); - } - else - throwFromErrnoWithPath("Cannot link " + source_path + " to " + destination_path, destination_path, - ErrorCodes::CANNOT_LINK); - } -} - -} +#include <Common/createHardLink.h> +#include <Common/Exception.h> +#include <errno.h> +#include <unistd.h> +#include <sys/stat.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_STAT; + extern const int CANNOT_LINK; +} + +void createHardLink(const String & source_path, const String & destination_path) +{ + 
if (0 != link(source_path.c_str(), destination_path.c_str())) + { + if (errno == EEXIST) + { + auto link_errno = errno; + + struct stat source_descr; + struct stat destination_descr; + + if (0 != lstat(source_path.c_str(), &source_descr)) + throwFromErrnoWithPath("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT); + + if (0 != lstat(destination_path.c_str(), &destination_descr)) + throwFromErrnoWithPath("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT); + + if (source_descr.st_ino != destination_descr.st_ino) + throwFromErrnoWithPath( + "Destination file " + destination_path + " is already exist and have different inode.", + destination_path, ErrorCodes::CANNOT_LINK, link_errno); + } + else + throwFromErrnoWithPath("Cannot link " + source_path + " to " + destination_path, destination_path, + ErrorCodes::CANNOT_LINK); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h index f4b4d6204c..c2b01cf817 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h @@ -1,12 +1,12 @@ -#pragma once - +#pragma once + #include <common/types.h> - -namespace DB -{ - -/// Create a hard link `destination_path` pointing to `source_path`. -/// If the destination already exists, check that it has the same inode (and throw if they are different). -void createHardLink(const String & source_path, const String & destination_path); - -} + +namespace DB +{ + +/// Create a hard link `destination_path` pointing to `source_path`. +/// If the destination already exists, check that it has the same inode (and throw if they are different). +void createHardLink(const String & source_path, const String & destination_path); + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp index 9d897d043d..0fbebe21de 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp @@ -1,161 +1,161 @@ -#include <Common/formatIPv6.h> -#include <Common/hex.h> - +#include <Common/formatIPv6.h> +#include <Common/hex.h> + #include <common/range.h> -#include <array> -#include <algorithm> - -namespace DB -{ - -// To be used in formatIPv4, maps a byte to it's string form prefixed with length (so save strlen call). 
-extern const char one_byte_to_string_lookup_table[256][4] = -{ - {1, '0'}, {1, '1'}, {1, '2'}, {1, '3'}, {1, '4'}, {1, '5'}, {1, '6'}, {1, '7'}, {1, '8'}, {1, '9'}, - {2, '1', '0'}, {2, '1', '1'}, {2, '1', '2'}, {2, '1', '3'}, {2, '1', '4'}, {2, '1', '5'}, {2, '1', '6'}, {2, '1', '7'}, {2, '1', '8'}, {2, '1', '9'}, - {2, '2', '0'}, {2, '2', '1'}, {2, '2', '2'}, {2, '2', '3'}, {2, '2', '4'}, {2, '2', '5'}, {2, '2', '6'}, {2, '2', '7'}, {2, '2', '8'}, {2, '2', '9'}, - {2, '3', '0'}, {2, '3', '1'}, {2, '3', '2'}, {2, '3', '3'}, {2, '3', '4'}, {2, '3', '5'}, {2, '3', '6'}, {2, '3', '7'}, {2, '3', '8'}, {2, '3', '9'}, - {2, '4', '0'}, {2, '4', '1'}, {2, '4', '2'}, {2, '4', '3'}, {2, '4', '4'}, {2, '4', '5'}, {2, '4', '6'}, {2, '4', '7'}, {2, '4', '8'}, {2, '4', '9'}, - {2, '5', '0'}, {2, '5', '1'}, {2, '5', '2'}, {2, '5', '3'}, {2, '5', '4'}, {2, '5', '5'}, {2, '5', '6'}, {2, '5', '7'}, {2, '5', '8'}, {2, '5', '9'}, - {2, '6', '0'}, {2, '6', '1'}, {2, '6', '2'}, {2, '6', '3'}, {2, '6', '4'}, {2, '6', '5'}, {2, '6', '6'}, {2, '6', '7'}, {2, '6', '8'}, {2, '6', '9'}, - {2, '7', '0'}, {2, '7', '1'}, {2, '7', '2'}, {2, '7', '3'}, {2, '7', '4'}, {2, '7', '5'}, {2, '7', '6'}, {2, '7', '7'}, {2, '7', '8'}, {2, '7', '9'}, - {2, '8', '0'}, {2, '8', '1'}, {2, '8', '2'}, {2, '8', '3'}, {2, '8', '4'}, {2, '8', '5'}, {2, '8', '6'}, {2, '8', '7'}, {2, '8', '8'}, {2, '8', '9'}, - {2, '9', '0'}, {2, '9', '1'}, {2, '9', '2'}, {2, '9', '3'}, {2, '9', '4'}, {2, '9', '5'}, {2, '9', '6'}, {2, '9', '7'}, {2, '9', '8'}, {2, '9', '9'}, - {3, '1', '0', '0'}, {3, '1', '0', '1'}, {3, '1', '0', '2'}, {3, '1', '0', '3'}, {3, '1', '0', '4'}, {3, '1', '0', '5'}, {3, '1', '0', '6'}, {3, '1', '0', '7'}, {3, '1', '0', '8'}, {3, '1', '0', '9'}, - {3, '1', '1', '0'}, {3, '1', '1', '1'}, {3, '1', '1', '2'}, {3, '1', '1', '3'}, {3, '1', '1', '4'}, {3, '1', '1', '5'}, {3, '1', '1', '6'}, {3, '1', '1', '7'}, {3, '1', '1', '8'}, {3, '1', '1', '9'}, - {3, '1', '2', '0'}, {3, '1', '2', '1'}, {3, '1', '2', '2'}, {3, '1', '2', '3'}, {3, '1', '2', '4'}, {3, '1', '2', '5'}, {3, '1', '2', '6'}, {3, '1', '2', '7'}, {3, '1', '2', '8'}, {3, '1', '2', '9'}, - {3, '1', '3', '0'}, {3, '1', '3', '1'}, {3, '1', '3', '2'}, {3, '1', '3', '3'}, {3, '1', '3', '4'}, {3, '1', '3', '5'}, {3, '1', '3', '6'}, {3, '1', '3', '7'}, {3, '1', '3', '8'}, {3, '1', '3', '9'}, - {3, '1', '4', '0'}, {3, '1', '4', '1'}, {3, '1', '4', '2'}, {3, '1', '4', '3'}, {3, '1', '4', '4'}, {3, '1', '4', '5'}, {3, '1', '4', '6'}, {3, '1', '4', '7'}, {3, '1', '4', '8'}, {3, '1', '4', '9'}, - {3, '1', '5', '0'}, {3, '1', '5', '1'}, {3, '1', '5', '2'}, {3, '1', '5', '3'}, {3, '1', '5', '4'}, {3, '1', '5', '5'}, {3, '1', '5', '6'}, {3, '1', '5', '7'}, {3, '1', '5', '8'}, {3, '1', '5', '9'}, - {3, '1', '6', '0'}, {3, '1', '6', '1'}, {3, '1', '6', '2'}, {3, '1', '6', '3'}, {3, '1', '6', '4'}, {3, '1', '6', '5'}, {3, '1', '6', '6'}, {3, '1', '6', '7'}, {3, '1', '6', '8'}, {3, '1', '6', '9'}, - {3, '1', '7', '0'}, {3, '1', '7', '1'}, {3, '1', '7', '2'}, {3, '1', '7', '3'}, {3, '1', '7', '4'}, {3, '1', '7', '5'}, {3, '1', '7', '6'}, {3, '1', '7', '7'}, {3, '1', '7', '8'}, {3, '1', '7', '9'}, - {3, '1', '8', '0'}, {3, '1', '8', '1'}, {3, '1', '8', '2'}, {3, '1', '8', '3'}, {3, '1', '8', '4'}, {3, '1', '8', '5'}, {3, '1', '8', '6'}, {3, '1', '8', '7'}, {3, '1', '8', '8'}, {3, '1', '8', '9'}, - {3, '1', '9', '0'}, {3, '1', '9', '1'}, {3, '1', '9', '2'}, {3, '1', '9', '3'}, {3, '1', '9', '4'}, {3, '1', '9', '5'}, {3, '1', '9', '6'}, {3, '1', '9', '7'}, {3, '1', '9', '8'}, {3, '1', '9', '9'}, 
- {3, '2', '0', '0'}, {3, '2', '0', '1'}, {3, '2', '0', '2'}, {3, '2', '0', '3'}, {3, '2', '0', '4'}, {3, '2', '0', '5'}, {3, '2', '0', '6'}, {3, '2', '0', '7'}, {3, '2', '0', '8'}, {3, '2', '0', '9'}, - {3, '2', '1', '0'}, {3, '2', '1', '1'}, {3, '2', '1', '2'}, {3, '2', '1', '3'}, {3, '2', '1', '4'}, {3, '2', '1', '5'}, {3, '2', '1', '6'}, {3, '2', '1', '7'}, {3, '2', '1', '8'}, {3, '2', '1', '9'}, - {3, '2', '2', '0'}, {3, '2', '2', '1'}, {3, '2', '2', '2'}, {3, '2', '2', '3'}, {3, '2', '2', '4'}, {3, '2', '2', '5'}, {3, '2', '2', '6'}, {3, '2', '2', '7'}, {3, '2', '2', '8'}, {3, '2', '2', '9'}, - {3, '2', '3', '0'}, {3, '2', '3', '1'}, {3, '2', '3', '2'}, {3, '2', '3', '3'}, {3, '2', '3', '4'}, {3, '2', '3', '5'}, {3, '2', '3', '6'}, {3, '2', '3', '7'}, {3, '2', '3', '8'}, {3, '2', '3', '9'}, - {3, '2', '4', '0'}, {3, '2', '4', '1'}, {3, '2', '4', '2'}, {3, '2', '4', '3'}, {3, '2', '4', '4'}, {3, '2', '4', '5'}, {3, '2', '4', '6'}, {3, '2', '4', '7'}, {3, '2', '4', '8'}, {3, '2', '4', '9'}, - {3, '2', '5', '0'}, {3, '2', '5', '1'}, {3, '2', '5', '2'}, {3, '2', '5', '3'}, {3, '2', '5', '4'}, {3, '2', '5', '5'}, -}; - -/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) -static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry) -{ - return value >= base ? 1 + intLog(value / base, base, value % base || carry) : value % base > 1 || carry; -} - -/// Print integer in desired base, faster than sprintf. -/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark -/// But it doesn't matter here. -template <UInt32 base, typename T> -static void printInteger(char *& out, T value) -{ - if (value == 0) - *out++ = '0'; - else - { - constexpr size_t buffer_size = sizeof(T) * intLog(256, base, false); - - char buf[buffer_size]; - auto ptr = buf; - - while (value > 0) - { - *ptr = hexDigitLowercase(value % base); - ++ptr; - value /= base; - } - - /// Copy to out reversed. - while (ptr != buf) - { - --ptr; - *out = *ptr; - ++out; - } - } -} - -void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count) -{ - struct { int base, len; } best{-1, 0}, cur{-1, 0}; - std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words{}; - - /** Preprocess: - * Copy the input (bytewise) array into a wordwise array. - * Find the longest run of 0x00's in src[] for :: shorthanding. */ +#include <array> +#include <algorithm> + +namespace DB +{ + +// To be used in formatIPv4, maps a byte to it's string form prefixed with length (so save strlen call). 
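[Editor's note: the comment above describes the table's layout: byte 0 of each entry is the digit count and bytes 1..3 hold the digits, so a consumer can memcpy without calling strlen. A hedged sketch of that consumption; the function name is illustrative, and formatIPv4 itself is not shown in this hunk.]

#include <cstddef>
#include <cstring>

extern const char one_byte_to_string_lookup_table[256][4];

// Append the decimal form of one octet to dst and advance the pointer.
inline void writeByteAsDecimal(char *& dst, unsigned char value)
{
    const char * entry = one_byte_to_string_lookup_table[value];
    const size_t len = static_cast<size_t>(entry[0]);  // length prefix
    memcpy(dst, entry + 1, len);                       // digits, no strlen needed
    dst += len;
}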
+extern const char one_byte_to_string_lookup_table[256][4] = +{ + {1, '0'}, {1, '1'}, {1, '2'}, {1, '3'}, {1, '4'}, {1, '5'}, {1, '6'}, {1, '7'}, {1, '8'}, {1, '9'}, + {2, '1', '0'}, {2, '1', '1'}, {2, '1', '2'}, {2, '1', '3'}, {2, '1', '4'}, {2, '1', '5'}, {2, '1', '6'}, {2, '1', '7'}, {2, '1', '8'}, {2, '1', '9'}, + {2, '2', '0'}, {2, '2', '1'}, {2, '2', '2'}, {2, '2', '3'}, {2, '2', '4'}, {2, '2', '5'}, {2, '2', '6'}, {2, '2', '7'}, {2, '2', '8'}, {2, '2', '9'}, + {2, '3', '0'}, {2, '3', '1'}, {2, '3', '2'}, {2, '3', '3'}, {2, '3', '4'}, {2, '3', '5'}, {2, '3', '6'}, {2, '3', '7'}, {2, '3', '8'}, {2, '3', '9'}, + {2, '4', '0'}, {2, '4', '1'}, {2, '4', '2'}, {2, '4', '3'}, {2, '4', '4'}, {2, '4', '5'}, {2, '4', '6'}, {2, '4', '7'}, {2, '4', '8'}, {2, '4', '9'}, + {2, '5', '0'}, {2, '5', '1'}, {2, '5', '2'}, {2, '5', '3'}, {2, '5', '4'}, {2, '5', '5'}, {2, '5', '6'}, {2, '5', '7'}, {2, '5', '8'}, {2, '5', '9'}, + {2, '6', '0'}, {2, '6', '1'}, {2, '6', '2'}, {2, '6', '3'}, {2, '6', '4'}, {2, '6', '5'}, {2, '6', '6'}, {2, '6', '7'}, {2, '6', '8'}, {2, '6', '9'}, + {2, '7', '0'}, {2, '7', '1'}, {2, '7', '2'}, {2, '7', '3'}, {2, '7', '4'}, {2, '7', '5'}, {2, '7', '6'}, {2, '7', '7'}, {2, '7', '8'}, {2, '7', '9'}, + {2, '8', '0'}, {2, '8', '1'}, {2, '8', '2'}, {2, '8', '3'}, {2, '8', '4'}, {2, '8', '5'}, {2, '8', '6'}, {2, '8', '7'}, {2, '8', '8'}, {2, '8', '9'}, + {2, '9', '0'}, {2, '9', '1'}, {2, '9', '2'}, {2, '9', '3'}, {2, '9', '4'}, {2, '9', '5'}, {2, '9', '6'}, {2, '9', '7'}, {2, '9', '8'}, {2, '9', '9'}, + {3, '1', '0', '0'}, {3, '1', '0', '1'}, {3, '1', '0', '2'}, {3, '1', '0', '3'}, {3, '1', '0', '4'}, {3, '1', '0', '5'}, {3, '1', '0', '6'}, {3, '1', '0', '7'}, {3, '1', '0', '8'}, {3, '1', '0', '9'}, + {3, '1', '1', '0'}, {3, '1', '1', '1'}, {3, '1', '1', '2'}, {3, '1', '1', '3'}, {3, '1', '1', '4'}, {3, '1', '1', '5'}, {3, '1', '1', '6'}, {3, '1', '1', '7'}, {3, '1', '1', '8'}, {3, '1', '1', '9'}, + {3, '1', '2', '0'}, {3, '1', '2', '1'}, {3, '1', '2', '2'}, {3, '1', '2', '3'}, {3, '1', '2', '4'}, {3, '1', '2', '5'}, {3, '1', '2', '6'}, {3, '1', '2', '7'}, {3, '1', '2', '8'}, {3, '1', '2', '9'}, + {3, '1', '3', '0'}, {3, '1', '3', '1'}, {3, '1', '3', '2'}, {3, '1', '3', '3'}, {3, '1', '3', '4'}, {3, '1', '3', '5'}, {3, '1', '3', '6'}, {3, '1', '3', '7'}, {3, '1', '3', '8'}, {3, '1', '3', '9'}, + {3, '1', '4', '0'}, {3, '1', '4', '1'}, {3, '1', '4', '2'}, {3, '1', '4', '3'}, {3, '1', '4', '4'}, {3, '1', '4', '5'}, {3, '1', '4', '6'}, {3, '1', '4', '7'}, {3, '1', '4', '8'}, {3, '1', '4', '9'}, + {3, '1', '5', '0'}, {3, '1', '5', '1'}, {3, '1', '5', '2'}, {3, '1', '5', '3'}, {3, '1', '5', '4'}, {3, '1', '5', '5'}, {3, '1', '5', '6'}, {3, '1', '5', '7'}, {3, '1', '5', '8'}, {3, '1', '5', '9'}, + {3, '1', '6', '0'}, {3, '1', '6', '1'}, {3, '1', '6', '2'}, {3, '1', '6', '3'}, {3, '1', '6', '4'}, {3, '1', '6', '5'}, {3, '1', '6', '6'}, {3, '1', '6', '7'}, {3, '1', '6', '8'}, {3, '1', '6', '9'}, + {3, '1', '7', '0'}, {3, '1', '7', '1'}, {3, '1', '7', '2'}, {3, '1', '7', '3'}, {3, '1', '7', '4'}, {3, '1', '7', '5'}, {3, '1', '7', '6'}, {3, '1', '7', '7'}, {3, '1', '7', '8'}, {3, '1', '7', '9'}, + {3, '1', '8', '0'}, {3, '1', '8', '1'}, {3, '1', '8', '2'}, {3, '1', '8', '3'}, {3, '1', '8', '4'}, {3, '1', '8', '5'}, {3, '1', '8', '6'}, {3, '1', '8', '7'}, {3, '1', '8', '8'}, {3, '1', '8', '9'}, + {3, '1', '9', '0'}, {3, '1', '9', '1'}, {3, '1', '9', '2'}, {3, '1', '9', '3'}, {3, '1', '9', '4'}, {3, '1', '9', '5'}, {3, '1', '9', '6'}, {3, '1', '9', '7'}, {3, '1', '9', '8'}, {3, '1', '9', '9'}, 
+ {3, '2', '0', '0'}, {3, '2', '0', '1'}, {3, '2', '0', '2'}, {3, '2', '0', '3'}, {3, '2', '0', '4'}, {3, '2', '0', '5'}, {3, '2', '0', '6'}, {3, '2', '0', '7'}, {3, '2', '0', '8'}, {3, '2', '0', '9'}, + {3, '2', '1', '0'}, {3, '2', '1', '1'}, {3, '2', '1', '2'}, {3, '2', '1', '3'}, {3, '2', '1', '4'}, {3, '2', '1', '5'}, {3, '2', '1', '6'}, {3, '2', '1', '7'}, {3, '2', '1', '8'}, {3, '2', '1', '9'}, + {3, '2', '2', '0'}, {3, '2', '2', '1'}, {3, '2', '2', '2'}, {3, '2', '2', '3'}, {3, '2', '2', '4'}, {3, '2', '2', '5'}, {3, '2', '2', '6'}, {3, '2', '2', '7'}, {3, '2', '2', '8'}, {3, '2', '2', '9'}, + {3, '2', '3', '0'}, {3, '2', '3', '1'}, {3, '2', '3', '2'}, {3, '2', '3', '3'}, {3, '2', '3', '4'}, {3, '2', '3', '5'}, {3, '2', '3', '6'}, {3, '2', '3', '7'}, {3, '2', '3', '8'}, {3, '2', '3', '9'}, + {3, '2', '4', '0'}, {3, '2', '4', '1'}, {3, '2', '4', '2'}, {3, '2', '4', '3'}, {3, '2', '4', '4'}, {3, '2', '4', '5'}, {3, '2', '4', '6'}, {3, '2', '4', '7'}, {3, '2', '4', '8'}, {3, '2', '4', '9'}, + {3, '2', '5', '0'}, {3, '2', '5', '1'}, {3, '2', '5', '2'}, {3, '2', '5', '3'}, {3, '2', '5', '4'}, {3, '2', '5', '5'}, +}; + +/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) +static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry) +{ + return value >= base ? 1 + intLog(value / base, base, value % base || carry) : value % base > 1 || carry; +} + +/// Print integer in desired base, faster than sprintf. +/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark +/// But it doesn't matter here. +template <UInt32 base, typename T> +static void printInteger(char *& out, T value) +{ + if (value == 0) + *out++ = '0'; + else + { + constexpr size_t buffer_size = sizeof(T) * intLog(256, base, false); + + char buf[buffer_size]; + auto ptr = buf; + + while (value > 0) + { + *ptr = hexDigitLowercase(value % base); + ++ptr; + value /= base; + } + + /// Copy to out reversed. + while (ptr != buf) + { + --ptr; + *out = *ptr; + ++out; + } + } +} + +void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count) +{ + struct { int base, len; } best{-1, 0}, cur{-1, 0}; + std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words{}; + + /** Preprocess: + * Copy the input (bytewise) array into a wordwise array. + * Find the longest run of 0x00's in src[] for :: shorthanding. */ for (const auto i : collections::range(0, IPV6_BINARY_LENGTH - zeroed_tail_bytes_count)) - words[i / 2] |= src[i] << ((1 - (i % 2)) << 3); - + words[i / 2] |= src[i] << ((1 - (i % 2)) << 3); + for (const auto i : collections::range(0, words.size())) - { - if (words[i] == 0) - { - if (cur.base == -1) - { - cur.base = i; - cur.len = 1; - } - else - cur.len++; - } - else - { - if (cur.base != -1) - { - if (best.base == -1 || cur.len > best.len) - best = cur; - cur.base = -1; - } - } - } - - if (cur.base != -1) - { - if (best.base == -1 || cur.len > best.len) - best = cur; - } - - if (best.base != -1 && best.len < 2) - best.base = -1; - - /// Format the result. + { + if (words[i] == 0) + { + if (cur.base == -1) + { + cur.base = i; + cur.len = 1; + } + else + cur.len++; + } + else + { + if (cur.base != -1) + { + if (best.base == -1 || cur.len > best.len) + best = cur; + cur.base = -1; + } + } + } + + if (cur.base != -1) + { + if (best.base == -1 || cur.len > best.len) + best = cur; + } + + if (best.base != -1 && best.len < 2) + best.base = -1; + + /// Format the result. 
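[Editor's note: at this point in formatIPv6, `best` holds the longest zero run, which the loop below collapses into "::". A hedged usage sketch showing the net effect; buffer sizes come from the constants in formatIPv6.h further down, and the sample address is illustrative.]

#include <cstdio>

void formatIPv6Example()
{
    // 2001:0db8:0000:...:0000 - the trailing six words are the best zero run.
    unsigned char addr[IPV6_BINARY_LENGTH] = {0x20, 0x01, 0x0d, 0xb8};  // rest zero

    char text[IPV6_MAX_TEXT_LENGTH + 1];
    char * ptr = text;
    DB::formatIPv6(addr, ptr);   // writes the trailing '\0' itself

    printf("%s\n", text);        // prints "2001:db8::"
}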
for (const int i : collections::range(0, words.size())) - { - /// Are we inside the best run of 0x00's? - if (best.base != -1 && i >= best.base && i < (best.base + best.len)) - { - if (i == best.base) - *dst++ = ':'; - continue; - } - - /// Are we following an initial run of 0x00s or any real hex? - if (i != 0) - *dst++ = ':'; - - /// Is this address an encapsulated IPv4? - if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) - { - uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0}; - memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH); - // Due to historical reasons formatIPv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format. - std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer)); - - formatIPv4(ipv4_buffer, dst, std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), "0"); - // formatIPv4 has already added a null-terminator for us. - return; - } - - printInteger<16>(dst, words[i]); - } - - /// Was it a trailing run of 0x00's? - if (best.base != -1 && size_t(best.base) + size_t(best.len) == words.size()) - *dst++ = ':'; - - *dst++ = '\0'; -} - -} + { + /// Are we inside the best run of 0x00's? + if (best.base != -1 && i >= best.base && i < (best.base + best.len)) + { + if (i == best.base) + *dst++ = ':'; + continue; + } + + /// Are we following an initial run of 0x00s or any real hex? + if (i != 0) + *dst++ = ':'; + + /// Is this address an encapsulated IPv4? + if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) + { + uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0}; + memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH); + // Due to historical reasons formatIPv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format. + std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer)); + + formatIPv4(ipv4_buffer, dst, std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), "0"); + // formatIPv4 has already added a null-terminator for us. + return; + } + + printInteger<16>(dst, words[i]); + } + + /// Was it a trailing run of 0x00's? + if (best.base != -1 && size_t(best.base) + size_t(best.len) == words.size()) + *dst++ = ':'; + + *dst++ = '\0'; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h index 8c6d3dc207..bb8acd6d17 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h @@ -1,227 +1,227 @@ -#pragma once - -#include <common/types.h> -#include <string.h> -#include <algorithm> -#include <utility> +#pragma once + +#include <common/types.h> +#include <string.h> +#include <algorithm> +#include <utility> #include <common/range.h> -#include <Common/hex.h> -#include <Common/StringUtils/StringUtils.h> - -constexpr size_t IPV4_BINARY_LENGTH = 4; -constexpr size_t IPV6_BINARY_LENGTH = 16; -constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte. -constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; - -namespace DB -{ - - -/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c - * performs significantly faster than the reference implementation due to the absence of sprintf calls, - * bounds checking, unnecessary string copying and length calculation. 
- */ -void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count = 0); - -/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. - * +#include <Common/hex.h> +#include <Common/StringUtils/StringUtils.h> + +constexpr size_t IPV4_BINARY_LENGTH = 4; +constexpr size_t IPV6_BINARY_LENGTH = 16; +constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte. +constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; + +namespace DB +{ + + +/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + * performs significantly faster than the reference implementation due to the absence of sprintf calls, + * bounds checking, unnecessary string copying and length calculation. + */ +void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count = 0); + +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, - * which should be long enough. - * That is "127.0.0.1" becomes 0x7f000001. - * - * In case of failure returns false and doesn't modify buffer pointed by `dst`. - * - * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value. + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure returns false and doesn't modify buffer pointed by `dst`. + * + * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value. * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. - * @return false if parsing failed, true otherwise. - */ -inline bool parseIPv4(const char * src, unsigned char * dst) -{ - UInt32 result = 0; - for (int offset = 24; offset >= 0; offset -= 8) - { - UInt32 value = 0; - size_t len = 0; - while (isNumericASCII(*src) && len <= 3) - { - value = value * 10 + (*src - '0'); - ++len; - ++src; - } - if (len == 0 || value > 255 || (offset > 0 && *src != '.')) - return false; - result |= value << offset; - ++src; - } - if (*(src - 1) != '\0') - return false; - - memcpy(dst, &result, sizeof(result)); - return true; -} - -/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. -* -* Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + * @return false if parsing failed, true otherwise. + */ +inline bool parseIPv4(const char * src, unsigned char * dst) +{ + UInt32 result = 0; + for (int offset = 24; offset >= 0; offset -= 8) + { + UInt32 value = 0; + size_t len = 0; + while (isNumericASCII(*src) && len <= 3) + { + value = value * 10 + (*src - '0'); + ++len; + ++src; + } + if (len == 0 || value > 255 || (offset > 0 && *src != '.')) + return false; + result |= value << offset; + ++src; + } + if (*(src - 1) != '\0') + return false; + + memcpy(dst, &result, sizeof(result)); + return true; +} + +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. +* +* Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c * Parses the input string `src` and stores binary big-endian value into buffer pointed by `dst`, -* which should be long enough. In case of failure zeroes -* IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. 
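Before the IPv6 variant, a hedged usage sketch of parseIPv4 as documented above (assumes this header is included; main and the test literals are illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main()
    {
        unsigned char dst[IPV4_BINARY_LENGTH];
        assert(DB::parseIPv4("127.0.0.1", dst));   /// must be null-terminated right after the address
        uint32_t ip;
        std::memcpy(&ip, dst, sizeof(ip));         /// host-endian, as documented
        assert(ip == 0x7f000001);
        assert(!DB::parseIPv4("256.0.0.1", dst));  /// octet out of range: returns false, dst unmodified
        return 0;
    }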
-* -* @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value. +* which should be long enough. In case of failure zeroes +* IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. +* +* @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value. * @param dst - where to put output bytes, expected to be non-null and at IPV6_BINARY_LENGTH-long. -* @return false if parsing failed, true otherwise. -*/ -inline bool parseIPv6(const char * src, unsigned char * dst) -{ - const auto clear_dst = [dst]() - { - memset(dst, '\0', IPV6_BINARY_LENGTH); - return false; - }; - - /// Leading :: requires some special handling. - if (*src == ':') - if (*++src != ':') - return clear_dst(); - - unsigned char tmp[IPV6_BINARY_LENGTH]{}; +* @return false if parsing failed, true otherwise. +*/ +inline bool parseIPv6(const char * src, unsigned char * dst) +{ + const auto clear_dst = [dst]() + { + memset(dst, '\0', IPV6_BINARY_LENGTH); + return false; + }; + + /// Leading :: requires some special handling. + if (*src == ':') + if (*++src != ':') + return clear_dst(); + + unsigned char tmp[IPV6_BINARY_LENGTH]{}; unsigned char * tp = tmp; unsigned char * endp = tp + IPV6_BINARY_LENGTH; const char * curtok = src; bool saw_xdigit = false; - UInt32 val{}; - unsigned char * colonp = nullptr; - - /// Assuming zero-terminated string. + UInt32 val{}; + unsigned char * colonp = nullptr; + + /// Assuming zero-terminated string. while (char ch = *src++) - { + { UInt8 num = unhex(ch); - + if (num != 0xFF) - { - val <<= 4; - val |= num; - if (val > 0xffffu) - return clear_dst(); - + { + val <<= 4; + val |= num; + if (val > 0xffffu) + return clear_dst(); + saw_xdigit = true; - continue; - } - - if (ch == ':') - { - curtok = src; - if (!saw_xdigit) - { - if (colonp) - return clear_dst(); - - colonp = tp; - continue; - } - - if (tp + sizeof(UInt16) > endp) - return clear_dst(); - - *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); - *tp++ = static_cast<unsigned char>(val & 0xffu); - saw_xdigit = false; - val = 0; - continue; - } - - if (ch == '.' && (tp + IPV4_BINARY_LENGTH) <= endp) - { - if (!parseIPv4(curtok, tp)) - return clear_dst(); - std::reverse(tp, tp + IPV4_BINARY_LENGTH); - - tp += IPV4_BINARY_LENGTH; - saw_xdigit = false; - break; /* '\0' was seen by ipv4_scan(). */ - } - - return clear_dst(); - } - - if (saw_xdigit) - { - if (tp + sizeof(UInt16) > endp) - return clear_dst(); - - *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); - *tp++ = static_cast<unsigned char>(val & 0xffu); - } - - if (colonp) - { - /* - * Since some memmove()'s erroneously fail to handle - * overlapping regions, we'll do the shift by hand. - */ - const auto n = tp - colonp; - - for (int i = 1; i <= n; ++i) - { - endp[- i] = colonp[n - i]; - colonp[n - i] = 0; - } - tp = endp; - } - - if (tp != endp) - return clear_dst(); - - memcpy(dst, tmp, sizeof(tmp)); - return true; -} - -/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', + continue; + } + + if (ch == ':') + { + curtok = src; + if (!saw_xdigit) + { + if (colonp) + return clear_dst(); + + colonp = tp; + continue; + } + + if (tp + sizeof(UInt16) > endp) + return clear_dst(); + + *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); + *tp++ = static_cast<unsigned char>(val & 0xffu); + saw_xdigit = false; + val = 0; + continue; + } + + if (ch == '.' 
&& (tp + IPV4_BINARY_LENGTH) <= endp) + { + if (!parseIPv4(curtok, tp)) + return clear_dst(); + std::reverse(tp, tp + IPV4_BINARY_LENGTH); + + tp += IPV4_BINARY_LENGTH; + saw_xdigit = false; + break; /* '\0' was seen by ipv4_scan(). */ + } + + return clear_dst(); + } + + if (saw_xdigit) + { + if (tp + sizeof(UInt16) > endp) + return clear_dst(); + + *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); + *tp++ = static_cast<unsigned char>(val & 0xffu); + } + + if (colonp) + { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const auto n = tp - colonp; + + for (int i = 1; i <= n; ++i) + { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + + if (tp != endp) + return clear_dst(); + + memcpy(dst, tmp, sizeof(tmp)); + return true; +} + +/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1". - * - * Any number of the tail bytes can be masked with given mask string. - * - * Assumptions: - * src is IPV4_BINARY_LENGTH long, - * dst is IPV4_MAX_TEXT_LENGTH long, - * mask_tail_octets <= IPV4_BINARY_LENGTH - * mask_string is NON-NULL, if mask_tail_octets > 0. - * - * Examples: - * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); - * > dst == "127.0.0.1" - * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); - * > dst == "127.0.0.xxx" - * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); - * > dst == "127.0.0.0" - */ -inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx") -{ - extern const char one_byte_to_string_lookup_table[256][4]; - - const size_t mask_length = mask_string ? strlen(mask_string) : 0; - const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); - for (size_t octet = 0; octet < limit; ++octet) - { - const uint8_t value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]); + * + * Any number of the tail bytes can be masked with given mask string. + * + * Assumptions: + * src is IPV4_BINARY_LENGTH long, + * dst is IPV4_MAX_TEXT_LENGTH long, + * mask_tail_octets <= IPV4_BINARY_LENGTH + * mask_string is NON-NULL, if mask_tail_octets > 0. + * + * Examples: + * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); + * > dst == "127.0.0.1" + * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); + * > dst == "127.0.0.xxx" + * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); + * > dst == "127.0.0.0" + */ +inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx") +{ + extern const char one_byte_to_string_lookup_table[256][4]; + + const size_t mask_length = mask_string ? 
strlen(mask_string) : 0; + const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); + for (size_t octet = 0; octet < limit; ++octet) + { + const uint8_t value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]); const auto * rep = one_byte_to_string_lookup_table[value]; - const uint8_t len = rep[0]; - const char* str = rep + 1; - - memcpy(dst, str, len); - dst += len; - *dst++ = '.'; - } - - for (size_t mask = 0; mask < mask_tail_octets; ++mask) - { - memcpy(dst, mask_string, mask_length); - dst += mask_length; - - *dst++ = '.'; - } - - dst[-1] = '\0'; -} - -} + const uint8_t len = rep[0]; + const char* str = rep + 1; + + memcpy(dst, str, len); + dst += len; + *dst++ = '.'; + } + + for (size_t mask = 0; mask < mask_tail_octets; ++mask) + { + memcpy(dst, mask_string, mask_length); + dst += mask_length; + + *dst++ = '.'; + } + + dst[-1] = '\0'; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp index fea591369f..39798c4882 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp @@ -1,30 +1,30 @@ -#include <Common/getMultipleKeysFromConfig.h> - -#include <Poco/Util/AbstractConfiguration.h> - -namespace DB -{ -std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name) -{ - std::vector<std::string> values; - Poco::Util::AbstractConfiguration::Keys config_keys; - config.keys(root, config_keys); - for (const auto & key : config_keys) - { +#include <Common/getMultipleKeysFromConfig.h> + +#include <Poco/Util/AbstractConfiguration.h> + +namespace DB +{ +std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name) +{ + std::vector<std::string> values; + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(root, config_keys); + for (const auto & key : config_keys) + { if (key != name && !(key.starts_with(name + "[") && key.ends_with("]"))) - continue; - values.emplace_back(key); - } - return values; -} - - -std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name) -{ - std::vector<std::string> values; - for (const auto & key : DB::getMultipleKeysFromConfig(config, root, name)) - values.emplace_back(config.getString(root.empty() ? key : root + "." + key)); - return values; -} - -} + continue; + values.emplace_back(key); + } + return values; +} + + +std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name) +{ + std::vector<std::string> values; + for (const auto & key : DB::getMultipleKeysFromConfig(config, root, name)) + values.emplace_back(config.getString(root.empty() ? key : root + "." 
+ key)); + return values; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h index c55f8f0e98..1c58af7bb4 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h @@ -1,18 +1,18 @@ -#pragma once -#include <string> -#include <vector> - -namespace Poco -{ -namespace Util -{ - class AbstractConfiguration; -} -} -namespace DB -{ -/// Get all internal key names for a given key -std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name); -/// Get all values for a given key -std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name); -} +#pragma once +#include <string> +#include <vector> + +namespace Poco +{ +namespace Util +{ + class AbstractConfiguration; +} +} +namespace DB +{ +/// Get all internal key names for a given key +std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name); +/// Get all values for a given key +std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name); +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp index f0de9bb752..a9db8223eb 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -1,9 +1,9 @@ -#include "getNumberOfPhysicalCPUCores.h" - -#include <thread> - -unsigned getNumberOfPhysicalCPUCores() -{ +#include "getNumberOfPhysicalCPUCores.h" + +#include <thread> + +unsigned getNumberOfPhysicalCPUCores() +{ static const unsigned number = [] { /// As a fallback (also for non-x86 architectures) assume there is no hyper-threading on the system. @@ -11,4 +11,4 @@ unsigned getNumberOfPhysicalCPUCores() return std::thread::hardware_concurrency(); }(); return number; -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h index bfb3d5ee8d..827e95e1be 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h @@ -1,4 +1,4 @@ -#pragma once - -/// Get number of CPU cores without hyper-threading. -unsigned getNumberOfPhysicalCPUCores(); +#pragma once + +/// Get number of CPU cores without hyper-threading.
+unsigned getNumberOfPhysicalCPUCores(); diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp index 90d830ac73..c71a5f6c9d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp @@ -1,43 +1,43 @@ -#if defined(__linux__) - -#include "hasLinuxCapability.h" - -#include <syscall.h> -#include <unistd.h> -#include <linux/capability.h> -#include <Common/Exception.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NETLINK_ERROR; -} - -static __user_cap_data_struct getCapabilities() -{ - /// See man getcap. - __user_cap_header_struct request{}; - request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just the single CAP_NET_ADMIN capability we are interested in. - request.pid = getpid(); - - __user_cap_data_struct response{}; - - /// Avoid dependency on 'libcap'. - if (0 != syscall(SYS_capget, &request, &response)) - throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR); - - return response; -} - -bool hasLinuxCapability(int cap) -{ - static __user_cap_data_struct capabilities = getCapabilities(); - return (1 << cap) & capabilities.effective; -} - -} - -#endif +#if defined(__linux__) + +#include "hasLinuxCapability.h" + +#include <syscall.h> +#include <unistd.h> +#include <linux/capability.h> +#include <Common/Exception.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NETLINK_ERROR; +} + +static __user_cap_data_struct getCapabilities() +{ + /// See man getcap. + __user_cap_header_struct request{}; + request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just the single CAP_NET_ADMIN capability we are interested in. + request.pid = getpid(); + + __user_cap_data_struct response{}; + + /// Avoid dependency on 'libcap'. + if (0 != syscall(SYS_capget, &request, &response)) + throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR); + + return response; +} + +bool hasLinuxCapability(int cap) +{ + static __user_cap_data_struct capabilities = getCapabilities(); + return (1 << cap) & capabilities.effective; +} + +} + +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h index 55181dbe56..4a9d2214a7 100644 @@ -1,14 +1,14 @@ #pragma once -#if defined(__linux__) - -#include <linux/capability.h> - -namespace DB -{ - -/// Check that the current process has Linux capability. Examples: CAP_IPC_LOCK, CAP_NET_ADMIN. -bool hasLinuxCapability(int cap); - -} - -#endif +#if defined(__linux__) + +#include <linux/capability.h> + +namespace DB +{ + +/// Check that the current process has Linux capability. Examples: CAP_IPC_LOCK, CAP_NET_ADMIN.
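A minimal Linux-only usage sketch (the caller and its purpose are hypothetical; CAP_NET_ADMIN comes from <linux/capability.h>):

    #if defined(__linux__)
    #include <linux/capability.h>
    #include <Common/hasLinuxCapability.h>

    void maybeUseNetlink()    /// hypothetical caller
    {
        /// The effective capability set is read once via the capget syscall and cached in a static.
        if (DB::hasLinuxCapability(CAP_NET_ADMIN))
        {
            /// ... safe to open a netlink socket and query kernel statistics ...
        }
    }
    #endif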
+bool hasLinuxCapability(int cap); + +} + +#endif diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp index 2e9dc759d9..9e22c58f94 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp @@ -1,22 +1,22 @@ -#include <Common/isLocalAddress.h> - +#include <Common/isLocalAddress.h> + #include <ifaddrs.h> -#include <cstring> +#include <cstring> #include <optional> #include <common/types.h> #include <Common/Exception.h> #include <Poco/Net/IPAddress.h> -#include <Poco/Net/SocketAddress.h> - - -namespace DB -{ - +#include <Poco/Net/SocketAddress.h> + + +namespace DB +{ + namespace ErrorCodes -{ + extern const int SYSTEM_ERROR; } - + namespace { @@ -46,7 +46,7 @@ struct NetworkInterfaces { /// We are interested only in IP-addresses case AF_INET: - { + { interface_address.emplace(*(iface->ifa_addr)); break; } @@ -76,8 +76,8 @@ struct NetworkInterfaces } }; -} - +} + bool isLocalAddress(const Poco::Net::IPAddress & address) { @@ -116,19 +116,19 @@ bool isLocalAddress(const Poco::Net::IPAddress & address) } -bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port) -{ - return clickhouse_port == address.port() && isLocalAddress(address.host()); -} - - -size_t getHostNameDifference(const std::string & local_hostname, const std::string & host) -{ - size_t hostname_difference = 0; - for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i) - if (local_hostname[i] != host[i]) - ++hostname_difference; - return hostname_difference; -} - -} +bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port) +{ + return clickhouse_port == address.port() && isLocalAddress(address.host()); +} + + +size_t getHostNameDifference(const std::string & local_hostname, const std::string & host) +{ + size_t hostname_difference = 0; + for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i) + if (local_hostname[i] != host[i]) + ++hostname_difference; + return hostname_difference; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h index 520038ace9..3d0db2d955 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h @@ -1,31 +1,31 @@ -#pragma once - -#include <common/types.h> -#include <Poco/Net/IPAddress.h> - - -namespace Poco -{ - namespace Net - { - class SocketAddress; - } -} - -namespace DB -{ - /** Lets you check if the address is similar to `localhost`. - * The purpose of this check is usually to make an assumption, - * that when we go to this address via the Internet, we'll get to ourselves. - * Please note that this check is not accurate: - * - the address is simply compared to the addresses of the network interfaces; - * - only the first address is taken for each network interface; - * - the routing rules that affect which network interface we go to the specified address are not checked.
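A short sketch of the intended use of the overloads declared here (the host string and the server port 9000 are illustrative):

    #include <Common/isLocalAddress.h>
    #include <Poco/Net/SocketAddress.h>

    bool replicaIsMyself(const std::string & host_and_port)
    {
        Poco::Net::SocketAddress replica(host_and_port);    /// e.g. "127.0.0.1:9000"
        /// True only if the port matches this server and the host is one of the
        /// local interface addresses, with the caveats listed in the comment above.
        return DB::isLocalAddress(replica, /* clickhouse_port = */ 9000);
    }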
- */ - bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port); - bool isLocalAddress(const Poco::Net::SocketAddress & address); - bool isLocalAddress(const Poco::Net::IPAddress & address); - - /// Returns number of different bytes in hostnames, used for load balancing - size_t getHostNameDifference(const std::string & local_hostname, const std::string & host); -} +#pragma once + +#include <common/types.h> +#include <Poco/Net/IPAddress.h> + + +namespace Poco +{ + namespace Net + { + class SocketAddress; + } +} + +namespace DB +{ + /** Lets you check if the address is similar to `localhost`. + * The purpose of this check is usually to make an assumption, + * that when we go to this address via the Internet, we'll get to ourselves. + * Please note that this check is not accurate: + * - the address is simply compared to the addresses of the network interfaces; + * - only the first address is taken for each network interface; + * - the routing rules that affect which network interface we go to the specified address are not checked. + */ + bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port); + bool isLocalAddress(const Poco::Net::SocketAddress & address); + bool isLocalAddress(const Poco::Net::IPAddress & address); + + /// Returns number of different bytes in hostnames, used for load balancing + size_t getHostNameDifference(const std::string & local_hostname, const std::string & host); +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp index f5450d05d6..c99c08896a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp @@ -1,40 +1,40 @@ -#include <Common/parseAddress.h> -#include <Common/Exception.h> -#include <IO/ReadHelpers.h> -#include <common/find_symbols.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port) -{ - if (str.empty()) - throw Exception("Empty address passed to function parseAddress", ErrorCodes::BAD_ARGUMENTS); - - const char * begin = str.data(); - const char * end = begin + str.size(); - const char * port = end; // NOLINT - - if (begin[0] == '[') - { - const char * closing_square_bracket = find_first_symbols<']'>(begin + 1, end); - if (closing_square_bracket >= end) - throw Exception("Illegal address passed to function parseAddress: " - "the address begins with opening square bracket, but no closing square bracket found", ErrorCodes::BAD_ARGUMENTS); - +#include <Common/parseAddress.h> +#include <Common/Exception.h> +#include <IO/ReadHelpers.h> +#include <common/find_symbols.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port) +{ + if (str.empty()) + throw Exception("Empty address passed to function parseAddress", ErrorCodes::BAD_ARGUMENTS); + + const char * begin = str.data(); + const char * end = begin + str.size(); + const char * port = end; // NOLINT + + if (begin[0] == '[') + { + const char * closing_square_bracket = find_first_symbols<']'>(begin + 1, end); + if (closing_square_bracket >= end) + throw Exception("Illegal address passed to function parseAddress: " + "the address begins with opening square bracket, but no closing square 
bracket found", ErrorCodes::BAD_ARGUMENTS); + port = closing_square_bracket + 1; - } - else - port = find_first_symbols<':'>(begin, end); - - if (port != end) - { + } + else + port = find_first_symbols<':'>(begin, end); + + if (port != end) + { if (*port != ':') throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal port prefix passed to function parseAddress: {}", port); @@ -49,14 +49,14 @@ std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 defa "Illegal port passed to function parseAddress: {}", port); } return { std::string(begin, port - 1), port_number }; - } - else if (default_port) - { - return { str, default_port }; - } - else - throw Exception("The address passed to function parseAddress doesn't contain port number " - "and no 'default_port' was passed", ErrorCodes::BAD_ARGUMENTS); -} - -} + } + else if (default_port) + { + return { str, default_port }; + } + else + throw Exception("The address passed to function parseAddress doesn't contain port number " + "and no 'default_port' was passed", ErrorCodes::BAD_ARGUMENTS); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h index 078aea0a73..602a9adc0b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h @@ -1,22 +1,22 @@ -#pragma once - -#include <string> -#include <map> -#include <common/types.h> - - -namespace DB -{ - -/** Parse address from string, that can contain host with or without port. - * If port was not specified and default_port is not zero, default_port is used. - * Otherwise, an exception is thrown. - * - * Examples: - * yandex.ru - returns "yandex.ru" and default_port - * yandex.ru:80 - returns "yandex.ru" and 80 - * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host. - */ -std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port); - -} +#pragma once + +#include <string> +#include <map> +#include <common/types.h> + + +namespace DB +{ + +/** Parse address from string, that can contain host with or without port. + * If port was not specified and default_port is not zero, default_port is used. + * Otherwise, an exception is thrown. + * + * Examples: + * yandex.ru - returns "yandex.ru" and default_port + * yandex.ru:80 - returns "yandex.ru" and 80 + * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host. 
+ */ +std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port); + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp index a8b0ebb7bd..ded224e56c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp @@ -1,34 +1,34 @@ -#include <time.h> -#include <unistd.h> -#include <sys/types.h> -#include <Common/Exception.h> -#include <Common/randomSeed.h> -#include <Common/SipHash.h> +#include <time.h> +#include <unistd.h> +#include <sys/types.h> +#include <Common/Exception.h> +#include <Common/randomSeed.h> +#include <Common/SipHash.h> #include <common/getThreadId.h> #include <common/types.h> - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_CLOCK_GETTIME; - } -} - - -DB::UInt64 randomSeed() -{ - struct timespec times; + + +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_CLOCK_GETTIME; + } +} + + +DB::UInt64 randomSeed() +{ + struct timespec times; if (clock_gettime(CLOCK_MONOTONIC, &times)) - DB::throwFromErrno("Cannot clock_gettime.", DB::ErrorCodes::CANNOT_CLOCK_GETTIME); - - /// Not cryptographically secure as time, pid and stack address can be predictable. - - SipHash hash; - hash.update(times.tv_nsec); - hash.update(times.tv_sec); + DB::throwFromErrno("Cannot clock_gettime.", DB::ErrorCodes::CANNOT_CLOCK_GETTIME); + + /// Not cryptographically secure as time, pid and stack address can be predictable. + + SipHash hash; + hash.update(times.tv_nsec); + hash.update(times.tv_sec); hash.update(getThreadId()); - hash.update(&times); - return hash.get64(); -} + hash.update(&times); + return hash.get64(); +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h index 9305715cf6..4f04e4b974 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h @@ -1,7 +1,7 @@ -#pragma once - -#include <cstdint> +#pragma once + +#include <cstdint> #include <common/types.h> - -/** Returns a number suitable as seed for PRNG. Uses clock_gettime, pid and so on. */ -DB::UInt64 randomSeed(); + +/** Returns a number suitable as seed for PRNG. Uses clock_gettime, pid and so on.
*/ +DB::UInt64 randomSeed(); diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp index f82f47e029..78241ec1b6 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp @@ -1,76 +1,76 @@ -#include "CompressedReadBuffer.h" -#include <Compression/LZ4_decompress_faster.h> - - -namespace DB -{ - -bool CompressedReadBuffer::nextImpl() -{ - size_t size_decompressed; - size_t size_compressed_without_checksum; +#include "CompressedReadBuffer.h" +#include <Compression/LZ4_decompress_faster.h> + + +namespace DB +{ + +bool CompressedReadBuffer::nextImpl() +{ + size_t size_decompressed; + size_t size_compressed_without_checksum; size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); - if (!size_compressed) - return false; - - auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); - - /// This is for clang static analyzer. - assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); - - memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); - working_buffer = Buffer(memory.data(), &memory[size_decompressed]); - + if (!size_compressed) + return false; + + auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); + + /// This is for clang static analyzer. + assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); + + memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); + working_buffer = Buffer(memory.data(), &memory[size_decompressed]); + decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - - return true; -} - -size_t CompressedReadBuffer::readBig(char * to, size_t n) -{ - size_t bytes_read = 0; - - /// If there are unread bytes in the buffer, then we copy necessary to `to`. - if (pos < working_buffer.end()) - bytes_read += read(to, std::min(static_cast<size_t>(working_buffer.end() - pos), n)); - - /// If you need to read more - we will, if possible, uncompress at once to `to`. - while (bytes_read < n) - { - size_t size_decompressed; - size_t size_compressed_without_checksum; - + + return true; +} + +size_t CompressedReadBuffer::readBig(char * to, size_t n) +{ + size_t bytes_read = 0; + + /// If there are unread bytes in the buffer, then we copy necessary to `to`. + if (pos < working_buffer.end()) + bytes_read += read(to, std::min(static_cast<size_t>(working_buffer.end() - pos), n)); + + /// If you need to read more - we will, if possible, uncompress at once to `to`. + while (bytes_read < n) + { + size_t size_decompressed; + size_t size_compressed_without_checksum; + if (!readCompressedData(size_decompressed, size_compressed_without_checksum, false)) - return bytes_read; - - auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); - - /// If the decompressed block fits entirely where it needs to be copied. - if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) - { + return bytes_read; + + auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); + + /// If the decompressed block fits entirely where it needs to be copied. 
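For callers, the point of this readBig fast path is that large reads bypass the internal buffer; a hedged sketch (file name and sizes are illustrative, and the input is assumed to be a stream of standard ClickHouse compressed frames):

    #include <Compression/CompressedReadBuffer.h>
    #include <IO/ReadBufferFromFile.h>
    #include <vector>

    std::vector<char> readDecompressed(const std::string & path, size_t max_bytes)
    {
        DB::ReadBufferFromFile file(path);
        DB::CompressedReadBuffer in(file);
        std::vector<char> out(max_bytes);
        /// Blocks that fit entirely are decompressed straight into out.data();
        /// only a partial tail block goes through the internal buffer.
        out.resize(in.readBig(out.data(), out.size()));
        return out;
    }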
+ if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) + { decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); - bytes_read += size_decompressed; - bytes += size_decompressed; - } - else - { - bytes += offset(); - - /// This is for clang static analyzer. - assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); - - memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); - working_buffer = Buffer(memory.data(), &memory[size_decompressed]); + bytes_read += size_decompressed; + bytes += size_decompressed; + } + else + { + bytes += offset(); + + /// This is for clang static analyzer. + assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); + + memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); + working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - pos = working_buffer.begin(); - - bytes_read += read(to + bytes_read, n - bytes_read); - break; - } - } - - return bytes_read; -} - -} + pos = working_buffer.begin(); + + bytes_read += read(to + bytes_read, n - bytes_read); + break; + } + } + + return bytes_read; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h index 40e5d87111..3fa7347507 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h @@ -1,33 +1,33 @@ -#pragma once - -#include "CompressedReadBufferBase.h" -#include <IO/BufferWithOwnMemory.h> -#include <IO/ReadBuffer.h> - - -namespace DB -{ - -class CompressedReadBuffer : public CompressedReadBufferBase, public BufferWithOwnMemory<ReadBuffer> -{ -private: - size_t size_compressed = 0; - - bool nextImpl() override; - -public: +#pragma once + +#include "CompressedReadBufferBase.h" +#include <IO/BufferWithOwnMemory.h> +#include <IO/ReadBuffer.h> + + +namespace DB +{ + +class CompressedReadBuffer : public CompressedReadBufferBase, public BufferWithOwnMemory<ReadBuffer> +{ +private: + size_t size_compressed = 0; + + bool nextImpl() override; + +public: CompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false) : CompressedReadBufferBase(&in_, allow_different_codecs_), BufferWithOwnMemory<ReadBuffer>(0) - { - } - - size_t readBig(char * to, size_t n) override; - - /// The compressed size of the current block. - size_t getSizeCompressed() const - { - return size_compressed; - } -}; - -} + { + } + + size_t readBig(char * to, size_t n) override; + + /// The compressed size of the current block. 
+ size_t getSizeCompressed() const + { + return size_compressed; + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp index f584a9f0a3..12f138dc95 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp @@ -1,24 +1,24 @@ -#include <lz4.h> -#include <lz4hc.h> +#include <lz4.h> +#include <lz4hc.h> #include <Compression/ICompressionCodec.h> -#include <Compression/CompressionInfo.h> -#include <Compression/CompressionFactory.h> -#include <Compression/LZ4_decompress_faster.h> -#include <Parsers/IAST.h> -#include <Parsers/ASTLiteral.h> +#include <Compression/CompressionInfo.h> +#include <Compression/CompressionFactory.h> +#include <Compression/LZ4_decompress_faster.h> +#include <Parsers/IAST.h> +#include <Parsers/ASTLiteral.h> #include <Parsers/ASTFunction.h> #include <Parsers/ASTIdentifier.h> #include <IO/WriteBuffer.h> -#include <IO/WriteHelpers.h> +#include <IO/WriteHelpers.h> #include <IO/BufferWithOwnMemory.h> - -#pragma GCC diagnostic ignored "-Wold-style-cast" - - -namespace DB -{ - + +#pragma GCC diagnostic ignored "-Wold-style-cast" + + +namespace DB +{ + class CompressionCodecLZ4 : public ICompressionCodec { public: @@ -59,98 +59,98 @@ private: }; -namespace ErrorCodes -{ +namespace ErrorCodes +{ extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; -} - +} + CompressionCodecLZ4::CompressionCodecLZ4() { setCodecDescription("LZ4"); } - -uint8_t CompressionCodecLZ4::getMethodByte() const -{ - return static_cast<uint8_t>(CompressionMethodByte::LZ4); -} - + +uint8_t CompressionCodecLZ4::getMethodByte() const +{ + return static_cast<uint8_t>(CompressionMethodByte::LZ4); +} + void CompressionCodecLZ4::updateHash(SipHash & hash) const { getCodecDesc()->updateTreeHash(hash); } -UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const -{ - return LZ4_COMPRESSBOUND(uncompressed_size); -} - -UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ - return LZ4_compress_default(source, dest, source_size, LZ4_COMPRESSBOUND(source_size)); -} - -void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const -{ +UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const +{ + return LZ4_COMPRESSBOUND(uncompressed_size); +} + +UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + return LZ4_compress_default(source, dest, source_size, LZ4_COMPRESSBOUND(source_size)); +} + +void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +{ bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat); if (!success) throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS); -} - -void registerCodecLZ4(CompressionCodecFactory & factory) -{ - factory.registerSimpleCompressionCodec("LZ4", static_cast<UInt8>(CompressionMethodByte::LZ4), [&] () - { - return std::make_shared<CompressionCodecLZ4>(); - }); -} - -UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ 
- auto success = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level); - - if (!success) - throw Exception("Cannot LZ4_compress_HC", ErrorCodes::CANNOT_COMPRESS); - - return success; -} - -void registerCodecLZ4HC(CompressionCodecFactory & factory) -{ - factory.registerCompressionCodec("LZ4HC", {}, [&](const ASTPtr & arguments) -> CompressionCodecPtr - { - int level = 0; - - if (arguments && !arguments->children.empty()) - { - if (arguments->children.size() > 1) - throw Exception("LZ4HC codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); - - const auto children = arguments->children; - const auto * literal = children[0]->as<ASTLiteral>(); - if (!literal) - throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - - level = literal->value.safeGet<UInt64>(); - } - - return std::make_shared<CompressionCodecLZ4HC>(level); - }); -} - -CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_) - : level(level_) -{ +} + +void registerCodecLZ4(CompressionCodecFactory & factory) +{ + factory.registerSimpleCompressionCodec("LZ4", static_cast<UInt8>(CompressionMethodByte::LZ4), [&] () + { + return std::make_shared<CompressionCodecLZ4>(); + }); +} + +UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + auto success = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level); + + if (!success) + throw Exception("Cannot LZ4_compress_HC", ErrorCodes::CANNOT_COMPRESS); + + return success; +} + +void registerCodecLZ4HC(CompressionCodecFactory & factory) +{ + factory.registerCompressionCodec("LZ4HC", {}, [&](const ASTPtr & arguments) -> CompressionCodecPtr + { + int level = 0; + + if (arguments && !arguments->children.empty()) + { + if (arguments->children.size() > 1) + throw Exception("LZ4HC codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); + + const auto children = arguments->children; + const auto * literal = children[0]->as<ASTLiteral>(); + if (!literal) + throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); + + level = literal->value.safeGet<UInt64>(); + } + + return std::make_shared<CompressionCodecLZ4HC>(level); + }); +} + +CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_) + : level(level_) +{ setCodecDescription("LZ4HC", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))}); -} - +} + CompressionCodecPtr getCompressionCodecLZ4(int level) { return std::make_shared<CompressionCodecLZ4HC>(level); -} +} } diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp index 05b584f052..84bcb5bd84 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp @@ -1,44 +1,44 @@ -#include <Compression/CompressionCodecNone.h> -#include <Compression/CompressionInfo.h> -#include <Compression/CompressionFactory.h> +#include <Compression/CompressionCodecNone.h> +#include <Compression/CompressionInfo.h> +#include <Compression/CompressionFactory.h> #include <Parsers/ASTIdentifier.h> - - -namespace DB -{ - + + +namespace DB +{ + CompressionCodecNone::CompressionCodecNone() -{ +{ setCodecDescription("NONE"); -} - +} + 
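For orientation, a hedged sketch of how these register* calls are consumed through the factory interface defined further down in CompressionFactory.cpp (std::nullopt means no level; note that this vendored build registers only LZ4 and NONE, the other codecs are compiled out):

    #include <Compression/CompressionFactory.h>
    #include <optional>

    void codecLookupExample()
    {
        auto & factory = DB::CompressionCodecFactory::instance();
        auto none = factory.get("NONE", std::nullopt);
        auto lz4 = factory.get("LZ4", std::nullopt);    /// also the factory default
        /// factory.get("LZ4HC", 9) would go through the ASTLiteral level path
        /// shown above, but only when the LZ4HC registration is enabled.
    }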
uint8_t CompressionCodecNone::getMethodByte() const -{ +{ return static_cast<uint8_t>(CompressionMethodByte::NONE); -} - +} + void CompressionCodecNone::updateHash(SipHash & hash) const { getCodecDesc()->updateTreeHash(hash); } -UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ - memcpy(dest, source, source_size); - return source_size; -} - -void CompressionCodecNone::doDecompressData(const char * source, UInt32 /*source_size*/, char * dest, UInt32 uncompressed_size) const -{ - memcpy(dest, source, uncompressed_size); -} - -void registerCodecNone(CompressionCodecFactory & factory) -{ - factory.registerSimpleCompressionCodec("NONE", static_cast<char>(CompressionMethodByte::NONE), [&] () - { - return std::make_shared<CompressionCodecNone>(); - }); -} - -} +UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + memcpy(dest, source, source_size); + return source_size; +} + +void CompressionCodecNone::doDecompressData(const char * source, UInt32 /*source_size*/, char * dest, UInt32 uncompressed_size) const +{ + memcpy(dest, source, uncompressed_size); +} + +void registerCodecNone(CompressionCodecFactory & factory) +{ + factory.registerSimpleCompressionCodec("NONE", static_cast<char>(CompressionMethodByte::NONE), [&] () + { + return std::make_shared<CompressionCodecNone>(); + }); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h index 8a0b0add67..bf6bb6de4e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h @@ -1,31 +1,31 @@ -#pragma once - -#include <IO/WriteBuffer.h> -#include <Compression/ICompressionCodec.h> -#include <IO/BufferWithOwnMemory.h> -#include <Parsers/StringRange.h> - -namespace DB -{ - -class CompressionCodecNone : public ICompressionCodec -{ -public: +#pragma once + +#include <IO/WriteBuffer.h> +#include <Compression/ICompressionCodec.h> +#include <IO/BufferWithOwnMemory.h> +#include <Parsers/StringRange.h> + +namespace DB +{ + +class CompressionCodecNone : public ICompressionCodec +{ +public: CompressionCodecNone(); - uint8_t getMethodByte() const override; - + uint8_t getMethodByte() const override; + void updateHash(SipHash & hash) const override; -protected: - - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - - bool isCompression() const override { return false; } - bool isGenericCompression() const override { return false; } - bool isNone() const override { return true; } -}; - -} +protected: + + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + + bool isCompression() const override { return false; } + bool isGenericCompression() const override { return false; } + bool isNone() const override { return true; } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp index 9244906e48..1796303988 100644 --- 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp @@ -1,48 +1,48 @@ -#include <Compression/CompressionFactory.h> -#include <Parsers/ASTFunction.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTLiteral.h> -#include <Poco/String.h> -#include <IO/ReadBuffer.h> -#include <Parsers/queryToString.h> -#include <Compression/CompressionCodecMultiple.h> +#include <Compression/CompressionFactory.h> +#include <Parsers/ASTFunction.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTLiteral.h> +#include <Poco/String.h> +#include <IO/ReadBuffer.h> +#include <Parsers/queryToString.h> +#include <Compression/CompressionCodecMultiple.h> #include <Compression/CompressionCodecNone.h> -#include <IO/WriteHelpers.h> - +#include <IO/WriteHelpers.h> + #include <boost/algorithm/string/join.hpp> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNKNOWN_CODEC; - extern const int UNEXPECTED_AST_STRUCTURE; - extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS; -} - -CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const -{ - return default_codec; -} - - + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_CODEC; + extern const int UNEXPECTED_AST_STRUCTURE; + extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS; +} + +CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const +{ + return default_codec; +} + + CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std::optional<int> level) const -{ - if (level) - { +{ + if (level) + { auto level_literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level)); return get(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), level_literal)), {}); - } - else - { - auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name)); + } + else + { + auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name)); return get(makeASTFunction("CODEC", identifier), {}); - } -} - + } +} + CompressionCodecPtr CompressionCodecFactory::get( const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const @@ -52,24 +52,24 @@ CompressionCodecPtr CompressionCodecFactory::get( if (const auto * func = ast->as<ASTFunction>()) { - Codecs codecs; - codecs.reserve(func->arguments->children.size()); - for (const auto & inner_codec_ast : func->arguments->children) - { + Codecs codecs; + codecs.reserve(func->arguments->children.size()); + for (const auto & inner_codec_ast : func->arguments->children) + { String codec_family_name; ASTPtr codec_arguments; - if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>()) + if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>()) { codec_family_name = family_name->name(); codec_arguments = {}; } - else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>()) + else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>()) { codec_family_name = ast_func->name; codec_arguments = ast_func->arguments; } - else - throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + else + throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE); CompressionCodecPtr codec; if (codec_family_name == DEFAULT_CODEC_NAME) @@ -81,119 +81,119 @@ CompressionCodecPtr 
CompressionCodecFactory::get( continue; codecs.emplace_back(codec); - } - - CompressionCodecPtr res; - - if (codecs.size() == 1) + } + + CompressionCodecPtr res; + + if (codecs.size() == 1) return codecs.back(); - else if (codecs.size() > 1) + else if (codecs.size() > 1) return std::make_shared<CompressionCodecMultiple>(codecs); else return std::make_shared<CompressionCodecNone>(); - } - + } + throw Exception("Unexpected AST structure for compression codec: " + queryToString(ast), ErrorCodes::UNEXPECTED_AST_STRUCTURE); -} - - -CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const -{ - const auto family_code_and_creator = family_code_with_codec.find(byte_code); - - if (family_code_and_creator == family_code_with_codec.end()) - throw Exception("Unknown codec family code: " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC); - - return family_code_and_creator->second({}, nullptr); -} - - +} + + +CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const +{ + const auto family_code_and_creator = family_code_with_codec.find(byte_code); + + if (family_code_and_creator == family_code_with_codec.end()) + throw Exception("Unknown codec family code: " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC); + + return family_code_and_creator->second({}, nullptr); +} + + CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const -{ - if (family_name == "Multiple") - throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC); - - const auto family_and_creator = family_name_with_codec.find(family_name); - - if (family_and_creator == family_name_with_codec.end()) - throw Exception("Unknown codec family: " + family_name, ErrorCodes::UNKNOWN_CODEC); - - return family_and_creator->second(arguments, column_type); -} - -void CompressionCodecFactory::registerCompressionCodecWithType( - const String & family_name, - std::optional<uint8_t> byte_code, - CreatorWithType creator) -{ - if (creator == nullptr) - throw Exception("CompressionCodecFactory: the codec family " + family_name + " has been provided a null constructor", - ErrorCodes::LOGICAL_ERROR); - - if (!family_name_with_codec.emplace(family_name, creator).second) - throw Exception("CompressionCodecFactory: the codec family name '" + family_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); - - if (byte_code) - if (!family_code_with_codec.emplace(*byte_code, creator).second) - throw Exception("CompressionCodecFactory: the codec family code '" + std::to_string(*byte_code) + "' is not unique", ErrorCodes::LOGICAL_ERROR); -} - -void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator) -{ +{ + if (family_name == "Multiple") + throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC); + + const auto family_and_creator = family_name_with_codec.find(family_name); + + if (family_and_creator == family_name_with_codec.end()) + throw Exception("Unknown codec family: " + family_name, ErrorCodes::UNKNOWN_CODEC); + + return family_and_creator->second(arguments, column_type); +} + +void CompressionCodecFactory::registerCompressionCodecWithType( + const String & family_name, + std::optional<uint8_t> byte_code, + CreatorWithType creator) +{ + if (creator == nullptr) + throw Exception("CompressionCodecFactory: the codec family " + family_name + " has been provided a null constructor", + 
ErrorCodes::LOGICAL_ERROR); + + if (!family_name_with_codec.emplace(family_name, creator).second) + throw Exception("CompressionCodecFactory: the codec family name '" + family_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); + + if (byte_code) + if (!family_code_with_codec.emplace(*byte_code, creator).second) + throw Exception("CompressionCodecFactory: the codec family code '" + std::to_string(*byte_code) + "' is not unique", ErrorCodes::LOGICAL_ERROR); +} + +void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator) +{ registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, const IDataType * /* data_type */) - { - return creator(ast); - }); -} - -void CompressionCodecFactory::registerSimpleCompressionCodec( - const String & family_name, - std::optional<uint8_t> byte_code, - SimpleCreator creator) -{ - registerCompressionCodec(family_name, byte_code, [family_name, creator](const ASTPtr & ast) - { - if (ast) + { + return creator(ast); + }); +} + +void CompressionCodecFactory::registerSimpleCompressionCodec( + const String & family_name, + std::optional<uint8_t> byte_code, + SimpleCreator creator) +{ + registerCompressionCodec(family_name, byte_code, [family_name, creator](const ASTPtr & ast) + { + if (ast) throw Exception(ErrorCodes::DATA_TYPE_CANNOT_HAVE_ARGUMENTS, "Compression codec {} cannot have arguments", family_name); - return creator(); - }); -} - - -void registerCodecNone(CompressionCodecFactory & factory); + return creator(); + }); +} + + +void registerCodecNone(CompressionCodecFactory & factory); void registerCodecLZ4(CompressionCodecFactory & factory); /* void registerCodecLZ4HC(CompressionCodecFactory & factory); -void registerCodecZSTD(CompressionCodecFactory & factory); -void registerCodecDelta(CompressionCodecFactory & factory); -void registerCodecT64(CompressionCodecFactory & factory); -void registerCodecDoubleDelta(CompressionCodecFactory & factory); -void registerCodecGorilla(CompressionCodecFactory & factory); +void registerCodecZSTD(CompressionCodecFactory & factory); +void registerCodecDelta(CompressionCodecFactory & factory); +void registerCodecT64(CompressionCodecFactory & factory); +void registerCodecDoubleDelta(CompressionCodecFactory & factory); +void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecMultiple(CompressionCodecFactory & factory); */ - -CompressionCodecFactory::CompressionCodecFactory() -{ - registerCodecLZ4(*this); - registerCodecNone(*this); + +CompressionCodecFactory::CompressionCodecFactory() +{ + registerCodecLZ4(*this); + registerCodecNone(*this); /* - registerCodecZSTD(*this); - registerCodecLZ4HC(*this); - registerCodecDelta(*this); - registerCodecT64(*this); - registerCodecDoubleDelta(*this); - registerCodecGorilla(*this); + registerCodecZSTD(*this); + registerCodecLZ4HC(*this); + registerCodecDelta(*this); + registerCodecT64(*this); + registerCodecDoubleDelta(*this); + registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecMultiple(*this); */ default_codec = get("LZ4", {}); -} - -CompressionCodecFactory & CompressionCodecFactory::instance() -{ - static CompressionCodecFactory ret; - return ret; -} - -} +} + +CompressionCodecFactory & CompressionCodecFactory::instance() +{ + static CompressionCodecFactory ret; + return ret; +} + +} diff --git 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp index 3b0cb92de5..f241eb29db 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp @@ -1,255 +1,255 @@ -#include "BackgroundSchedulePool.h" -#include <Common/Exception.h> -#include <Common/setThreadName.h> -#include <Common/Stopwatch.h> -#include <Common/CurrentThread.h> -#include <common/logger_useful.h> -#include <chrono> +#include "BackgroundSchedulePool.h" +#include <Common/Exception.h> +#include <Common/setThreadName.h> +#include <Common/Stopwatch.h> +#include <Common/CurrentThread.h> +#include <common/logger_useful.h> +#include <chrono> #include <common/scope_guard.h> - - -namespace DB -{ - - -class TaskNotification final : public Poco::Notification -{ -public: - explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task_) : task(task_) {} - void execute() { task->execute(); } - -private: - BackgroundSchedulePoolTaskInfoPtr task; -}; - - -BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo( - BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_) - : pool(pool_), log_name(log_name_), function(function_) -{ -} - -bool BackgroundSchedulePoolTaskInfo::schedule() -{ - std::lock_guard lock(schedule_mutex); - - if (deactivated || scheduled) - return false; - - scheduleImpl(lock); - return true; -} - -bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms, bool overwrite) -{ - std::lock_guard lock(schedule_mutex); - - if (deactivated || scheduled) - return false; - if (delayed && !overwrite) - return false; - - pool.scheduleDelayedTask(shared_from_this(), ms, lock); - return true; -} - -void BackgroundSchedulePoolTaskInfo::deactivate() -{ - std::lock_guard lock_exec(exec_mutex); - std::lock_guard lock_schedule(schedule_mutex); - - if (deactivated) - return; - - deactivated = true; - scheduled = false; - - if (delayed) - pool.cancelDelayedTask(shared_from_this(), lock_schedule); -} - -void BackgroundSchedulePoolTaskInfo::activate() -{ - std::lock_guard lock(schedule_mutex); - deactivated = false; -} - -bool BackgroundSchedulePoolTaskInfo::activateAndSchedule() -{ - std::lock_guard lock(schedule_mutex); - - deactivated = false; - if (scheduled) - return false; - - scheduleImpl(lock); - return true; -} - -void BackgroundSchedulePoolTaskInfo::execute() -{ - Stopwatch watch; - CurrentMetrics::Increment metric_increment{pool.tasks_metric}; - - std::lock_guard lock_exec(exec_mutex); - - { - std::lock_guard lock_schedule(schedule_mutex); - - if (deactivated) - return; - - scheduled = false; - executing = true; - } - - function(); - UInt64 milliseconds = watch.elapsedMilliseconds(); - - /// If the task is executed longer than specified time, it will be logged. - static const int32_t slow_execution_threshold_ms = 200; - - if (milliseconds >= slow_execution_threshold_ms) - LOG_TRACE(&Poco::Logger::get(log_name), "Execution took {} ms.", milliseconds); - - { - std::lock_guard lock_schedule(schedule_mutex); - - executing = false; - - /// In case was scheduled while executing (including a scheduleAfter which expired) we schedule the task - /// on the queue. 
We don't call the function again here because this way all tasks - /// will have their chance to execute - - if (scheduled) - pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); - } -} - -void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock) -{ - scheduled = true; - - if (delayed) - pool.cancelDelayedTask(shared_from_this(), schedule_mutex_lock); - - /// If the task is not executing at the moment, enqueue it for immediate execution. - /// But if it is currently executing, do nothing because it will be enqueued - /// at the end of the execute() method. - if (!executing) - pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); -} - -Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback() -{ - return [t = shared_from_this()](const Coordination::WatchResponse &) - { - t->schedule(); - }; -} - - + + +namespace DB +{ + + +class TaskNotification final : public Poco::Notification +{ +public: + explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task_) : task(task_) {} + void execute() { task->execute(); } + +private: + BackgroundSchedulePoolTaskInfoPtr task; +}; + + +BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo( + BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_) + : pool(pool_), log_name(log_name_), function(function_) +{ +} + +bool BackgroundSchedulePoolTaskInfo::schedule() +{ + std::lock_guard lock(schedule_mutex); + + if (deactivated || scheduled) + return false; + + scheduleImpl(lock); + return true; +} + +bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms, bool overwrite) +{ + std::lock_guard lock(schedule_mutex); + + if (deactivated || scheduled) + return false; + if (delayed && !overwrite) + return false; + + pool.scheduleDelayedTask(shared_from_this(), ms, lock); + return true; +} + +void BackgroundSchedulePoolTaskInfo::deactivate() +{ + std::lock_guard lock_exec(exec_mutex); + std::lock_guard lock_schedule(schedule_mutex); + + if (deactivated) + return; + + deactivated = true; + scheduled = false; + + if (delayed) + pool.cancelDelayedTask(shared_from_this(), lock_schedule); +} + +void BackgroundSchedulePoolTaskInfo::activate() +{ + std::lock_guard lock(schedule_mutex); + deactivated = false; +} + +bool BackgroundSchedulePoolTaskInfo::activateAndSchedule() +{ + std::lock_guard lock(schedule_mutex); + + deactivated = false; + if (scheduled) + return false; + + scheduleImpl(lock); + return true; +} + +void BackgroundSchedulePoolTaskInfo::execute() +{ + Stopwatch watch; + CurrentMetrics::Increment metric_increment{pool.tasks_metric}; + + std::lock_guard lock_exec(exec_mutex); + + { + std::lock_guard lock_schedule(schedule_mutex); + + if (deactivated) + return; + + scheduled = false; + executing = true; + } + + function(); + UInt64 milliseconds = watch.elapsedMilliseconds(); + + /// If the task is executed longer than specified time, it will be logged. + static const int32_t slow_execution_threshold_ms = 200; + + if (milliseconds >= slow_execution_threshold_ms) + LOG_TRACE(&Poco::Logger::get(log_name), "Execution took {} ms.", milliseconds); + + { + std::lock_guard lock_schedule(schedule_mutex); + + executing = false; + + /// In case was scheduled while executing (including a scheduleAfter which expired) we schedule the task + /// on the queue. 
We don't call the function again here because this way all tasks + /// will have their chance to execute + + if (scheduled) + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); + } +} + +void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock) +{ + scheduled = true; + + if (delayed) + pool.cancelDelayedTask(shared_from_this(), schedule_mutex_lock); + + /// If the task is not executing at the moment, enqueue it for immediate execution. + /// But if it is currently executing, do nothing because it will be enqueued + /// at the end of the execute() method. + if (!executing) + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); +} + +Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback() +{ + return [t = shared_from_this()](const Coordination::WatchResponse &) + { + t->schedule(); + }; +} + + BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_) - : size(size_) - , tasks_metric(tasks_metric_) - , thread_name(thread_name_) -{ - LOG_INFO(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Create BackgroundSchedulePool with {} threads", size); - - threads.resize(size); - for (auto & thread : threads) - thread = ThreadFromGlobalPool([this] { threadFunction(); }); - - delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); }); -} - - -BackgroundSchedulePool::~BackgroundSchedulePool() -{ - try - { - { - std::unique_lock lock(delayed_tasks_mutex); - shutdown = true; - wakeup_cond.notify_all(); - } - - queue.wakeUpAll(); - delayed_thread.join(); - - LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish."); - for (auto & thread : threads) - thread.join(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - - -BackgroundSchedulePool::TaskHolder BackgroundSchedulePool::createTask(const std::string & name, const TaskFunc & function) -{ - return TaskHolder(std::make_shared<TaskInfo>(*this, name, function)); -} - - -void BackgroundSchedulePool::scheduleDelayedTask(const TaskInfoPtr & task, size_t ms, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */) -{ - Poco::Timestamp current_time; - - { - std::lock_guard lock(delayed_tasks_mutex); - - if (task->delayed) - delayed_tasks.erase(task->iterator); - - task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task); - task->delayed = true; - } - - wakeup_cond.notify_all(); -} - - -void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */) -{ - { - std::lock_guard lock(delayed_tasks_mutex); - delayed_tasks.erase(task->iterator); - task->delayed = false; - } - - wakeup_cond.notify_all(); -} - - -void BackgroundSchedulePool::attachToThreadGroup() -{ - std::lock_guard lock(delayed_tasks_mutex); - - if (thread_group) - { - /// Put all threads to one thread pool - CurrentThread::attachTo(thread_group); - } - else - { - CurrentThread::initializeQuery(); - thread_group = CurrentThread::getGroup(); - } -} - - -void BackgroundSchedulePool::threadFunction() -{ - setThreadName(thread_name.c_str()); - - attachToThreadGroup(); - SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); - - while (!shutdown) - { + : size(size_) + , tasks_metric(tasks_metric_) + , thread_name(thread_name_) +{ + LOG_INFO(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Create BackgroundSchedulePool with {} threads", size); + + threads.resize(size); + for (auto & thread : threads) + thread = ThreadFromGlobalPool([this] { threadFunction(); }); + + delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); }); +} + + +BackgroundSchedulePool::~BackgroundSchedulePool() +{ + try + { + { + std::unique_lock lock(delayed_tasks_mutex); + shutdown = true; + wakeup_cond.notify_all(); + } + + queue.wakeUpAll(); + delayed_thread.join(); + + LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish."); + for (auto & thread : threads) + thread.join(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +BackgroundSchedulePool::TaskHolder BackgroundSchedulePool::createTask(const std::string & name, const TaskFunc & function) +{ + return TaskHolder(std::make_shared<TaskInfo>(*this, name, function)); +} + + +void BackgroundSchedulePool::scheduleDelayedTask(const TaskInfoPtr & task, size_t ms, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */) +{ + Poco::Timestamp current_time; + + { + std::lock_guard lock(delayed_tasks_mutex); + + if (task->delayed) + delayed_tasks.erase(task->iterator); + + task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task); + task->delayed = true; + } + + wakeup_cond.notify_all(); +} + + +void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */) +{ + { + std::lock_guard lock(delayed_tasks_mutex); + delayed_tasks.erase(task->iterator); + task->delayed = false; + } + + wakeup_cond.notify_all(); +} + + +void BackgroundSchedulePool::attachToThreadGroup() +{ + std::lock_guard lock(delayed_tasks_mutex); + + if (thread_group) + { + /// Put all threads to one thread pool + CurrentThread::attachTo(thread_group); + } + else + { + CurrentThread::initializeQuery(); + thread_group = CurrentThread::getGroup(); + } +} + + +void BackgroundSchedulePool::threadFunction() +{ + setThreadName(thread_name.c_str()); + + attachToThreadGroup(); + SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); + + while (!shutdown) + { /// We have to wait with timeout to prevent very rare deadlock, caused by the following race condition: /// 1. Background thread N: threadFunction(): checks for shutdown (it's false) /// 2. Main thread: ~BackgroundSchedulePool(): sets shutdown to true, calls queue.wakeUpAll(), it triggers @@ -260,65 +260,65 @@ void BackgroundSchedulePool::threadFunction() /// TODO Do we really need Poco::NotificationQueue? Why not to use std::queue + mutex + condvar or maybe even DB::ThreadPool? 
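The TODO above asks why Poco::NotificationQueue is needed rather than std::queue + mutex + condvar. For illustration only, here is a minimal sketch of that alternative; SimpleTaskQueue and its members are hypothetical names, not part of this codebase. It keeps the same bounded wait, so the shutdown race described above is still avoided by re-checking the flag after every timeout:

    #include <chrono>
    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <optional>
    #include <queue>

    class SimpleTaskQueue
    {
    public:
        void push(std::function<void()> task)
        {
            {
                std::lock_guard<std::mutex> lock(mutex);
                tasks.push(std::move(task));
            }
            cv.notify_one();
        }

        /// Mirrors waitDequeueNotification(wait_timeout_ms): returns nothing on
        /// timeout so the caller can re-check its shutdown flag.
        std::optional<std::function<void()>> pop(std::chrono::milliseconds timeout)
        {
            std::unique_lock<std::mutex> lock(mutex);
            if (!cv.wait_for(lock, timeout, [this] { return !tasks.empty(); }))
                return std::nullopt;
            auto task = std::move(tasks.front());
            tasks.pop();
            return task;
        }

    private:
        std::mutex mutex;
        std::condition_variable cv;
        std::queue<std::function<void()>> tasks;
    };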
constexpr size_t wait_timeout_ms = 500; if (Poco::AutoPtr<Poco::Notification> notification = queue.waitDequeueNotification(wait_timeout_ms)) - { - TaskNotification & task_notification = static_cast<TaskNotification &>(*notification); - task_notification.execute(); - } - } -} - - -void BackgroundSchedulePool::delayExecutionThreadFunction() -{ - setThreadName((thread_name + "/D").c_str()); - - attachToThreadGroup(); - SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); - - while (!shutdown) - { - TaskInfoPtr task; - bool found = false; - - { - std::unique_lock lock(delayed_tasks_mutex); - - while (!shutdown) - { - Poco::Timestamp min_time; - - if (!delayed_tasks.empty()) - { - auto t = delayed_tasks.begin(); - min_time = t->first; - task = t->second; - } - - if (!task) - { - wakeup_cond.wait(lock); - continue; - } - - Poco::Timestamp current_time; - - if (min_time > current_time) - { - wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time)); - continue; - } - else - { - /// We have a task ready for execution - found = true; - break; - } - } - } - - if (found) - task->schedule(); - } -} - -} + { + TaskNotification & task_notification = static_cast<TaskNotification &>(*notification); + task_notification.execute(); + } + } +} + + +void BackgroundSchedulePool::delayExecutionThreadFunction() +{ + setThreadName((thread_name + "/D").c_str()); + + attachToThreadGroup(); + SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); + + while (!shutdown) + { + TaskInfoPtr task; + bool found = false; + + { + std::unique_lock lock(delayed_tasks_mutex); + + while (!shutdown) + { + Poco::Timestamp min_time; + + if (!delayed_tasks.empty()) + { + auto t = delayed_tasks.begin(); + min_time = t->first; + task = t->second; + } + + if (!task) + { + wakeup_cond.wait(lock); + continue; + } + + Poco::Timestamp current_time; + + if (min_time > current_time) + { + wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time)); + continue; + } + else + { + /// We have a task ready for execution + found = true; + break; + } + } + } + + if (found) + task->schedule(); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h index 0e7e891118..092824c069 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h @@ -1,173 +1,173 @@ -#pragma once - -#include <Poco/Notification.h> -#include <Poco/NotificationQueue.h> -#include <Poco/Timestamp.h> -#include <thread> -#include <atomic> -#include <mutex> -#include <condition_variable> -#include <vector> -#include <map> -#include <functional> -#include <boost/noncopyable.hpp> -#include <Common/ZooKeeper/Types.h> -#include <Common/CurrentMetrics.h> -#include <Common/CurrentThread.h> -#include <Common/ThreadPool.h> - - -namespace DB -{ - -class TaskNotification; -class BackgroundSchedulePoolTaskInfo; -class BackgroundSchedulePoolTaskHolder; - - -/** Executes functions scheduled at a specific point in time. - * Basically all tasks are added in a queue and precessed by worker threads. - * - * The most important difference between this and BackgroundProcessingPool - * is that we have the guarantee that the same function is not executed from many workers in the same time. 
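One detail of the delayed-task arithmetic above is easy to miss: Poco::Timestamp has microsecond resolution, which explains both the ms * 1000 in scheduleDelayedTask and the direct use of std::chrono::microseconds(min_time - current_time). A small illustration, with arbitrary values:

    // Poco::Timestamp counts microseconds since the epoch, so millisecond
    // delays have to be scaled by 1000 before being added to it.
    Poco::Timestamp now;                          // default ctor = current time
    Poco::Timestamp deadline = now + 250 * 1000;  // 250 ms from now
    // A Timestamp difference is already a microsecond count, which is why it
    // can be passed straight to std::chrono::microseconds above.
    auto remaining = std::chrono::microseconds(deadline - now);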
- * - * The usage scenario: instead starting a separate thread for each task, - * register a task in BackgroundSchedulePool and when you need to run the task, - * call schedule or scheduleAfter(duration) method. - */ -class BackgroundSchedulePool -{ -public: - friend class BackgroundSchedulePoolTaskInfo; - - using TaskInfo = BackgroundSchedulePoolTaskInfo; - using TaskInfoPtr = std::shared_ptr<TaskInfo>; - using TaskFunc = std::function<void()>; - using TaskHolder = BackgroundSchedulePoolTaskHolder; - using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>; - - TaskHolder createTask(const std::string & log_name, const TaskFunc & function); - - size_t getNumberOfThreads() const { return size; } - - /// thread_name_ cannot be longer then 13 bytes (2 bytes is reserved for "/D" suffix for delayExecutionThreadFunction()) +#pragma once + +#include <Poco/Notification.h> +#include <Poco/NotificationQueue.h> +#include <Poco/Timestamp.h> +#include <thread> +#include <atomic> +#include <mutex> +#include <condition_variable> +#include <vector> +#include <map> +#include <functional> +#include <boost/noncopyable.hpp> +#include <Common/ZooKeeper/Types.h> +#include <Common/CurrentMetrics.h> +#include <Common/CurrentThread.h> +#include <Common/ThreadPool.h> + + +namespace DB +{ + +class TaskNotification; +class BackgroundSchedulePoolTaskInfo; +class BackgroundSchedulePoolTaskHolder; + + +/** Executes functions scheduled at a specific point in time. + * Basically all tasks are added in a queue and precessed by worker threads. + * + * The most important difference between this and BackgroundProcessingPool + * is that we have the guarantee that the same function is not executed from many workers in the same time. + * + * The usage scenario: instead starting a separate thread for each task, + * register a task in BackgroundSchedulePool and when you need to run the task, + * call schedule or scheduleAfter(duration) method. + */ +class BackgroundSchedulePool +{ +public: + friend class BackgroundSchedulePoolTaskInfo; + + using TaskInfo = BackgroundSchedulePoolTaskInfo; + using TaskInfoPtr = std::shared_ptr<TaskInfo>; + using TaskFunc = std::function<void()>; + using TaskHolder = BackgroundSchedulePoolTaskHolder; + using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>; + + TaskHolder createTask(const std::string & log_name, const TaskFunc & function); + + size_t getNumberOfThreads() const { return size; } + + /// thread_name_ cannot be longer then 13 bytes (2 bytes is reserved for "/D" suffix for delayExecutionThreadFunction()) BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_); - ~BackgroundSchedulePool(); - -private: - using Threads = std::vector<ThreadFromGlobalPool>; - - void threadFunction(); - void delayExecutionThreadFunction(); - - /// Schedule task for execution after specified delay from now. - void scheduleDelayedTask(const TaskInfoPtr & task_info, size_t ms, std::lock_guard<std::mutex> & task_schedule_mutex_lock); - - /// Remove task, that was scheduled with delay, from schedule. - void cancelDelayedTask(const TaskInfoPtr & task_info, std::lock_guard<std::mutex> & task_schedule_mutex_lock); - - /// Number for worker threads. - const size_t size; - std::atomic<bool> shutdown {false}; - Threads threads; - Poco::NotificationQueue queue; - - /// Delayed notifications. - - std::condition_variable wakeup_cond; - std::mutex delayed_tasks_mutex; - /// Thread waiting for next delayed task. 
- ThreadFromGlobalPool delayed_thread; - /// Tasks ordered by scheduled time. - DelayedTasks delayed_tasks; - - /// Thread group used for profiling purposes - ThreadGroupStatusPtr thread_group; - - CurrentMetrics::Metric tasks_metric; - std::string thread_name; - - void attachToThreadGroup(); -}; - - -class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable -{ -public: - BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_); - - /// Schedule for execution as soon as possible (if not already scheduled). - /// If the task was already scheduled with delay, the delay will be ignored. - bool schedule(); - - /// Schedule for execution after specified delay. - /// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true). - bool scheduleAfter(size_t ms, bool overwrite = true); - - /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task. - void deactivate(); - - void activate(); - - /// Atomically activate task and schedule it for execution. - bool activateAndSchedule(); - - /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches. - Coordination::WatchCallback getWatchCallback(); - -private: - friend class TaskNotification; - friend class BackgroundSchedulePool; - - void execute(); - - void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock); - - BackgroundSchedulePool & pool; - std::string log_name; - BackgroundSchedulePool::TaskFunc function; - - std::mutex exec_mutex; - std::mutex schedule_mutex; - - /// Invariants: - /// * If deactivated is true then scheduled, delayed and executing are all false. - /// * scheduled and delayed cannot be true at the same time. - bool deactivated = false; - bool scheduled = false; - bool delayed = false; - bool executing = false; - - /// If the task is scheduled with delay, points to element of delayed_tasks. - BackgroundSchedulePool::DelayedTasks::iterator iterator; -}; - -using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>; - - -class BackgroundSchedulePoolTaskHolder -{ -public: - BackgroundSchedulePoolTaskHolder() = default; - explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {} - BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete; - BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default; - BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete; - BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default; - - ~BackgroundSchedulePoolTaskHolder() - { - if (task_info) - task_info->deactivate(); - } - - operator bool() const { return task_info != nullptr; } - - BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); } - const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); } - -private: - BackgroundSchedulePoolTaskInfoPtr task_info; -}; - -} + ~BackgroundSchedulePool(); + +private: + using Threads = std::vector<ThreadFromGlobalPool>; + + void threadFunction(); + void delayExecutionThreadFunction(); + + /// Schedule task for execution after specified delay from now. 
+ void scheduleDelayedTask(const TaskInfoPtr & task_info, size_t ms, std::lock_guard<std::mutex> & task_schedule_mutex_lock); + + /// Remove task, that was scheduled with delay, from schedule. + void cancelDelayedTask(const TaskInfoPtr & task_info, std::lock_guard<std::mutex> & task_schedule_mutex_lock); + + /// Number for worker threads. + const size_t size; + std::atomic<bool> shutdown {false}; + Threads threads; + Poco::NotificationQueue queue; + + /// Delayed notifications. + + std::condition_variable wakeup_cond; + std::mutex delayed_tasks_mutex; + /// Thread waiting for next delayed task. + ThreadFromGlobalPool delayed_thread; + /// Tasks ordered by scheduled time. + DelayedTasks delayed_tasks; + + /// Thread group used for profiling purposes + ThreadGroupStatusPtr thread_group; + + CurrentMetrics::Metric tasks_metric; + std::string thread_name; + + void attachToThreadGroup(); +}; + + +class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable +{ +public: + BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_); + + /// Schedule for execution as soon as possible (if not already scheduled). + /// If the task was already scheduled with delay, the delay will be ignored. + bool schedule(); + + /// Schedule for execution after specified delay. + /// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true). + bool scheduleAfter(size_t ms, bool overwrite = true); + + /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task. + void deactivate(); + + void activate(); + + /// Atomically activate task and schedule it for execution. + bool activateAndSchedule(); + + /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches. + Coordination::WatchCallback getWatchCallback(); + +private: + friend class TaskNotification; + friend class BackgroundSchedulePool; + + void execute(); + + void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock); + + BackgroundSchedulePool & pool; + std::string log_name; + BackgroundSchedulePool::TaskFunc function; + + std::mutex exec_mutex; + std::mutex schedule_mutex; + + /// Invariants: + /// * If deactivated is true then scheduled, delayed and executing are all false. + /// * scheduled and delayed cannot be true at the same time. + bool deactivated = false; + bool scheduled = false; + bool delayed = false; + bool executing = false; + + /// If the task is scheduled with delay, points to element of delayed_tasks. 
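Putting the pieces above together, a typical client of this header looks roughly like the following. This is a sketch, not code from the repository: the pool size, metric name, and thread name are assumed values, and the work lambda is a placeholder.

    // Assumed metric; any CurrentMetrics::Metric would do here. Note the
    // thread name stays within the 13-byte limit documented above.
    BackgroundSchedulePool pool(4, CurrentMetrics::BackgroundSchedulePoolTask, "BgSchPool");

    // createTask() only registers the task; nothing runs yet.
    auto task = pool.createTask("ExampleTask", [] { /* periodic work */ });

    task->activateAndSchedule();  // activate and enqueue for immediate execution
    task->scheduleAfter(1000);    // or request a run ~1 second from now

    // deactivate() makes further schedule() calls no-ops and waits for any
    // in-flight execution; the TaskHolder destructor does the same implicitly.
    task->deactivate();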
+ BackgroundSchedulePool::DelayedTasks::iterator iterator; +}; + +using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>; + + +class BackgroundSchedulePoolTaskHolder +{ +public: + BackgroundSchedulePoolTaskHolder() = default; + explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {} + BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete; + BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default; + BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete; + BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default; + + ~BackgroundSchedulePoolTaskHolder() + { + if (task_info) + task_info->deactivate(); + } + + operator bool() const { return task_info != nullptr; } + + BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); } + const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); } + +private: + BackgroundSchedulePoolTaskInfoPtr task_info; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h index 1c42b211a3..5b017cd463 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h @@ -1,320 +1,320 @@ -#pragma once - -#include <common/arithmeticOverflow.h> -#include <Core/Block.h> -#include <Core/AccurateComparison.h> -#include <Core/callOnTypeIndex.h> -#include <DataTypes/DataTypesNumber.h> -#include <DataTypes/DataTypesDecimal.h> -#include <Columns/ColumnVector.h> -#include <Columns/ColumnsNumber.h> -#include <Columns/ColumnConst.h> -#include <Functions/FunctionHelpers.h> /// TODO Core should not depend on Functions - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int DECIMAL_OVERFLOW; -} - - -inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataTypePtr & right_type) -{ - if (isColumnedAsDecimal(left_type)) - { - if (isColumnedAsDecimal(right_type) || isNotDecimalButComparableToDecimal(right_type)) - return true; - } - else if (isNotDecimalButComparableToDecimal(left_type) && isColumnedAsDecimal(right_type)) +#pragma once + +#include <common/arithmeticOverflow.h> +#include <Core/Block.h> +#include <Core/AccurateComparison.h> +#include <Core/callOnTypeIndex.h> +#include <DataTypes/DataTypesNumber.h> +#include <DataTypes/DataTypesDecimal.h> +#include <Columns/ColumnVector.h> +#include <Columns/ColumnsNumber.h> +#include <Columns/ColumnConst.h> +#include <Functions/FunctionHelpers.h> /// TODO Core should not depend on Functions + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int DECIMAL_OVERFLOW; +} + + +inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataTypePtr & right_type) +{ + if (isColumnedAsDecimal(left_type)) + { + if (isColumnedAsDecimal(right_type) || isNotDecimalButComparableToDecimal(right_type)) + return true; + } + else if (isNotDecimalButComparableToDecimal(left_type) && isColumnedAsDecimal(right_type)) { - return true; + return true; } - return false; -} - + return false; +} + template <size_t> struct ConstructDecInt; template <> struct ConstructDecInt<1> { using Type = Int32; }; template <> 
struct ConstructDecInt<2> { using Type = Int32; }; template <> struct ConstructDecInt<4> { using Type = Int32; }; -template <> struct ConstructDecInt<8> { using Type = Int64; }; -template <> struct ConstructDecInt<16> { using Type = Int128; }; +template <> struct ConstructDecInt<8> { using Type = Int64; }; +template <> struct ConstructDecInt<16> { using Type = Int128; }; template <> struct ConstructDecInt<32> { using Type = Int256; }; - -template <typename T, typename U> -struct DecCompareInt -{ - using Type = typename ConstructDecInt<(!IsDecimalNumber<U> || sizeof(T) > sizeof(U)) ? sizeof(T) : sizeof(U)>::Type; - using TypeA = Type; - using TypeB = Type; -}; - -/// -template <typename A, typename B, template <typename, typename> typename Operation, bool _check_overflow = true, - bool _actual = IsDecimalNumber<A> || IsDecimalNumber<B>> -class DecimalComparison -{ -public: - using CompareInt = typename DecCompareInt<A, B>::Type; - using Op = Operation<CompareInt, CompareInt>; - using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>; - using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>; - - using ArrayA = typename ColVecA::Container; - using ArrayB = typename ColVecB::Container; - + +template <typename T, typename U> +struct DecCompareInt +{ + using Type = typename ConstructDecInt<(!IsDecimalNumber<U> || sizeof(T) > sizeof(U)) ? sizeof(T) : sizeof(U)>::Type; + using TypeA = Type; + using TypeB = Type; +}; + +/// +template <typename A, typename B, template <typename, typename> typename Operation, bool _check_overflow = true, + bool _actual = IsDecimalNumber<A> || IsDecimalNumber<B>> +class DecimalComparison +{ +public: + using CompareInt = typename DecCompareInt<A, B>::Type; + using Op = Operation<CompareInt, CompareInt>; + using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>; + using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>; + + using ArrayA = typename ColVecA::Container; + using ArrayB = typename ColVecB::Container; + static ColumnPtr apply(const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right) - { - if constexpr (_actual) - { - ColumnPtr c_res; - Shift shift = getScales<A, B>(col_left.type, col_right.type); - + { + if constexpr (_actual) + { + ColumnPtr c_res; + Shift shift = getScales<A, B>(col_left.type, col_right.type); + return applyWithScale(col_left.column, col_right.column, shift); - } + } else return nullptr; - } - - static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b) - { + } + + static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b) + { static const UInt32 max_scale = DecimalUtils::max_precision<Decimal256>; - if (scale_a > max_scale || scale_b > max_scale) - throw Exception("Bad scale of decimal field", ErrorCodes::DECIMAL_OVERFLOW); - - Shift shift; - if (scale_a < scale_b) + if (scale_a > max_scale || scale_b > max_scale) + throw Exception("Bad scale of decimal field", ErrorCodes::DECIMAL_OVERFLOW); + + Shift shift; + if (scale_a < scale_b) shift.a = static_cast<CompareInt>(DecimalUtils::scaleMultiplier<B>(scale_b - scale_a)); - if (scale_a > scale_b) + if (scale_a > scale_b) shift.b = static_cast<CompareInt>(DecimalUtils::scaleMultiplier<A>(scale_a - scale_b)); - - return applyWithScale(a, b, shift); - } - -private: - struct Shift - { - CompareInt a = 1; - CompareInt b = 1; - - bool none() const { return a == 1 && b == 1; } - bool left() const { return a != 1; } - bool right() const 
{ return b != 1; } - }; - - template <typename T, typename U> - static auto applyWithScale(T a, U b, const Shift & shift) - { - if (shift.left()) - return apply<true, false>(a, b, shift.a); - else if (shift.right()) - return apply<false, true>(a, b, shift.b); - return apply<false, false>(a, b, 1); - } - - template <typename T, typename U> - static std::enable_if_t<IsDecimalNumber<T> && IsDecimalNumber<U>, Shift> - getScales(const DataTypePtr & left_type, const DataTypePtr & right_type) - { + + return applyWithScale(a, b, shift); + } + +private: + struct Shift + { + CompareInt a = 1; + CompareInt b = 1; + + bool none() const { return a == 1 && b == 1; } + bool left() const { return a != 1; } + bool right() const { return b != 1; } + }; + + template <typename T, typename U> + static auto applyWithScale(T a, U b, const Shift & shift) + { + if (shift.left()) + return apply<true, false>(a, b, shift.a); + else if (shift.right()) + return apply<false, true>(a, b, shift.b); + return apply<false, false>(a, b, 1); + } + + template <typename T, typename U> + static std::enable_if_t<IsDecimalNumber<T> && IsDecimalNumber<U>, Shift> + getScales(const DataTypePtr & left_type, const DataTypePtr & right_type) + { const DataTypeDecimalBase<T> * decimal0 = checkDecimalBase<T>(*left_type); const DataTypeDecimalBase<U> * decimal1 = checkDecimalBase<U>(*right_type); - - Shift shift; - if (decimal0 && decimal1) - { + + Shift shift; + if (decimal0 && decimal1) + { auto result_type = DecimalUtils::binaryOpResult<false, false>(*decimal0, *decimal1); shift.a = static_cast<CompareInt>(result_type.scaleFactorFor(decimal0->getTrait(), false).value); shift.b = static_cast<CompareInt>(result_type.scaleFactorFor(decimal1->getTrait(), false).value); - } - else if (decimal0) + } + else if (decimal0) shift.b = static_cast<CompareInt>(decimal0->getScaleMultiplier().value); - else if (decimal1) + else if (decimal1) shift.a = static_cast<CompareInt>(decimal1->getScaleMultiplier().value); - - return shift; - } - - template <typename T, typename U> - static std::enable_if_t<IsDecimalNumber<T> && !IsDecimalNumber<U>, Shift> - getScales(const DataTypePtr & left_type, const DataTypePtr &) - { - Shift shift; + + return shift; + } + + template <typename T, typename U> + static std::enable_if_t<IsDecimalNumber<T> && !IsDecimalNumber<U>, Shift> + getScales(const DataTypePtr & left_type, const DataTypePtr &) + { + Shift shift; const DataTypeDecimalBase<T> * decimal0 = checkDecimalBase<T>(*left_type); - if (decimal0) + if (decimal0) shift.b = static_cast<CompareInt>(decimal0->getScaleMultiplier().value); - return shift; - } - - template <typename T, typename U> - static std::enable_if_t<!IsDecimalNumber<T> && IsDecimalNumber<U>, Shift> - getScales(const DataTypePtr &, const DataTypePtr & right_type) - { - Shift shift; + return shift; + } + + template <typename T, typename U> + static std::enable_if_t<!IsDecimalNumber<T> && IsDecimalNumber<U>, Shift> + getScales(const DataTypePtr &, const DataTypePtr & right_type) + { + Shift shift; const DataTypeDecimalBase<U> * decimal1 = checkDecimalBase<U>(*right_type); - if (decimal1) + if (decimal1) shift.a = static_cast<CompareInt>(decimal1->getScaleMultiplier().value); - return shift; - } - - template <bool scale_left, bool scale_right> - static ColumnPtr apply(const ColumnPtr & c0, const ColumnPtr & c1, CompareInt scale) - { - auto c_res = ColumnUInt8::create(); - - if constexpr (_actual) - { - bool c0_is_const = isColumnConst(*c0); - bool c1_is_const = isColumnConst(*c1); - - if (c0_is_const && 
c1_is_const) - { - const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get()); - const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get()); - - A a = c0_const->template getValue<A>(); - B b = c1_const->template getValue<B>(); - UInt8 res = apply<scale_left, scale_right>(a, b, scale); - return DataTypeUInt8().createColumnConst(c0->size(), toField(res)); - } - - ColumnUInt8::Container & vec_res = c_res->getData(); - vec_res.resize(c0->size()); - - if (c0_is_const) - { - const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get()); - A a = c0_const->template getValue<A>(); - if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get())) - constantVector<scale_left, scale_right>(a, c1_vec->getData(), vec_res, scale); - else - throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); - } - else if (c1_is_const) - { - const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get()); - B b = c1_const->template getValue<B>(); - if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get())) - vectorConstant<scale_left, scale_right>(c0_vec->getData(), b, vec_res, scale); - else - throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); - } - else - { - if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get())) - { - if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get())) - vectorVector<scale_left, scale_right>(c0_vec->getData(), c1_vec->getData(), vec_res, scale); - else - throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); - } - else - throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); - } - } - - return c_res; - } - - template <bool scale_left, bool scale_right> - static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]]) - { + return shift; + } + + template <bool scale_left, bool scale_right> + static ColumnPtr apply(const ColumnPtr & c0, const ColumnPtr & c1, CompareInt scale) + { + auto c_res = ColumnUInt8::create(); + + if constexpr (_actual) + { + bool c0_is_const = isColumnConst(*c0); + bool c1_is_const = isColumnConst(*c1); + + if (c0_is_const && c1_is_const) + { + const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get()); + const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get()); + + A a = c0_const->template getValue<A>(); + B b = c1_const->template getValue<B>(); + UInt8 res = apply<scale_left, scale_right>(a, b, scale); + return DataTypeUInt8().createColumnConst(c0->size(), toField(res)); + } + + ColumnUInt8::Container & vec_res = c_res->getData(); + vec_res.resize(c0->size()); + + if (c0_is_const) + { + const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get()); + A a = c0_const->template getValue<A>(); + if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get())) + constantVector<scale_left, scale_right>(a, c1_vec->getData(), vec_res, scale); + else + throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); + } + else if (c1_is_const) + { + const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get()); + B b = c1_const->template getValue<B>(); + if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get())) + vectorConstant<scale_left, scale_right>(c0_vec->getData(), b, vec_res, scale); + else + throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); + } + else + { + if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get())) + { + if (const ColVecB * c1_vec = 
checkAndGetColumn<ColVecB>(c1.get())) + vectorVector<scale_left, scale_right>(c0_vec->getData(), c1_vec->getData(), vec_res, scale); + else + throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); + } + else + throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR); + } + } + + return c_res; + } + + template <bool scale_left, bool scale_right> + static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]]) + { CompareInt x; if constexpr (IsDecimalNumber<A>) x = a.value; else x = a; - + CompareInt y; if constexpr (IsDecimalNumber<B>) y = b.value; else y = b; - if constexpr (_check_overflow) - { - bool overflow = false; - - if constexpr (sizeof(A) > sizeof(CompareInt)) + if constexpr (_check_overflow) + { + bool overflow = false; + + if constexpr (sizeof(A) > sizeof(CompareInt)) overflow |= (static_cast<A>(x) != a); - if constexpr (sizeof(B) > sizeof(CompareInt)) + if constexpr (sizeof(B) > sizeof(CompareInt)) overflow |= (static_cast<B>(y) != b); - if constexpr (is_unsigned_v<A>) - overflow |= (x < 0); - if constexpr (is_unsigned_v<B>) - overflow |= (y < 0); - - if constexpr (scale_left) - overflow |= common::mulOverflow(x, scale, x); - if constexpr (scale_right) - overflow |= common::mulOverflow(y, scale, y); - - if (overflow) + if constexpr (is_unsigned_v<A>) + overflow |= (x < 0); + if constexpr (is_unsigned_v<B>) + overflow |= (y < 0); + + if constexpr (scale_left) + overflow |= common::mulOverflow(x, scale, x); + if constexpr (scale_right) + overflow |= common::mulOverflow(y, scale, y); + + if (overflow) throw Exception("Can't compare decimal number due to overflow", ErrorCodes::DECIMAL_OVERFLOW); - } - else - { - if constexpr (scale_left) + } + else + { + if constexpr (scale_left) x = common::mulIgnoreOverflow(x, scale); - if constexpr (scale_right) + if constexpr (scale_right) y = common::mulIgnoreOverflow(y, scale); - } - - return Op::apply(x, y); - } - - template <bool scale_left, bool scale_right> - static void NO_INLINE vectorVector(const ArrayA & a, const ArrayB & b, PaddedPODArray<UInt8> & c, - CompareInt scale) - { - size_t size = a.size(); - const A * a_pos = a.data(); - const B * b_pos = b.data(); - UInt8 * c_pos = c.data(); - const A * a_end = a_pos + size; - - while (a_pos < a_end) - { - *c_pos = apply<scale_left, scale_right>(*a_pos, *b_pos, scale); - ++a_pos; - ++b_pos; - ++c_pos; - } - } - - template <bool scale_left, bool scale_right> - static void NO_INLINE vectorConstant(const ArrayA & a, B b, PaddedPODArray<UInt8> & c, CompareInt scale) - { - size_t size = a.size(); - const A * a_pos = a.data(); - UInt8 * c_pos = c.data(); - const A * a_end = a_pos + size; - - while (a_pos < a_end) - { - *c_pos = apply<scale_left, scale_right>(*a_pos, b, scale); - ++a_pos; - ++c_pos; - } - } - - template <bool scale_left, bool scale_right> - static void NO_INLINE constantVector(A a, const ArrayB & b, PaddedPODArray<UInt8> & c, CompareInt scale) - { - size_t size = b.size(); - const B * b_pos = b.data(); - UInt8 * c_pos = c.data(); - const B * b_end = b_pos + size; - - while (b_pos < b_end) - { - *c_pos = apply<scale_left, scale_right>(a, *b_pos, scale); - ++b_pos; - ++c_pos; - } - } -}; - -} + } + + return Op::apply(x, y); + } + + template <bool scale_left, bool scale_right> + static void NO_INLINE vectorVector(const ArrayA & a, const ArrayB & b, PaddedPODArray<UInt8> & c, + CompareInt scale) + { + size_t size = a.size(); + const A * a_pos = a.data(); + const B * b_pos = b.data(); + UInt8 * c_pos = c.data(); + const A * 
a_end = a_pos + size; + + while (a_pos < a_end) + { + *c_pos = apply<scale_left, scale_right>(*a_pos, *b_pos, scale); + ++a_pos; + ++b_pos; + ++c_pos; + } + } + + template <bool scale_left, bool scale_right> + static void NO_INLINE vectorConstant(const ArrayA & a, B b, PaddedPODArray<UInt8> & c, CompareInt scale) + { + size_t size = a.size(); + const A * a_pos = a.data(); + UInt8 * c_pos = c.data(); + const A * a_end = a_pos + size; + + while (a_pos < a_end) + { + *c_pos = apply<scale_left, scale_right>(*a_pos, b, scale); + ++a_pos; + ++c_pos; + } + } + + template <bool scale_left, bool scale_right> + static void NO_INLINE constantVector(A a, const ArrayB & b, PaddedPODArray<UInt8> & c, CompareInt scale) + { + size_t size = b.size(); + const B * b_pos = b.data(); + UInt8 * c_pos = c.data(); + const B * b_end = b_pos + size; + + while (b_pos < b_end) + { + *c_pos = apply<scale_left, scale_right>(a, *b_pos, scale); + ++b_pos; + ++c_pos; + } + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h index 955b292f3b..92e780104b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h @@ -1,93 +1,93 @@ -#pragma once - +#pragma once + #include <common/types.h> - - -namespace DB -{ - - -/// Client-server protocol. -/// -/// Client opens a connection and sends Hello packet. -/// If client version is incompatible, the server can terminate the connection. -/// Server responds with Hello packet. -/// If server version is incompatible, the client can terminate the connection. -/// -/// The main loop follows: -/// -/// 1. The client sends Query packet. -/// -/// Starting from version 50263 immediately after sending the Query packet the client starts -/// transfer of external (temporary) table (external storages) - one or several Data packets. -/// End of transmission is marked by an empty block. -/// At present, non-empty tables can be sent only along with SELECT query. -/// -/// If the query is an INSERT (and thus requires data transfer from client), then the server transmits -/// Data packet containing empty block that describes the table structure. -/// Then the client sends one or several Data packets - data for insertion. -/// End of data is marked by the transmission of empty block. -/// Then the server sends EndOfStream packet. -/// -/// If the query is a SELECT or a query of other type, then the server transmits packets of -/// one of the following types: -/// - Data - data corresponding to one block of query results. -/// - Progress - query execution progress. -/// - Exception - error description. -/// - EndOfStream - the end of data transmission. -/// -/// The client should read packets until EndOfStream or Exception. -/// -/// The client can also send Cancel packet - a request to cancel the query. -/// In this case the server can stop executing the query and return incomplete data, -/// but the client must still read until EndOfStream packet. -/// -/// Also if there is profiling info and the client revision is recent enough, the server can -/// send one of the following packets before EndOfStream: -/// - Totals - a block with total values -/// - ProfileInfo - serialized BlockStreamProfileInfo structure. -/// -/// If a query returns data, the server sends an empty header block containing -/// the description of resulting columns before executing the query. 
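The scale-alignment step in DecimalComparison above is easiest to see with concrete numbers. A comment-only walkthrough, with all values made up for illustration:

    // Decimal raw values are scaled integers: value = raw / 10^scale.
    //   A: Decimal32 raw = 12345, scale_a = 3   -> 12.345
    //   B: Decimal64 raw = 1230,  scale_b = 2   -> 12.30
    // scale_a > scale_b, so compare() sets shift.b = 10^(3 - 2) = 10 and
    // apply<false, true> multiplies the B side: 1230 * 10 = 12300.
    // Both raws are now at scale 3, so plain integer comparison is exact:
    //   12345 > 12300  <=>  12.345 > 12.30
    // With _check_overflow set, mulOverflow() guards that multiplication and
    // a DECIMAL_OVERFLOW exception is thrown instead of silently wrapping.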
-/// Using this block the client can initialize the output formatter and display the prefix of resulting table -/// beforehand. - + + +namespace DB +{ + + +/// Client-server protocol. +/// +/// Client opens a connection and sends Hello packet. +/// If client version is incompatible, the server can terminate the connection. +/// Server responds with Hello packet. +/// If server version is incompatible, the client can terminate the connection. +/// +/// The main loop follows: +/// +/// 1. The client sends Query packet. +/// +/// Starting from version 50263 immediately after sending the Query packet the client starts +/// transfer of external (temporary) table (external storages) - one or several Data packets. +/// End of transmission is marked by an empty block. +/// At present, non-empty tables can be sent only along with SELECT query. +/// +/// If the query is an INSERT (and thus requires data transfer from client), then the server transmits +/// Data packet containing empty block that describes the table structure. +/// Then the client sends one or several Data packets - data for insertion. +/// End of data is marked by the transmission of empty block. +/// Then the server sends EndOfStream packet. +/// +/// If the query is a SELECT or a query of other type, then the server transmits packets of +/// one of the following types: +/// - Data - data corresponding to one block of query results. +/// - Progress - query execution progress. +/// - Exception - error description. +/// - EndOfStream - the end of data transmission. +/// +/// The client should read packets until EndOfStream or Exception. +/// +/// The client can also send Cancel packet - a request to cancel the query. +/// In this case the server can stop executing the query and return incomplete data, +/// but the client must still read until EndOfStream packet. +/// +/// Also if there is profiling info and the client revision is recent enough, the server can +/// send one of the following packets before EndOfStream: +/// - Totals - a block with total values +/// - ProfileInfo - serialized BlockStreamProfileInfo structure. +/// +/// If a query returns data, the server sends an empty header block containing +/// the description of resulting columns before executing the query. +/// Using this block the client can initialize the output formatter and display the prefix of resulting table +/// beforehand. + /// Marker of the inter-server secret (passed in the user name) /// (anyway user cannot be started with a whitespace) const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; -namespace Protocol -{ - /// Packet types that server transmits. - namespace Server - { - enum Enum - { - Hello = 0, /// Name, version, revision. - Data = 1, /// A block of data (compressed or not). - Exception = 2, /// The exception during query execution. - Progress = 3, /// Query execution progress: rows read, bytes read. - Pong = 4, /// Ping response - EndOfStream = 5, /// All packets were transmitted - ProfileInfo = 6, /// Packet with profiling info. - Totals = 7, /// A block with totals (compressed or not). - Extremes = 8, /// A block with minimums and maximums (compressed or not). - TablesStatusResponse = 9, /// A response to TablesStatus request. - Log = 10, /// System logs of the query execution - TableColumns = 11, /// Columns' description for default values calculation +namespace Protocol +{ + /// Packet types that server transmits. + namespace Server + { + enum Enum + { + Hello = 0, /// Name, version, revision. 
+ Data = 1, /// A block of data (compressed or not). + Exception = 2, /// The exception during query execution. + Progress = 3, /// Query execution progress: rows read, bytes read. + Pong = 4, /// Ping response + EndOfStream = 5, /// All packets were transmitted + ProfileInfo = 6, /// Packet with profiling info. + Totals = 7, /// A block with totals (compressed or not). + Extremes = 8, /// A block with minimums and maximums (compressed or not). + TablesStatusResponse = 9, /// A response to TablesStatus request. + Log = 10, /// System logs of the query execution + TableColumns = 11, /// Columns' description for default values calculation PartUUIDs = 12, /// List of unique parts ids. ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed /// This is such an inverted logic, where server sends requests /// And client returns back response MAX = ReadTaskRequest, - }; - - /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 - /// would always be true because of compiler optimisation. That would lead to out-of-bounds error - /// if the packet is invalid. - /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values - inline const char * toString(UInt64 packet) - { + }; + + /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 + /// would always be true because of compiler optimisation. That would lead to out-of-bounds error + /// if the packet is invalid. + /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values + inline const char * toString(UInt64 packet) + { static const char * data[] = { "Hello", "Data", @@ -105,46 +105,46 @@ namespace Protocol "ReadTaskRequest" }; return packet <= MAX - ? data[packet] - : "Unknown packet"; - } - - inline size_t stringsInMessage(UInt64 msg_type) - { - switch (msg_type) - { - case TableColumns: - return 2; - default: - break; - } - return 0; - } - } - - /// Packet types that client transmits. - namespace Client - { - enum Enum - { - Hello = 0, /// Name, version, revision, default DB - Query = 1, /// Query id, query settings, stage up to which the query must be executed, - /// whether the compression must be used, - /// query text (without data for INSERTs). - Data = 2, /// A block of data (compressed or not). - Cancel = 3, /// Cancel the query execution. - Ping = 4, /// Check that connection to the server is alive. - TablesStatusRequest = 5, /// Check status of tables on the server. - KeepAlive = 6, /// Keep the connection alive + ? data[packet] + : "Unknown packet"; + } + + inline size_t stringsInMessage(UInt64 msg_type) + { + switch (msg_type) + { + case TableColumns: + return 2; + default: + break; + } + return 0; + } + } + + /// Packet types that client transmits. + namespace Client + { + enum Enum + { + Hello = 0, /// Name, version, revision, default DB + Query = 1, /// Query id, query settings, stage up to which the query must be executed, + /// whether the compression must be used, + /// query text (without data for INSERTs). + Data = 2, /// A block of data (compressed or not). + Cancel = 3, /// Cancel the query execution. + Ping = 4, /// Check that connection to the server is alive. + TablesStatusRequest = 5, /// Check status of tables on the server. + KeepAlive = 6, /// Keep the connection alive Scalar = 7, /// A block of data (compressed or not). 
IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing ReadTaskResponse = 9, /// TODO: MAX = ReadTaskResponse, - }; - - inline const char * toString(UInt64 packet) - { + }; + + inline const char * toString(UInt64 packet) + { static const char * data[] = { "Hello", "Query", @@ -158,25 +158,25 @@ namespace Protocol "ReadTaskResponse", }; return packet <= MAX - ? data[packet] - : "Unknown packet"; - } - } - - /// Whether the compression must be used. - enum class Compression - { - Disable = 0, - Enable = 1, - }; - - /// Whether the ssl must be used. - enum class Secure - { - Disable = 0, - Enable = 1, - }; - -} - -} + ? data[packet] + : "Unknown packet"; + } + } + + /// Whether the compression must be used. + enum class Compression + { + Disable = 0, + Enable = 1, + }; + + /// Whether the ssl must be used. + enum class Secure + { + Disable = 0, + Enable = 1, + }; + +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h index 931c9818fe..ba125e2882 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -1,21 +1,21 @@ -#pragma once - -#include <DataStreams/IBlockOutputStream.h> +#pragma once + +#include <DataStreams/IBlockOutputStream.h> #include <Interpreters/QueryViewsLog.h> #include <Parsers/IAST_fwd.h> #include <Storages/IStorage.h> #include <Common/Stopwatch.h> - + namespace Poco { class Logger; } -namespace DB -{ - +namespace DB +{ + class ReplicatedMergeTreeSink; - + struct ViewRuntimeData { const ASTPtr query; @@ -31,44 +31,44 @@ struct ViewRuntimeData } }; -/** Writes data to the specified table and to all dependent materialized views. - */ +/** Writes data to the specified table and to all dependent materialized views. 
+ */ class PushingToViewsBlockOutputStream : public IBlockOutputStream, WithContext -{ -public: +{ +public: PushingToViewsBlockOutputStream( const StoragePtr & storage_, const StorageMetadataPtr & metadata_snapshot_, ContextPtr context_, const ASTPtr & query_ptr_, bool no_destination = false); - - Block getHeader() const override; - void write(const Block & block) override; - - void flush() override; - void writePrefix() override; - void writeSuffix() override; + + Block getHeader() const override; + void write(const Block & block) override; + + void flush() override; + void writePrefix() override; + void writeSuffix() override; void onProgress(const Progress & progress) override; - -private: - StoragePtr storage; + +private: + StoragePtr storage; StorageMetadataPtr metadata_snapshot; - BlockOutputStreamPtr output; + BlockOutputStreamPtr output; ReplicatedMergeTreeSink * replicated_output = nullptr; Poco::Logger * log; - - ASTPtr query_ptr; + + ASTPtr query_ptr; Stopwatch main_watch; - + std::vector<ViewRuntimeData> views; ContextMutablePtr select_context; ContextMutablePtr insert_context; - + void process(const Block & block, ViewRuntimeData & view); void checkExceptionsInViews(); void logQueryViews(); -}; - - -} +}; + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp index ccfcc1799b..7caa54cff2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp @@ -1,69 +1,69 @@ -#include <DataStreams/RemoteBlockInputStream.h> -#include <Interpreters/Context.h> - -namespace DB -{ - -RemoteBlockInputStream::RemoteBlockInputStream( +#include <DataStreams/RemoteBlockInputStream.h> +#include <Interpreters/Context.h> + +namespace DB +{ + +RemoteBlockInputStream::RemoteBlockInputStream( Connection & connection, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) : query_executor(connection, query_, header_, context_, throttler, scalars_, external_tables_, stage_) -{ - init(); -} - -RemoteBlockInputStream::RemoteBlockInputStream( +{ + init(); +} + +RemoteBlockInputStream::RemoteBlockInputStream( const ConnectionPoolWithFailoverPtr & pool, std::vector<IConnectionPool::Entry> && connections, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) : query_executor(pool, std::move(connections), query_, header_, context_, throttler, scalars_, external_tables_, stage_) -{ - init(); -} - -RemoteBlockInputStream::RemoteBlockInputStream( +{ + init(); +} + +RemoteBlockInputStream::RemoteBlockInputStream( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) : query_executor(pool, query_, header_, context_, throttler, scalars_, external_tables_, stage_) -{ - init(); -} - -void RemoteBlockInputStream::init() -{ - query_executor.setProgressCallback([this](const Progress & progress) { progressImpl(progress); }); - query_executor.setProfileInfoCallback([this](const 
BlockStreamProfileInfo & info_) { info.setFrom(info_, true); }); - query_executor.setLogger(log); -} - -void RemoteBlockInputStream::cancel(bool kill) -{ - if (kill) - is_killed = true; - - bool old_val = false; - if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed)) - return; - - query_executor.cancel(); -} - -Block RemoteBlockInputStream::readImpl() -{ - auto block = query_executor.read(); - - if (isCancelledOrThrowIfKilled()) - return Block(); - - return block; -} - -void RemoteBlockInputStream::readSuffixImpl() -{ - query_executor.finish(); -} - -} +{ + init(); +} + +void RemoteBlockInputStream::init() +{ + query_executor.setProgressCallback([this](const Progress & progress) { progressImpl(progress); }); + query_executor.setProfileInfoCallback([this](const BlockStreamProfileInfo & info_) { info.setFrom(info_, true); }); + query_executor.setLogger(log); +} + +void RemoteBlockInputStream::cancel(bool kill) +{ + if (kill) + is_killed = true; + + bool old_val = false; + if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed)) + return; + + query_executor.cancel(); +} + +Block RemoteBlockInputStream::readImpl() +{ + auto block = query_executor.read(); + + if (isCancelledOrThrowIfKilled()) + return Block(); + + return block; +} + +void RemoteBlockInputStream::readSuffixImpl() +{ + query_executor.finish(); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h index 2800b38f37..1be6b03152 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h @@ -1,78 +1,78 @@ -#pragma once - -#include <optional> - -#include <common/logger_useful.h> - -#include <DataStreams/IBlockInputStream.h> -#include <Common/Throttler.h> -#include <Client/ConnectionPool.h> -#include <Client/MultiplexedConnections.h> -#include <Interpreters/Cluster.h> - -#include <DataStreams/RemoteQueryExecutor.h> - -namespace DB -{ - +#pragma once + +#include <optional> + +#include <common/logger_useful.h> + +#include <DataStreams/IBlockInputStream.h> +#include <Common/Throttler.h> +#include <Client/ConnectionPool.h> +#include <Client/MultiplexedConnections.h> +#include <Interpreters/Cluster.h> + +#include <DataStreams/RemoteQueryExecutor.h> + +namespace DB +{ + class Context; -/** This class allows one to launch queries on remote replicas of one shard and get results - */ -class RemoteBlockInputStream : public IBlockInputStream -{ -public: - /// Takes already set connection. - RemoteBlockInputStream( +/** This class allows one to launch queries on remote replicas of one shard and get results + */ +class RemoteBlockInputStream : public IBlockInputStream +{ +public: + /// Takes already set connection. + RemoteBlockInputStream( Connection & connection, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); - - /// Accepts several connections already taken from pool. - RemoteBlockInputStream( + + /// Accepts several connections already taken from pool. 
+ RemoteBlockInputStream( const ConnectionPoolWithFailoverPtr & pool, std::vector<IConnectionPool::Entry> && connections, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); - - /// Takes a pool and gets one or several connections from it. - RemoteBlockInputStream( + + /// Takes a pool and gets one or several connections from it. + RemoteBlockInputStream( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); - - /// Set the query_id. For now, used by performance test to later find the query - /// in the server query_log. Must be called before sending the query to the server. - void setQueryId(const std::string & query_id) { query_executor.setQueryId(query_id); } - - /// Specify how we allocate connections on a shard. - void setPoolMode(PoolMode pool_mode) { query_executor.setPoolMode(pool_mode); } - - void setMainTable(StorageID main_table_) { query_executor.setMainTable(std::move(main_table_)); } - - /// Prevent default progress notification because progress' callback is called by its own. - void progress(const Progress & /*value*/) override {} - - void cancel(bool kill) override; - - String getName() const override { return "Remote"; } - - Block getHeader() const override { return query_executor.getHeader(); } - Block getTotals() override { return query_executor.getTotals(); } - Block getExtremes() override { return query_executor.getExtremes(); } - -protected: - Block readImpl() override; - void readSuffixImpl() override; - -private: - RemoteQueryExecutor query_executor; - Poco::Logger * log = &Poco::Logger::get("RemoteBlockInputStream"); - - void init(); -}; - -} + + /// Set the query_id. For now, used by performance test to later find the query + /// in the server query_log. Must be called before sending the query to the server. + void setQueryId(const std::string & query_id) { query_executor.setQueryId(query_id); } + + /// Specify how we allocate connections on a shard. + void setPoolMode(PoolMode pool_mode) { query_executor.setPoolMode(pool_mode); } + + void setMainTable(StorageID main_table_) { query_executor.setMainTable(std::move(main_table_)); } + + /// Prevent default progress notification because progress' callback is called by its own. 
+ void progress(const Progress & /*value*/) override {} + + void cancel(bool kill) override; + + String getName() const override { return "Remote"; } + + Block getHeader() const override { return query_executor.getHeader(); } + Block getTotals() override { return query_executor.getTotals(); } + Block getExtremes() override { return query_executor.getExtremes(); } + +protected: + Block readImpl() override; + void readSuffixImpl() override; + +private: + RemoteQueryExecutor query_executor; + Poco::Logger * log = &Poco::Logger::get("RemoteBlockInputStream"); + + void init(); +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp index f5ed1de947..a64c4409cc 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp @@ -1,22 +1,22 @@ #include <DataStreams/ConnectionCollector.h> -#include <DataStreams/RemoteQueryExecutor.h> +#include <DataStreams/RemoteQueryExecutor.h> #include <DataStreams/RemoteQueryExecutorReadContext.h> - -#include <Columns/ColumnConst.h> -#include <Common/CurrentThread.h> -#include <Processors/Pipe.h> -#include <Processors/Sources/SourceFromSingleChunk.h> -#include <Storages/IStorage.h> + +#include <Columns/ColumnConst.h> +#include <Common/CurrentThread.h> +#include <Processors/Pipe.h> +#include <Processors/Sources/SourceFromSingleChunk.h> +#include <Storages/IStorage.h> #include <Storages/SelectQueryInfo.h> -#include <Interpreters/castColumn.h> -#include <Interpreters/Cluster.h> +#include <Interpreters/castColumn.h> +#include <Interpreters/Cluster.h> #include <Interpreters/Context.h> -#include <Interpreters/InternalTextLogsQueue.h> +#include <Interpreters/InternalTextLogsQueue.h> #include <IO/ConnectionTimeoutsContext.h> #include <Client/MultiplexedConnections.h> #include <Client/HedgedConnections.h> #include <Storages/MergeTree/MergeTreeDataPartUUID.h> - + namespace CurrentMetrics { @@ -24,17 +24,17 @@ namespace CurrentMetrics extern const Metric ActiveSyncDrainedConnections; } -namespace DB -{ - -namespace ErrorCodes -{ +namespace DB +{ + +namespace ErrorCodes +{ extern const int LOGICAL_ERROR; - extern const int UNKNOWN_PACKET_FROM_SERVER; + extern const int UNKNOWN_PACKET_FROM_SERVER; extern const int DUPLICATED_PART_UUIDS; -} - -RemoteQueryExecutor::RemoteQueryExecutor( +} + +RemoteQueryExecutor::RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_) @@ -43,19 +43,19 @@ RemoteQueryExecutor::RemoteQueryExecutor( {} RemoteQueryExecutor::RemoteQueryExecutor( - Connection & connection, + Connection & connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_) : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_) -{ +{ create_connections = [this, &connection, throttler]() - { + { return std::make_shared<MultiplexedConnections>(connection, context->getSettingsRef(), throttler); - }; -} - -RemoteQueryExecutor::RemoteQueryExecutor( + }; +} + +RemoteQueryExecutor::RemoteQueryExecutor( std::shared_ptr<Connection> 
connection_ptr, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, @@ -76,24 +76,24 @@ RemoteQueryExecutor::RemoteQueryExecutor( QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_) : header(header_), query(query_), context(context_) , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_) -{ +{ create_connections = [this, connections_, throttler]() mutable { return std::make_shared<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler); - }; -} - -RemoteQueryExecutor::RemoteQueryExecutor( + }; +} + +RemoteQueryExecutor::RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_) : header(header_), query(query_), context(context_) , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_) -{ +{ create_connections = [this, throttler]()->std::shared_ptr<IConnections> - { + { const Settings & current_settings = context->getSettingsRef(); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); #if defined(OS_LINUX) if (current_settings.use_hedged_requests) @@ -107,90 +107,90 @@ RemoteQueryExecutor::RemoteQueryExecutor( #endif std::vector<IConnectionPool::Entry> connection_entries; - if (main_table) - { - auto try_results = pool->getManyChecked(timeouts, &current_settings, pool_mode, main_table.getQualifiedName()); + if (main_table) + { + auto try_results = pool->getManyChecked(timeouts, &current_settings, pool_mode, main_table.getQualifiedName()); connection_entries.reserve(try_results.size()); - for (auto & try_result : try_results) + for (auto & try_result : try_results) connection_entries.emplace_back(std::move(try_result.entry)); - } - else + } + else connection_entries = pool->getMany(timeouts, &current_settings, pool_mode); - + return std::make_shared<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler); - }; -} - -RemoteQueryExecutor::~RemoteQueryExecutor() -{ - /** If interrupted in the middle of the loop of communication with replicas, then interrupt - * all connections, then read and skip the remaining packets to make sure - * these connections did not remain hanging in the out-of-sync state. - */ - if (established || isQueryPending()) + }; +} + +RemoteQueryExecutor::~RemoteQueryExecutor() +{ + /** If interrupted in the middle of the loop of communication with replicas, then interrupt + * all connections, then read and skip the remaining packets to make sure + * these connections did not remain hanging in the out-of-sync state. + */ + if (established || isQueryPending()) connections->disconnect(); -} - -/** If we receive a block with slightly different column types, or with excessive columns, - * we will adapt it to expected structure. - */ -static Block adaptBlockStructure(const Block & block, const Block & header) -{ - /// Special case when reader doesn't care about result structure. Deprecated and used only in Benchmark, PerformanceTest.
- if (!header) - return block; - - Block res; - res.info = block.info; - - for (const auto & elem : header) - { - ColumnPtr column; - - if (elem.column && isColumnConst(*elem.column)) - { - /// We expect constant column in block. - /// If block is not empty, then get value for constant from it, - /// because it may be different for remote server for functions like version(), uptime(), ... - if (block.rows() > 0 && block.has(elem.name)) - { - /// Const column is passed as materialized. Get first value from it. - /// - /// TODO: check that column contains the same value. - /// TODO: serialize const columns. - auto col = block.getByName(elem.name); - col.column = block.getByName(elem.name).column->cut(0, 1); - - column = castColumn(col, elem.type); - - if (!isColumnConst(*column)) - column = ColumnConst::create(column, block.rows()); - else - /// It is not possible now. Just in case we support const columns serialization. - column = column->cloneResized(block.rows()); - } - else - column = elem.column->cloneResized(block.rows()); - } - else - column = castColumn(block.getByName(elem.name), elem.type); - - res.insert({column, elem.type, elem.name}); - } - return res; -} - -void RemoteQueryExecutor::sendQuery() -{ - if (sent_query) - return; - +} + +/** If we receive a block with slightly different column types, or with excessive columns, + * we will adapt it to expected structure. + */ +static Block adaptBlockStructure(const Block & block, const Block & header) +{ + /// Special case when reader doesn't care about result structure. Deprecated and used only in Benchmark, PerformanceTest. + if (!header) + return block; + + Block res; + res.info = block.info; + + for (const auto & elem : header) + { + ColumnPtr column; + + if (elem.column && isColumnConst(*elem.column)) + { + /// We expect constant column in block. + /// If block is not empty, then get value for constant from it, + /// because it may be different for remote server for functions like version(), uptime(), ... + if (block.rows() > 0 && block.has(elem.name)) + { + /// Const column is passed as materialized. Get first value from it. + /// + /// TODO: check that column contains the same value. + /// TODO: serialize const columns. + auto col = block.getByName(elem.name); + col.column = block.getByName(elem.name).column->cut(0, 1); + + column = castColumn(col, elem.type); + + if (!isColumnConst(*column)) + column = ColumnConst::create(column, block.rows()); + else + /// It is not possible now. Just in case we support const columns serialization. 
+ column = column->cloneResized(block.rows()); + } + else + column = elem.column->cloneResized(block.rows()); + } + else + column = castColumn(block.getByName(elem.name), elem.type); + + res.insert({column, elem.type, elem.name}); + } + return res; +} + +void RemoteQueryExecutor::sendQuery() +{ + if (sent_query) + return; + connections = create_connections(); - + const auto & settings = context->getSettingsRef(); if (settings.skip_unavailable_shards && 0 == connections->size()) - return; - + return; + /// Query cannot be canceled in the middle of the send query, /// since there are multiple packets: /// - Query @@ -202,17 +202,17 @@ void RemoteQueryExecutor::sendQuery() /// std::lock_guard guard(was_cancelled_mutex); - established = true; + established = true; was_cancelled = false; - - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); + + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); ClientInfo modified_client_info = context->getClientInfo(); - modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; + modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; if (CurrentThread::isInitialized()) { modified_client_info.client_trace_context = CurrentThread::get().thread_trace_context; } - + { std::lock_guard lock(duplicated_part_uuids_mutex); if (!duplicated_part_uuids.empty()) @@ -220,32 +220,32 @@ void RemoteQueryExecutor::sendQuery() } connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true); - - established = false; - sent_query = true; - - if (settings.enable_scalar_subquery_optimization) - sendScalars(); - sendExternalTables(); -} - -Block RemoteQueryExecutor::read() -{ - if (!sent_query) - { - sendQuery(); - + + established = false; + sent_query = true; + + if (settings.enable_scalar_subquery_optimization) + sendScalars(); + sendExternalTables(); +} + +Block RemoteQueryExecutor::read() +{ + if (!sent_query) + { + sendQuery(); + if (context->getSettingsRef().skip_unavailable_shards && (0 == connections->size())) - return {}; - } - - while (true) - { - if (was_cancelled) - return Block(); - + return {}; + } + + while (true) + { + if (was_cancelled) + return Block(); + Packet packet = connections->receivePacket(); - + if (auto block = processPacket(std::move(packet))) return *block; else if (got_duplicated_part_uuids) @@ -280,7 +280,7 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext> return Block(); if (read_context->is_read_in_progress.load(std::memory_order_relaxed)) - { + { read_context->setTimer(); return read_context->epoll.getFileDescriptor(); } @@ -297,7 +297,7 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext> return read(); #endif } - + std::variant<Block, int> RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context) { @@ -339,12 +339,12 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet) if (packet.block && (packet.block.rows() > 0)) return adaptBlockStructure(packet.block, header); break; /// If the block is empty - we will receive other packets before EndOfStream. 
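// --- A minimal, self-contained sketch of the packet-dispatch pattern that
// --- processPacket() above implements: only a non-empty Data packet is handed
// --- back to the caller, Exception rethrows the replica-side error,
// --- EndOfStream marks completion, and everything else is consumed as a side
// --- effect. Packet, Block and PacketType here are simplified stand-ins
// --- assumed for illustration, not the real ClickHouse types.
#include <optional>
#include <stdexcept>
#include <string>

struct Block { size_t rows = 0; };
enum class PacketType { Data, Exception, EndOfStream, Progress };

struct Packet
{
    PacketType type;
    Block block;        // meaningful only for Data
    std::string error;  // meaningful only for Exception
};

struct PacketDispatcher
{
    bool finished = false;

    // Returns a block for the caller, or std::nullopt when the packet was
    // absorbed internally (progress info, empty block, end of stream, ...).
    std::optional<Block> process(const Packet & packet)
    {
        switch (packet.type)
        {
            case PacketType::Data:
                if (packet.block.rows > 0)
                    return packet.block;   // useful data: hand it to the caller
                return std::nullopt;       // empty block: keep reading packets
            case PacketType::Exception:
                throw std::runtime_error(packet.error); // error from a replica
            case PacketType::EndOfStream:
                finished = true;           // no replica has more data
                return std::nullopt;
            case PacketType::Progress:
                return std::nullopt;       // side-channel information only
        }
        return std::nullopt;
    }
};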
- + case Protocol::Server::Exception: got_exception_from_replica = true; packet.exception->rethrow(); break; - + case Protocol::Server::EndOfStream: if (!connections->hasActiveConnections()) { @@ -352,7 +352,7 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet) return Block(); } break; - + case Protocol::Server::Progress: /** We use the progress from a remote server. * We also include in ProcessList, @@ -363,21 +363,21 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet) if (progress_callback) progress_callback(packet.progress); break; - + case Protocol::Server::ProfileInfo: /// Use own (client-side) info about read bytes, it is more correct info than server-side one. if (profile_info_callback) profile_info_callback(packet.profile_info); break; - + case Protocol::Server::Totals: totals = packet.block; break; - + case Protocol::Server::Extremes: extremes = packet.block; break; - + case Protocol::Server::Log: /// Pass logs from remote server to client if (auto log_queue = CurrentThread::getInternalTextLogsQueue()) @@ -389,11 +389,11 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet) throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", toString(packet.type), connections->dumpAddresses()); - } + } return {}; -} - +} + bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids) { auto query_context = context->getQueryContext(); @@ -417,82 +417,82 @@ void RemoteQueryExecutor::processReadTaskRequest() } void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context) -{ - /** If one of: - * - nothing started to do; - * - received all packets before EndOfStream; - * - received exception from one replica; - * - received an unknown packet from one replica; - * then you do not need to read anything. - */ - if (!isQueryPending() || hasThrownException()) - return; - - /** If you have not read all the data yet, but they are no longer needed. - * This may be due to the fact that the data is sufficient (for example, when using LIMIT). - */ - - /// Send the request to abort the execution of the request, if not already sent. +{ + /** If one of: + * - nothing started to do; + * - received all packets before EndOfStream; + * - received exception from one replica; + * - received an unknown packet from one replica; + * then you do not need to read anything. + */ + if (!isQueryPending() || hasThrownException()) + return; + + /** If you have not read all the data yet, but they are no longer needed. + * This may be due to the fact that the data is sufficient (for example, when using LIMIT). + */ + + /// Send the request to abort the execution of the request, if not already sent. tryCancel("Cancelling query because enough data has been read", read_context); /// Try to drain connections asynchronously. if (auto conn = ConnectionCollector::enqueueConnectionCleanup(pool, connections)) - { + { /// Drain connections synchronously. CurrentMetrics::Increment metric_increment(CurrentMetrics::ActiveSyncDrainedConnections); ConnectionCollector::drainConnections(*conn); CurrentMetrics::add(CurrentMetrics::SyncDrainedConnections, 1); - } + } finished = true; -} - +} + void RemoteQueryExecutor::cancel(std::unique_ptr<ReadContext> * read_context) -{ - { - std::lock_guard lock(external_tables_mutex); - - /// Stop sending external data. 
- for (auto & vec : external_tables_data) - for (auto & elem : vec) - elem->is_cancelled = true; - } - - if (!isQueryPending() || hasThrownException()) - return; - +{ + { + std::lock_guard lock(external_tables_mutex); + + /// Stop sending external data. + for (auto & vec : external_tables_data) + for (auto & elem : vec) + elem->is_cancelled = true; + } + + if (!isQueryPending() || hasThrownException()) + return; + tryCancel("Cancelling query", read_context); -} - -void RemoteQueryExecutor::sendScalars() -{ +} + +void RemoteQueryExecutor::sendScalars() +{ connections->sendScalarsData(scalars); -} - -void RemoteQueryExecutor::sendExternalTables() -{ +} + +void RemoteQueryExecutor::sendExternalTables() +{ size_t count = connections->size(); - - { - std::lock_guard lock(external_tables_mutex); - + + { + std::lock_guard lock(external_tables_mutex); + external_tables_data.clear(); - external_tables_data.reserve(count); - - for (size_t i = 0; i < count; ++i) - { - ExternalTablesData res; - for (const auto & table : external_tables) - { - StoragePtr cur = table.second; - - auto data = std::make_unique<ExternalTableData>(); - data->table_name = table.first; + external_tables_data.reserve(count); + + for (size_t i = 0; i < count; ++i) + { + ExternalTablesData res; + for (const auto & table : external_tables) + { + StoragePtr cur = table.second; + + auto data = std::make_unique<ExternalTableData>(); + data->table_name = table.first; data->creating_pipe_callback = [cur, context = this->context]() { SelectQueryInfo query_info; auto metadata_snapshot = cur->getInMemoryMetadataPtr(); QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage( context, QueryProcessingStage::Complete, metadata_snapshot, query_info); - + Pipe pipe = cur->read( metadata_snapshot->getColumns().getNamesOfPhysical(), metadata_snapshot, query_info, context, @@ -501,28 +501,28 @@ void RemoteQueryExecutor::sendExternalTables() if (pipe.empty()) return std::make_unique<Pipe>( std::make_shared<SourceFromSingleChunk>(metadata_snapshot->getSampleBlock(), Chunk())); - + return std::make_unique<Pipe>(std::move(pipe)); }; data->pipe = data->creating_pipe_callback(); - res.emplace_back(std::move(data)); - } - external_tables_data.push_back(std::move(res)); - } - } - + res.emplace_back(std::move(data)); + } + external_tables_data.push_back(std::move(res)); + } + } + connections->sendExternalTablesData(external_tables_data); -} - +} + void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context) -{ +{ /// Flag was_cancelled is atomic because it is checked in read(). 
std::lock_guard guard(was_cancelled_mutex); - + if (was_cancelled) return; - + was_cancelled = true; if (read_context && *read_context) @@ -538,21 +538,21 @@ void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadCon (*read_context)->setTimer(); (*read_context)->cancel(); } - + connections->sendCancel(); - if (log) + if (log) LOG_TRACE(log, "({}) {}", connections->dumpAddresses(), reason); -} - -bool RemoteQueryExecutor::isQueryPending() const -{ - return sent_query && !finished; -} - -bool RemoteQueryExecutor::hasThrownException() const -{ - return got_exception_from_replica || got_unknown_packet_from_replica; -} - -} +} + +bool RemoteQueryExecutor::isQueryPending() const +{ + return sent_query && !finished; +} + +bool RemoteQueryExecutor::hasThrownException() const +{ + return got_exception_from_replica || got_unknown_packet_from_replica; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h index 56d99e230e..d82f998389 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h @@ -1,6 +1,6 @@ -#pragma once - -#include <Client/ConnectionPool.h> +#pragma once + +#include <Client/ConnectionPool.h> #include <Client/IConnections.h> #include <Client/ConnectionPoolWithFailover.h> #include <Storages/IStorage_fwd.h> @@ -8,41 +8,41 @@ #include <Interpreters/StorageID.h> #include <Common/TimerDescriptor.h> #include <variant> - -namespace DB -{ - + +namespace DB +{ + class Context; -class Throttler; -using ThrottlerPtr = std::shared_ptr<Throttler>; - -struct Progress; -using ProgressCallback = std::function<void(const Progress & progress)>; - -struct BlockStreamProfileInfo; -using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & info)>; - +class Throttler; +using ThrottlerPtr = std::shared_ptr<Throttler>; + +struct Progress; +using ProgressCallback = std::function<void(const Progress & progress)>; + +struct BlockStreamProfileInfo; +using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & info)>; + class RemoteQueryExecutorReadContext; /// This is the same type as StorageS3Source::IteratorWrapper using TaskIterator = std::function<String()>; -/// This class allows one to launch queries on remote replicas of one shard and get results -class RemoteQueryExecutor -{ -public: +/// This class allows one to launch queries on remote replicas of one shard and get results +class RemoteQueryExecutor +{ +public: using ReadContext = RemoteQueryExecutorReadContext; - /// Takes already set connection. + /// Takes already set connection. /// We don't own connection, thus we have to drain it synchronously. - RemoteQueryExecutor( - Connection & connection, + RemoteQueryExecutor( + Connection & connection, const String & query_, const Block & header_, ContextPtr context_, - ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), + ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {}); - + /// Takes already set connection. 
RemoteQueryExecutor( std::shared_ptr<Connection> connection, @@ -50,134 +50,134 @@ public: ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {}); - /// Accepts several connections already taken from pool. - RemoteQueryExecutor( + /// Accepts several connections already taken from pool. + RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, std::vector<IConnectionPool::Entry> && connections_, const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), + const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {}); - - /// Takes a pool and gets one or several connections from it. - RemoteQueryExecutor( - const ConnectionPoolWithFailoverPtr & pool, + + /// Takes a pool and gets one or several connections from it. + RemoteQueryExecutor( + const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), + const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {}); - - ~RemoteQueryExecutor(); - - /// Create connection and send query, external tables and scalars. - void sendQuery(); - + + ~RemoteQueryExecutor(); + + /// Create connection and send query, external tables and scalars. + void sendQuery(); + /// Query is resent to a replica, the query itself can be modified. std::atomic<bool> resent_query { false }; - /// Read next block of data. Returns empty block if query is finished. - Block read(); - + /// Read next block of data. Returns empty block if query is finished. + Block read(); + /// Async variant of read. Returns ready block or file descriptor which may be used for polling. /// ReadContext is an internal read state. Pass empty ptr first time, reuse created one for every call. std::variant<Block, int> read(std::unique_ptr<ReadContext> & read_context); - /// Receive all remain packets and finish query. - /// It should be cancelled after read returned empty block. + /// Receive all remain packets and finish query. + /// It should be cancelled after read returned empty block. void finish(std::unique_ptr<ReadContext> * read_context = nullptr); - - /// Cancel query execution. Sends Cancel packet and ignore others. - /// This method may be called from separate thread. + + /// Cancel query execution. Sends Cancel packet and ignore others. + /// This method may be called from separate thread. void cancel(std::unique_ptr<ReadContext> * read_context = nullptr); - - /// Get totals and extremes if any. - Block getTotals() { return std::move(totals); } - Block getExtremes() { return std::move(extremes); } - - /// Set callback for progress. It will be called on Progress packet. - void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } - - /// Set callback for profile info. 
It will be called on ProfileInfo packet. - void setProfileInfoCallback(ProfileInfoCallback callback) { profile_info_callback = std::move(callback); } - - /// Set the query_id. For now, used by performance test to later find the query - /// in the server query_log. Must be called before sending the query to the server. - void setQueryId(const std::string& query_id_) { assert(!sent_query); query_id = query_id_; } - - /// Specify how we allocate connections on a shard. - void setPoolMode(PoolMode pool_mode_) { pool_mode = pool_mode_; } - - void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); } - - void setLogger(Poco::Logger * logger) { log = logger; } - - const Block & getHeader() const { return header; } - -private: + + /// Get totals and extremes if any. + Block getTotals() { return std::move(totals); } + Block getExtremes() { return std::move(extremes); } + + /// Set callback for progress. It will be called on Progress packet. + void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } + + /// Set callback for profile info. It will be called on ProfileInfo packet. + void setProfileInfoCallback(ProfileInfoCallback callback) { profile_info_callback = std::move(callback); } + + /// Set the query_id. For now, used by performance test to later find the query + /// in the server query_log. Must be called before sending the query to the server. + void setQueryId(const std::string& query_id_) { assert(!sent_query); query_id = query_id_; } + + /// Specify how we allocate connections on a shard. + void setPoolMode(PoolMode pool_mode_) { pool_mode = pool_mode_; } + + void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); } + + void setLogger(Poco::Logger * logger) { log = logger; } + + const Block & getHeader() const { return header; } + +private: RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_); - Block header; - Block totals; - Block extremes; - - const String query; + Block header; + Block totals; + Block extremes; + + const String query; String query_id; ContextPtr context; - - ProgressCallback progress_callback; - ProfileInfoCallback profile_info_callback; - - /// Scalars needed to be sent to remote servers - Scalars scalars; - /// Temporary tables needed to be sent to remote servers - Tables external_tables; - QueryProcessingStage::Enum stage; + + ProgressCallback progress_callback; + ProfileInfoCallback profile_info_callback; + + /// Scalars needed to be sent to remote servers + Scalars scalars; + /// Temporary tables needed to be sent to remote servers + Tables external_tables; + QueryProcessingStage::Enum stage; /// Initiator identifier for distributed task processing std::shared_ptr<TaskIterator> task_iterator; - + std::function<std::shared_ptr<IConnections>()> create_connections; /// Hold a shared reference to the connection pool so that asynchronous connection draining will /// work safely. Make sure it's the first member so that we don't destruct it too early. 
const ConnectionPoolWithFailoverPtr pool; std::shared_ptr<IConnections> connections; - /// Streams for reading from temporary tables and following sending of data - /// to remote servers for GLOBAL-subqueries - std::vector<ExternalTablesData> external_tables_data; - std::mutex external_tables_mutex; - - /// Connections to replicas are established, but no queries are sent yet - std::atomic<bool> established { false }; - - /// Query is sent (used before getting first block) - std::atomic<bool> sent_query { false }; - - /** All data from all replicas are received, before EndOfStream packet. - * To prevent desynchronization, if not all data is read before object - * destruction, it's required to send cancel query request to replicas and - * read all packets before EndOfStream - */ - std::atomic<bool> finished { false }; - - /** Cancel query request was sent to all replicas because data is not needed anymore - * This behaviour may occur when: - * - data size is already satisfactory (when using LIMIT, for example) - * - an exception was thrown from client side - */ - std::atomic<bool> was_cancelled { false }; - std::mutex was_cancelled_mutex; - - /** An exception from replica was received. No need in receiving more packets or - * requesting to cancel query execution - */ - std::atomic<bool> got_exception_from_replica { false }; - - /** Unknown packet was received from replica. No need in receiving more packets or - * requesting to cancel query execution - */ - std::atomic<bool> got_unknown_packet_from_replica { false }; - + /// Streams for reading from temporary tables and following sending of data + /// to remote servers for GLOBAL-subqueries + std::vector<ExternalTablesData> external_tables_data; + std::mutex external_tables_mutex; + + /// Connections to replicas are established, but no queries are sent yet + std::atomic<bool> established { false }; + + /// Query is sent (used before getting first block) + std::atomic<bool> sent_query { false }; + + /** All data from all replicas are received, before EndOfStream packet. + * To prevent desynchronization, if not all data is read before object + * destruction, it's required to send cancel query request to replicas and + * read all packets before EndOfStream + */ + std::atomic<bool> finished { false }; + + /** Cancel query request was sent to all replicas because data is not needed anymore + * This behaviour may occur when: + * - data size is already satisfactory (when using LIMIT, for example) + * - an exception was thrown from client side + */ + std::atomic<bool> was_cancelled { false }; + std::mutex was_cancelled_mutex; + + /** An exception from replica was received. No need in receiving more packets or + * requesting to cancel query execution + */ + std::atomic<bool> got_exception_from_replica { false }; + + /** Unknown packet was received from replica. 
No need in receiving more packets or + * requesting to cancel query execution + */ + std::atomic<bool> got_unknown_packet_from_replica { false }; + /** Got duplicated uuids from replica */ std::atomic<bool> got_duplicated_part_uuids{ false }; @@ -186,17 +186,17 @@ private: std::mutex duplicated_part_uuids_mutex; std::vector<UUID> duplicated_part_uuids; - PoolMode pool_mode = PoolMode::GET_MANY; - StorageID main_table = StorageID::createEmpty(); - - Poco::Logger * log = nullptr; - - /// Send all scalars to remote servers - void sendScalars(); - - /// Send all temporary tables to remote servers - void sendExternalTables(); - + PoolMode pool_mode = PoolMode::GET_MANY; + StorageID main_table = StorageID::createEmpty(); + + Poco::Logger * log = nullptr; + + /// Send all scalars to remote servers + void sendScalars(); + + /// Send all temporary tables to remote servers + void sendExternalTables(); + /// Set part uuids to a query context, collected from remote replicas. /// Return true if duplicates found. bool setPartUUIDs(const std::vector<UUID> & uuids); @@ -207,14 +207,14 @@ private: /// only for `allow_experimental_query_deduplication`. std::variant<Block, int> restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context = nullptr); - /// If wasn't sent yet, send request to cancel all connections to replicas + /// If wasn't sent yet, send request to cancel all connections to replicas void tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context); - - /// Returns true if query was sent - bool isQueryPending() const; - - /// Returns true if exception was thrown - bool hasThrownException() const; + + /// Returns true if query was sent + bool isQueryPending() const; + + /// Returns true if exception was thrown + bool hasThrownException() const; /// Process packet for read and return data block if possible. 
std::optional<Block> processPacket(Packet packet); @@ -222,6 +222,6 @@ private: /// Reads packet by packet Block readPackets(); -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp index fd8a6f54a3..f7d05fa3be 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp @@ -1,43 +1,43 @@ #include <DataTypes/DataTypeCustomGeo.h> -#include <DataTypes/DataTypeArray.h> -#include <DataTypes/DataTypeCustom.h> -#include <DataTypes/DataTypeFactory.h> -#include <DataTypes/DataTypeTuple.h> -#include <DataTypes/DataTypesNumber.h> - -namespace DB -{ - -void registerDataTypeDomainGeo(DataTypeFactory & factory) -{ - // Custom type for point represented as its coordinates stored as Tuple(Float64, Float64) - factory.registerSimpleDataTypeCustom("Point", [] - { - return std::make_pair(DataTypeFactory::instance().get("Tuple(Float64, Float64)"), +#include <DataTypes/DataTypeArray.h> +#include <DataTypes/DataTypeCustom.h> +#include <DataTypes/DataTypeFactory.h> +#include <DataTypes/DataTypeTuple.h> +#include <DataTypes/DataTypesNumber.h> + +namespace DB +{ + +void registerDataTypeDomainGeo(DataTypeFactory & factory) +{ + // Custom type for point represented as its coordinates stored as Tuple(Float64, Float64) + factory.registerSimpleDataTypeCustom("Point", [] + { + return std::make_pair(DataTypeFactory::instance().get("Tuple(Float64, Float64)"), std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePointName>())); - }); - - // Custom type for simple polygon without holes stored as Array(Point) - factory.registerSimpleDataTypeCustom("Ring", [] - { - return std::make_pair(DataTypeFactory::instance().get("Array(Point)"), + }); + + // Custom type for simple polygon without holes stored as Array(Point) + factory.registerSimpleDataTypeCustom("Ring", [] + { + return std::make_pair(DataTypeFactory::instance().get("Array(Point)"), std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeRingName>())); - }); - - // Custom type for polygon with holes stored as Array(Ring) - // First element of outer array is outer shape of polygon and all the following are holes - factory.registerSimpleDataTypeCustom("Polygon", [] - { - return std::make_pair(DataTypeFactory::instance().get("Array(Ring)"), + }); + + // Custom type for polygon with holes stored as Array(Ring) + // First element of outer array is outer shape of polygon and all the following are holes + factory.registerSimpleDataTypeCustom("Polygon", [] + { + return std::make_pair(DataTypeFactory::instance().get("Array(Ring)"), std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePolygonName>())); - }); - - // Custom type for multiple polygons with holes stored as Array(Polygon) - factory.registerSimpleDataTypeCustom("MultiPolygon", [] - { - return std::make_pair(DataTypeFactory::instance().get("Array(Polygon)"), + }); + + // Custom type for multiple polygons with holes stored as Array(Polygon) + factory.registerSimpleDataTypeCustom("MultiPolygon", [] + { + return std::make_pair(DataTypeFactory::instance().get("Array(Polygon)"), std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiPolygonName>())); - }); -} - -} + }); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp 
b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp index 85204eb05d..808aa43528 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp @@ -1,29 +1,29 @@ #include <DataTypes/Serializations/SerializationIP.h> -#include <DataTypes/DataTypeFactory.h> -#include <DataTypes/DataTypeCustom.h> - -namespace DB -{ - -void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory) -{ - factory.registerSimpleDataTypeCustom("IPv4", [] - { +#include <DataTypes/DataTypeFactory.h> +#include <DataTypes/DataTypeCustom.h> + +namespace DB +{ + +void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory) +{ + factory.registerSimpleDataTypeCustom("IPv4", [] + { auto type = DataTypeFactory::instance().get("UInt32"); return std::make_pair(type, std::make_unique<DataTypeCustomDesc>( std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<SerializationIPv4>(type->getDefaultSerialization()))); - }); - - factory.registerSimpleDataTypeCustom("IPv6", [] - { + }); + + factory.registerSimpleDataTypeCustom("IPv6", [] + { auto type = DataTypeFactory::instance().get("FixedString(16)"); return std::make_pair(type, std::make_unique<DataTypeCustomDesc>( std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<SerializationIPv6>(type->getDefaultSerialization()))); - }); - - /// MySQL, MariaDB - factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive); -} - -} + }); + + /// MySQL, MariaDB + factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive); + factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index f1ee5c670f..023629fc69 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -1,38 +1,38 @@ #include <Common/FieldVisitorToString.h> -#include <Common/typeid_cast.h> - -#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h> -#include <DataTypes/DataTypeLowCardinality.h> -#include <DataTypes/DataTypeTuple.h> -#include <DataTypes/DataTypeFactory.h> - -#include <AggregateFunctions/AggregateFunctionFactory.h> -#include <Parsers/ASTFunction.h> -#include <Parsers/ASTLiteral.h> -#include <Parsers/ASTIdentifier.h> - -#include <boost/algorithm/string/join.hpp> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int SYNTAX_ERROR; - extern const int BAD_ARGUMENTS; - extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - +#include <Common/typeid_cast.h> + +#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h> +#include <DataTypes/DataTypeLowCardinality.h> +#include <DataTypes/DataTypeTuple.h> +#include <DataTypes/DataTypeFactory.h> + +#include <AggregateFunctions/AggregateFunctionFactory.h> +#include <Parsers/ASTFunction.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/ASTIdentifier.h> + +#include <boost/algorithm/string/join.hpp> + + +namespace DB +{ + +namespace ErrorCodes 
+{ + extern const int SYNTAX_ERROR; + extern const int BAD_ARGUMENTS; + extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; +} + void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const AggregateFunctionPtr & function) { /// TODO Make it sane. static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor", "sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"}; - + // check function if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions)) { @@ -40,54 +40,54 @@ void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const Aggreg ErrorCodes::BAD_ARGUMENTS); } } - -String DataTypeCustomSimpleAggregateFunction::getName() const -{ + +String DataTypeCustomSimpleAggregateFunction::getName() const +{ WriteBufferFromOwnString stream; - stream << "SimpleAggregateFunction(" << function->getName(); - - if (!parameters.empty()) - { - stream << "("; - for (size_t i = 0; i < parameters.size(); ++i) - { - if (i) - stream << ", "; + stream << "SimpleAggregateFunction(" << function->getName(); + + if (!parameters.empty()) + { + stream << "("; + for (size_t i = 0; i < parameters.size(); ++i) + { + if (i) + stream << ", "; stream << applyVisitor(FieldVisitorToString(), parameters[i]); - } - stream << ")"; - } - - for (const auto & argument_type : argument_types) - stream << ", " << argument_type->getName(); - - stream << ")"; - return stream.str(); -} - - -static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments) -{ - String function_name; - AggregateFunctionPtr function; - DataTypes argument_types; - Array params_row; - - if (!arguments || arguments->children.empty()) - throw Exception("Data type SimpleAggregateFunction requires parameters: " - "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>()) - { - if (parametric->parameters) - throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR); - function_name = parametric->name; - + } + stream << ")"; + } + + for (const auto & argument_type : argument_types) + stream << ", " << argument_type->getName(); + + stream << ")"; + return stream.str(); +} + + +static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments) +{ + String function_name; + AggregateFunctionPtr function; + DataTypes argument_types; + Array params_row; + + if (!arguments || arguments->children.empty()) + throw Exception("Data type SimpleAggregateFunction requires parameters: " + "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>()) + { + if (parametric->parameters) + throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR); + function_name = parametric->name; + if (parametric->arguments) - { + { const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children; params_row.resize(parameters.size()); - + for (size_t i = 0; i < parameters.size(); ++i) { const ASTLiteral * lit = parameters[i]->as<ASTLiteral>(); @@ -101,48 +101,48 @@ static 
std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & argum params_row[i] = lit->value; } - } - } - else if (auto opt_name = tryGetIdentifierName(arguments->children[0])) - { - function_name = *opt_name; - } - else if (arguments->children[0]->as<ASTLiteral>()) - { - throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function", - ErrorCodes::BAD_ARGUMENTS); - } - else - throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.", - ErrorCodes::BAD_ARGUMENTS); - - for (size_t i = 1; i < arguments->children.size(); ++i) - argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i])); - - if (function_name.empty()) - throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); - - AggregateFunctionProperties properties; - function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties); - + } + } + else if (auto opt_name = tryGetIdentifierName(arguments->children[0])) + { + function_name = *opt_name; + } + else if (arguments->children[0]->as<ASTLiteral>()) + { + throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function", + ErrorCodes::BAD_ARGUMENTS); + } + else + throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.", + ErrorCodes::BAD_ARGUMENTS); + + for (size_t i = 1; i < arguments->children.size(); ++i) + argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i])); + + if (function_name.empty()) + throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); + + AggregateFunctionProperties properties; + function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties); + DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function); - - DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName()); - - if (!function->getReturnType()->equals(*removeLowCardinality(storage_type))) - { - throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(), - ErrorCodes::BAD_ARGUMENTS); - } - - DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row); - - return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr)); -} - -void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) -{ - factory.registerDataTypeCustom("SimpleAggregateFunction", create); -} - -} + + DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName()); + + if (!function->getReturnType()->equals(*removeLowCardinality(storage_type))) + { + throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(), + ErrorCodes::BAD_ARGUMENTS); + } + + DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row); + + return 
std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr)); +} + +void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) +{ + factory.registerDataTypeCustom("SimpleAggregateFunction", create); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index bd153a4f59..dc054144e1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -1,42 +1,42 @@ -#pragma once - -#include <DataTypes/DataTypeCustom.h> -#include <AggregateFunctions/IAggregateFunction.h> - -#include <IO/ReadHelpers.h> - -namespace DB -{ - -/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard - * data type but when rows are merged, an aggregation function is applied. - * - * The aggregation function is limited to simple functions whose merge state is the final result: - * any, anyLast, min, max, sum - * - * Examples: - * - * SimpleAggregateFunction(sum, Nullable(Float64)) - * SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) - * SimpleAggregateFunction(anyLast, IPv4) - * +#pragma once + +#include <DataTypes/DataTypeCustom.h> +#include <AggregateFunctions/IAggregateFunction.h> + +#include <IO/ReadHelpers.h> + +namespace DB +{ + +/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard + * data type but when rows are merged, an aggregation function is applied. + * + * The aggregation function is limited to simple functions whose merge state is the final result: + * any, anyLast, min, max, sum + * + * Examples: + * + * SimpleAggregateFunction(sum, Nullable(Float64)) + * SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) + * SimpleAggregateFunction(anyLast, IPv4) + * * Technically, a standard IDataType is instantiated and customized with IDataTypeCustomName and DataTypeCustomDesc. 
- */ - -class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName -{ -private: - const AggregateFunctionPtr function; - const DataTypes argument_types; - const Array parameters; - -public: - DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_) - : function(function_), argument_types(argument_types_), parameters(parameters_) {} - - const AggregateFunctionPtr getFunction() const { return function; } - String getName() const override; + */ + +class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName +{ +private: + const AggregateFunctionPtr function; + const DataTypes argument_types; + const Array parameters; + +public: + DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_) + : function(function_), argument_types(argument_types_), parameters(parameters_) {} + + const AggregateFunctionPtr getFunction() const { return function; } + String getName() const override; static void checkSupportedFunctions(const AggregateFunctionPtr & function); -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index 141fa08dc3..41ba81814d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -1,90 +1,90 @@ -#include <Columns/ColumnArray.h> -#include <Columns/ColumnConst.h> -#include <Columns/ColumnTuple.h> +#include <Columns/ColumnArray.h> +#include <Columns/ColumnConst.h> +#include <Columns/ColumnTuple.h> #include <Columns/ColumnMap.h> -#include <Columns/ColumnLowCardinality.h> - -#include <DataTypes/DataTypeLowCardinality.h> -#include <DataTypes/DataTypeArray.h> -#include <DataTypes/DataTypeTuple.h> +#include <Columns/ColumnLowCardinality.h> + +#include <DataTypes/DataTypeLowCardinality.h> +#include <DataTypes/DataTypeArray.h> +#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeMap.h> - -#include <Common/assert_cast.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int TYPE_MISMATCH; -} - -DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) -{ - if (!type) - return type; - - if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get())) - return std::make_shared<DataTypeArray>(recursiveRemoveLowCardinality(array_type->getNestedType())); - - if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get())) - { - DataTypes elements = tuple_type->getElements(); - for (auto & element : elements) - element = recursiveRemoveLowCardinality(element); - - if (tuple_type->haveExplicitNames()) + +#include <Common/assert_cast.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int TYPE_MISMATCH; +} + +DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) +{ + if (!type) + return type; + + if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get())) + return std::make_shared<DataTypeArray>(recursiveRemoveLowCardinality(array_type->getNestedType())); + + if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get())) + { + DataTypes elements = tuple_type->getElements(); + for (auto & element : elements) + 
element = recursiveRemoveLowCardinality(element); + + if (tuple_type->haveExplicitNames()) return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames(), tuple_type->serializeNames()); - else - return std::make_shared<DataTypeTuple>(elements); - } - + else + return std::make_shared<DataTypeTuple>(elements); + } + if (const auto * map_type = typeid_cast<const DataTypeMap *>(type.get())) { return std::make_shared<DataTypeMap>(recursiveRemoveLowCardinality(map_type->getKeyType()), recursiveRemoveLowCardinality(map_type->getValueType())); } - if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get())) - return low_cardinality_type->getDictionaryType(); - - return type; -} - -ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) -{ - if (!column) - return column; - - if (const auto * column_array = typeid_cast<const ColumnArray *>(column.get())) - { - const auto & data = column_array->getDataPtr(); - auto data_no_lc = recursiveRemoveLowCardinality(data); - if (data.get() == data_no_lc.get()) - return column; - - return ColumnArray::create(data_no_lc, column_array->getOffsetsPtr()); - } - - if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get())) - { - const auto & nested = column_const->getDataColumnPtr(); - auto nested_no_lc = recursiveRemoveLowCardinality(nested); - if (nested.get() == nested_no_lc.get()) - return column; - - return ColumnConst::create(nested_no_lc, column_const->size()); - } - - if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get())) - { - auto columns = column_tuple->getColumns(); - for (auto & element : columns) - element = recursiveRemoveLowCardinality(element); - return ColumnTuple::create(columns); - } - + if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get())) + return low_cardinality_type->getDictionaryType(); + + return type; +} + +ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) +{ + if (!column) + return column; + + if (const auto * column_array = typeid_cast<const ColumnArray *>(column.get())) + { + const auto & data = column_array->getDataPtr(); + auto data_no_lc = recursiveRemoveLowCardinality(data); + if (data.get() == data_no_lc.get()) + return column; + + return ColumnArray::create(data_no_lc, column_array->getOffsetsPtr()); + } + + if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get())) + { + const auto & nested = column_const->getDataColumnPtr(); + auto nested_no_lc = recursiveRemoveLowCardinality(nested); + if (nested.get() == nested_no_lc.get()) + return column; + + return ColumnConst::create(nested_no_lc, column_const->size()); + } + + if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get())) + { + auto columns = column_tuple->getColumns(); + for (auto & element : columns) + element = recursiveRemoveLowCardinality(element); + return ColumnTuple::create(columns); + } + if (const auto * column_map = typeid_cast<const ColumnMap *>(column.get())) { const auto & nested = column_map->getNestedColumnPtr(); @@ -95,102 +95,102 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) return ColumnMap::create(nested_no_lc); } - if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get())) - return column_low_cardinality->convertToFullColumn(); - - return column; -} - -ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) -{ - if (!column) 
- return column; - - if (from_type->equals(*to_type)) - return column; - - /// We can allow insert enum column if it's numeric type is the same as the column's type in table. - if (WhichDataType(to_type).isEnum() && from_type->getTypeId() == to_type->getTypeId()) - return column; - - if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get())) - { - const auto & nested = column_const->getDataColumnPtr(); - auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type); - if (nested.get() == nested_no_lc.get()) - return column; - - return ColumnConst::create(nested_no_lc, column_const->size()); - } - - if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get())) - { - if (to_type->equals(*low_cardinality_type->getDictionaryType())) - return column->convertToFullColumnIfLowCardinality(); - } - - if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(to_type.get())) - { - if (from_type->equals(*low_cardinality_type->getDictionaryType())) - { - auto col = low_cardinality_type->createColumn(); - assert_cast<ColumnLowCardinality &>(*col).insertRangeFromFullColumn(*column, 0, column->size()); - return col; - } - } - - if (const auto * from_array_type = typeid_cast<const DataTypeArray *>(from_type.get())) - { - if (const auto * to_array_type = typeid_cast<const DataTypeArray *>(to_type.get())) - { - const auto * column_array = typeid_cast<const ColumnArray *>(column.get()); - if (!column_array) - throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), - ErrorCodes::ILLEGAL_COLUMN); - - const auto & nested_from = from_array_type->getNestedType(); - const auto & nested_to = to_array_type->getNestedType(); - - return ColumnArray::create( - recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to), - column_array->getOffsetsPtr()); - } - } - - if (const auto * from_tuple_type = typeid_cast<const DataTypeTuple *>(from_type.get())) - { - if (const auto * to_tuple_type = typeid_cast<const DataTypeTuple *>(to_type.get())) - { - const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()); - if (!column_tuple) - throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), - ErrorCodes::ILLEGAL_COLUMN); - - auto columns = column_tuple->getColumns(); - const auto & from_elements = from_tuple_type->getElements(); - const auto & to_elements = to_tuple_type->getElements(); - - bool has_converted = false; - - for (size_t i = 0; i < columns.size(); ++i) - { - auto & element = columns[i]; - auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i)); - if (element.get() != element_no_lc.get()) - { - element = element_no_lc; - has_converted = true; - } - } - - if (!has_converted) - return column; - - return ColumnTuple::create(columns); - } - } - - throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH); -} - -} + if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get())) + return column_low_cardinality->convertToFullColumn(); + + return column; +} + +ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) +{ + if (!column) + return column; + + if (from_type->equals(*to_type)) + return column; + + /// We can allow insert enum column if it's numeric type is the same as the column's type in table. 
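Both helpers follow the same contract: they return the original pointer when nothing inside was LowCardinality, so callers detect a no-op with a cheap pointer comparison instead of a deep walk. A short caller-side sketch, assuming the declarations from DataTypeLowCardinality.h used above:

#include <Columns/IColumn.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>

namespace DB
{

// Sketch: strip LowCardinality from a nested type and from a column,
// using pointer identity to see whether anything actually changed.
void stripExample(const ColumnPtr & col)
{
    auto type = std::make_shared<DataTypeArray>(
        std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));

    DataTypePtr flat_type = recursiveRemoveLowCardinality(type);
    // flat_type->getName() == "Array(String)"

    ColumnPtr flat_col = recursiveRemoveLowCardinality(col);
    if (flat_col.get() == col.get())
    {
        // No LowCardinality anywhere inside `col`; nothing was copied.
    }
}

}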
+ if (WhichDataType(to_type).isEnum() && from_type->getTypeId() == to_type->getTypeId()) + return column; + + if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get())) + { + const auto & nested = column_const->getDataColumnPtr(); + auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type); + if (nested.get() == nested_no_lc.get()) + return column; + + return ColumnConst::create(nested_no_lc, column_const->size()); + } + + if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get())) + { + if (to_type->equals(*low_cardinality_type->getDictionaryType())) + return column->convertToFullColumnIfLowCardinality(); + } + + if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(to_type.get())) + { + if (from_type->equals(*low_cardinality_type->getDictionaryType())) + { + auto col = low_cardinality_type->createColumn(); + assert_cast<ColumnLowCardinality &>(*col).insertRangeFromFullColumn(*column, 0, column->size()); + return col; + } + } + + if (const auto * from_array_type = typeid_cast<const DataTypeArray *>(from_type.get())) + { + if (const auto * to_array_type = typeid_cast<const DataTypeArray *>(to_type.get())) + { + const auto * column_array = typeid_cast<const ColumnArray *>(column.get()); + if (!column_array) + throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + const auto & nested_from = from_array_type->getNestedType(); + const auto & nested_to = to_array_type->getNestedType(); + + return ColumnArray::create( + recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to), + column_array->getOffsetsPtr()); + } + } + + if (const auto * from_tuple_type = typeid_cast<const DataTypeTuple *>(from_type.get())) + { + if (const auto * to_tuple_type = typeid_cast<const DataTypeTuple *>(to_type.get())) + { + const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()); + if (!column_tuple) + throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + auto columns = column_tuple->getColumns(); + const auto & from_elements = from_tuple_type->getElements(); + const auto & to_elements = to_tuple_type->getElements(); + + bool has_converted = false; + + for (size_t i = 0; i < columns.size(); ++i) + { + auto & element = columns[i]; + auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i)); + if (element.get() != element_no_lc.get()) + { + element = element_no_lc; + has_converted = true; + } + } + + if (!has_converted) + return column; + + return ColumnTuple::create(columns); + } + } + + throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h index ff692abf1e..ae938b1104 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h @@ -1,57 +1,57 @@ -#pragma once - +#pragma once + #include <DataTypes/Serializations/SerializationWrapper.h> - -namespace DB -{ - -class ReadBuffer; -class WriteBuffer; -struct FormatSettings; -class 
IColumn; - -/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText - * for all serialization and deserialization. */ + +namespace DB +{ + +class ReadBuffer; +class WriteBuffer; +struct FormatSettings; +class IColumn; + +/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText + * for all serialization and deserialization. */ class SerializationCustomSimpleText : public SerializationWrapper -{ -public: +{ +public: SerializationCustomSimpleText(const SerializationPtr & nested_); - // Methods that subclasses must override in order to get full serialization/deserialization support. - virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; - virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text deserialization without quoting or escaping. - */ - void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - - /** Text serialization with escaping but without quoting. - */ - void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - - /** Text serialization as a literal that may be inserted into a query. - */ - void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - - /** Text serialization for the CSV format. - */ - void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - /** delimiter - the delimiter we expect when reading a string value that is not double-quoted - * (the delimiter is not consumed). - */ - void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - - /** Text serialization intended for using in JSON format. - * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. - */ - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - - /** Text serialization for putting into the XML format. - */ - void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; -}; - -} + // Methods that subclasses must override in order to get full serialization/deserialization support. + virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; + virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + + /** Text deserialization without quoting or escaping. + */ + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + /** Text serialization with escaping but without quoting. 
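A subclass therefore implements just the two hooks, and every concrete text format declared in this header (escaped, quoted, CSV, JSON, XML) funnels through them via the wrapper. A minimal hypothetical example, assuming the ReadHelpers/WriteHelpers free functions; the SerializationYesNo name and its 0/1-to-yes/no rendering are invented for illustration:

#include <Core/Field.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

namespace DB
{

// Hypothetical: render 0/1 values as "no"/"yes". Only the two hooks are
// written; the inherited methods add the per-format escaping and quoting.
class SerializationYesNo final : public SerializationCustomSimpleText
{
public:
    using SerializationCustomSimpleText::SerializationCustomSimpleText;

    void serializeText(const IColumn & column, size_t row_num,
                       WriteBuffer & ostr, const FormatSettings &) const override
    {
        writeString(column.getBool(row_num) ? "yes" : "no", ostr);
    }

    void deserializeText(IColumn & column, ReadBuffer & istr,
                         const FormatSettings &) const override
    {
        String s;
        readString(s, istr);
        column.insert(Field(UInt64(s == "yes")));
    }
};

}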
+ */ + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + /** Text serialization as a literal that may be inserted into a query. + */ + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + /** Text serialization for the CSV format. + */ + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + /** delimiter - the delimiter we expect when reading a string value that is not double-quoted + * (the delimiter is not consumed). + */ + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + /** Text serialization intended for using in JSON format. + * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. + */ + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + /** Text serialization for putting into the XML format. + */ + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h index 45fd1483ee..7fcac8928c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h @@ -1,48 +1,48 @@ -#pragma once - +#pragma once + #include <Disks/IDisk.h> #include <Interpreters/Context_fwd.h> #include <common/types.h> - -#include <boost/noncopyable.hpp> -#include <Poco/Util/AbstractConfiguration.h> - + +#include <boost/noncopyable.hpp> +#include <Poco/Util/AbstractConfiguration.h> + #include <functional> #include <map> #include <unordered_map> - -namespace DB -{ - + +namespace DB +{ + using DisksMap = std::map<String, DiskPtr>; -/** - * Disk factory. Responsible for creating new disk objects. - */ -class DiskFactory final : private boost::noncopyable -{ -public: - using Creator = std::function<DiskPtr( - const String & name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, +/** + * Disk factory. Responsible for creating new disk objects. 
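Registration is the whole API surface here: a disk type string maps to a Creator functor, and create() later picks the Creator by the configured disk type. A hypothetical registration sketch; the "ramlocal" type name is invented, and it simply plants a DiskLocal under /dev/shm:

#include <Disks/DiskFactory.h>
#include <Disks/DiskLocal.h>

namespace DB
{

// Hypothetical: register a "ramlocal" disk type backed by DiskLocal.
// Only the registration mechanics mirror the factory declared here.
void registerDiskRamLocal(DiskFactory & factory)
{
    auto creator = [](const String & name,
                      const Poco::Util::AbstractConfiguration & /*config*/,
                      const String & /*config_prefix*/,
                      ContextPtr /*context*/,
                      const DisksMap & /*map*/) -> DiskPtr
    {
        // DiskLocal requires the path to end with '/'.
        return std::make_shared<DiskLocal>(name, "/dev/shm/" + name + "/", 0);
    };
    factory.registerDiskType("ramlocal", creator);
}

}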
+ */ +class DiskFactory final : private boost::noncopyable +{ +public: + using Creator = std::function<DiskPtr( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, ContextPtr context, const DisksMap & map)>; - - static DiskFactory & instance(); - - void registerDiskType(const String & disk_type, Creator creator); - - DiskPtr create( - const String & name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, + + static DiskFactory & instance(); + + void registerDiskType(const String & disk_type, Creator creator); + + DiskPtr create( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, ContextPtr context, const DisksMap & map) const; - -private: - using DiskTypeRegistry = std::unordered_map<String, Creator>; - DiskTypeRegistry registry; -}; - -} + +private: + using DiskTypeRegistry = std::unordered_map<String, Creator>; + DiskTypeRegistry registry; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp index 202d3a4c6b..37d758c538 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp @@ -1,36 +1,36 @@ -#include "DiskLocal.h" -#include <Common/createHardLink.h> -#include "DiskFactory.h" - +#include "DiskLocal.h" +#include <Common/createHardLink.h> +#include "DiskFactory.h" + #include <Disks/LocalDirectorySyncGuard.h> -#include <Interpreters/Context.h> -#include <Common/filesystemHelpers.h> -#include <Common/quoteString.h> -#include <IO/createReadBufferFromFileBase.h> +#include <Interpreters/Context.h> +#include <Common/filesystemHelpers.h> +#include <Common/quoteString.h> +#include <IO/createReadBufferFromFileBase.h> #include <fstream> #include <unistd.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int EXCESSIVE_ELEMENT_IN_CONFIG; - extern const int PATH_ACCESS_DENIED; - extern const int INCORRECT_DISK_INDEX; + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; + extern const int PATH_ACCESS_DENIED; + extern const int INCORRECT_DISK_INDEX; extern const int CANNOT_TRUNCATE_FILE; extern const int CANNOT_UNLINK; extern const int CANNOT_RMDIR; -} - -std::mutex DiskLocal::reservation_mutex; - - -using DiskLocalPtr = std::shared_ptr<DiskLocal>; - +} + +std::mutex DiskLocal::reservation_mutex; + + +using DiskLocalPtr = std::shared_ptr<DiskLocal>; + static void loadDiskLocalConfig(const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, @@ -81,203 +81,203 @@ static void loadDiskLocalConfig(const String & name, } } -class DiskLocalReservation : public IReservation -{ -public: - DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) - { - } - - UInt64 getSize() const override { return size; } - - DiskPtr getDisk(size_t i) const override; - - Disks getDisks() const override { return {disk}; } - - void update(UInt64 new_size) override; - - ~DiskLocalReservation() override; - -private: - DiskLocalPtr disk; - UInt64 size; - CurrentMetrics::Increment metric_increment; -}; - - -class DiskLocalDirectoryIterator : public IDiskDirectoryIterator -{ -public: - explicit 
DiskLocalDirectoryIterator(const String & disk_path_, const String & dir_path_) +class DiskLocalReservation : public IReservation +{ +public: + DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_) + : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + { + } + + UInt64 getSize() const override { return size; } + + DiskPtr getDisk(size_t i) const override; + + Disks getDisks() const override { return {disk}; } + + void update(UInt64 new_size) override; + + ~DiskLocalReservation() override; + +private: + DiskLocalPtr disk; + UInt64 size; + CurrentMetrics::Increment metric_increment; +}; + + +class DiskLocalDirectoryIterator : public IDiskDirectoryIterator +{ +public: + explicit DiskLocalDirectoryIterator(const String & disk_path_, const String & dir_path_) : dir_path(dir_path_), entry(fs::path(disk_path_) / dir_path_) - { - } - + { + } + void next() override { ++entry; } - + bool isValid() const override { return entry != fs::directory_iterator(); } - - String path() const override - { + + String path() const override + { if (entry->is_directory()) return dir_path / entry->path().filename() / ""; - else + else return dir_path / entry->path().filename(); - } - - + } + + String name() const override { return entry->path().filename(); } -private: +private: fs::path dir_path; fs::directory_iterator entry; -}; - - -ReservationPtr DiskLocal::reserve(UInt64 bytes) -{ - if (!tryReserve(bytes)) - return {}; - return std::make_unique<DiskLocalReservation>(std::static_pointer_cast<DiskLocal>(shared_from_this()), bytes); -} - -bool DiskLocal::tryReserve(UInt64 bytes) -{ - std::lock_guard lock(DiskLocal::reservation_mutex); - if (bytes == 0) - { +}; + + +ReservationPtr DiskLocal::reserve(UInt64 bytes) +{ + if (!tryReserve(bytes)) + return {}; + return std::make_unique<DiskLocalReservation>(std::static_pointer_cast<DiskLocal>(shared_from_this()), bytes); +} + +bool DiskLocal::tryReserve(UInt64 bytes) +{ + std::lock_guard lock(DiskLocal::reservation_mutex); + if (bytes == 0) + { LOG_DEBUG(log, "Reserving 0 bytes on disk {}", backQuote(name)); - ++reservation_count; - return true; - } - - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); - if (unreserved_space >= bytes) - { + ++reservation_count; + return true; + } + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (unreserved_space >= bytes) + { LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.", - ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); - ++reservation_count; - reserved_bytes += bytes; - return true; - } - return false; -} - -UInt64 DiskLocal::getTotalSpace() const -{ - struct statvfs fs; - if (name == "default") /// for default disk we get space from path/data/ + ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); + ++reservation_count; + reserved_bytes += bytes; + return true; + } + return false; +} + +UInt64 DiskLocal::getTotalSpace() const +{ + struct statvfs fs; + if (name == "default") /// for default disk we get space from path/data/ fs = getStatVFS((fs::path(disk_path) / "data/").string()); - else - fs = getStatVFS(disk_path); - UInt64 total_size = fs.f_blocks * fs.f_bsize; - if (total_size < keep_free_space_bytes) - return 0; - return total_size - keep_free_space_bytes; -} - -UInt64 DiskLocal::getAvailableSpace() const -{ - /// we use f_bavail, because part of 
b_free space is - /// available for superuser only and for system purposes - struct statvfs fs; - if (name == "default") /// for default disk we get space from path/data/ + else + fs = getStatVFS(disk_path); + UInt64 total_size = fs.f_blocks * fs.f_bsize; + if (total_size < keep_free_space_bytes) + return 0; + return total_size - keep_free_space_bytes; +} + +UInt64 DiskLocal::getAvailableSpace() const +{ + /// we use f_bavail, because part of b_free space is + /// available for superuser only and for system purposes + struct statvfs fs; + if (name == "default") /// for default disk we get space from path/data/ fs = getStatVFS((fs::path(disk_path) / "data/").string()); - else - fs = getStatVFS(disk_path); - UInt64 total_size = fs.f_bavail * fs.f_bsize; - if (total_size < keep_free_space_bytes) - return 0; - return total_size - keep_free_space_bytes; -} - -UInt64 DiskLocal::getUnreservedSpace() const -{ - std::lock_guard lock(DiskLocal::reservation_mutex); - auto available_space = getAvailableSpace(); - available_space -= std::min(available_space, reserved_bytes); - return available_space; -} - -bool DiskLocal::exists(const String & path) const -{ + else + fs = getStatVFS(disk_path); + UInt64 total_size = fs.f_bavail * fs.f_bsize; + if (total_size < keep_free_space_bytes) + return 0; + return total_size - keep_free_space_bytes; +} + +UInt64 DiskLocal::getUnreservedSpace() const +{ + std::lock_guard lock(DiskLocal::reservation_mutex); + auto available_space = getAvailableSpace(); + available_space -= std::min(available_space, reserved_bytes); + return available_space; +} + +bool DiskLocal::exists(const String & path) const +{ return fs::exists(fs::path(disk_path) / path); -} - -bool DiskLocal::isFile(const String & path) const -{ +} + +bool DiskLocal::isFile(const String & path) const +{ return fs::is_regular_file(fs::path(disk_path) / path); -} - -bool DiskLocal::isDirectory(const String & path) const -{ +} + +bool DiskLocal::isDirectory(const String & path) const +{ return fs::is_directory(fs::path(disk_path) / path); -} - -size_t DiskLocal::getFileSize(const String & path) const -{ +} + +size_t DiskLocal::getFileSize(const String & path) const +{ return fs::file_size(fs::path(disk_path) / path); -} - -void DiskLocal::createDirectory(const String & path) -{ +} + +void DiskLocal::createDirectory(const String & path) +{ fs::create_directory(fs::path(disk_path) / path); -} - -void DiskLocal::createDirectories(const String & path) -{ +} + +void DiskLocal::createDirectories(const String & path) +{ fs::create_directories(fs::path(disk_path) / path); -} - -void DiskLocal::clearDirectory(const String & path) -{ +} + +void DiskLocal::clearDirectory(const String & path) +{ for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path)) fs::remove(entry.path()); -} - -void DiskLocal::moveDirectory(const String & from_path, const String & to_path) -{ +} + +void DiskLocal::moveDirectory(const String & from_path, const String & to_path) +{ fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path); -} - -DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) -{ - return std::make_unique<DiskLocalDirectoryIterator>(disk_path, path); -} - -void DiskLocal::moveFile(const String & from_path, const String & to_path) -{ +} + +DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) +{ + return std::make_unique<DiskLocalDirectoryIterator>(disk_path, path); +} + +void DiskLocal::moveFile(const String & from_path, const String & to_path) +{ 
fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path); -} - -void DiskLocal::replaceFile(const String & from_path, const String & to_path) -{ +} + +void DiskLocal::replaceFile(const String & from_path, const String & to_path) +{ fs::path from_file = fs::path(disk_path) / from_path; fs::path to_file = fs::path(disk_path) / to_path; fs::rename(from_file, to_file); -} - +} + std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, size_t estimated_size) const -{ +{ return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, estimated_size); -} - -std::unique_ptr<WriteBufferFromFileBase> +} + +std::unique_ptr<WriteBufferFromFileBase> DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode) -{ - int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1; +{ + int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1; return std::make_unique<WriteBufferFromFile>(fs::path(disk_path) / path, buf_size, flags); -} - +} + void DiskLocal::removeFile(const String & path) -{ +{ auto fs_path = fs::path(disk_path) / path; if (0 != unlink(fs_path.c_str())) throwFromErrnoWithPath("Cannot unlink file " + fs_path.string(), fs_path, ErrorCodes::CANNOT_UNLINK); -} - +} + void DiskLocal::removeFileIfExists(const String & path) { auto fs_path = fs::path(disk_path) / path; @@ -292,33 +292,33 @@ void DiskLocal::removeDirectory(const String & path) throwFromErrnoWithPath("Cannot rmdir " + fs_path.string(), fs_path, ErrorCodes::CANNOT_RMDIR); } -void DiskLocal::removeRecursive(const String & path) -{ +void DiskLocal::removeRecursive(const String & path) +{ fs::remove_all(fs::path(disk_path) / path); -} - -void DiskLocal::listFiles(const String & path, std::vector<String> & file_names) -{ +} + +void DiskLocal::listFiles(const String & path, std::vector<String> & file_names) +{ file_names.clear(); for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path)) file_names.emplace_back(entry.path().filename()); -} - -void DiskLocal::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ +} + +void DiskLocal::setLastModified(const String & path, const Poco::Timestamp & timestamp) +{ FS::setModificationTime(fs::path(disk_path) / path, timestamp.epochTime()); -} - -Poco::Timestamp DiskLocal::getLastModified(const String & path) -{ +} + +Poco::Timestamp DiskLocal::getLastModified(const String & path) +{ return FS::getModificationTimestamp(fs::path(disk_path) / path); -} - -void DiskLocal::createHardLink(const String & src_path, const String & dst_path) -{ +} + +void DiskLocal::createHardLink(const String & src_path, const String & dst_path) +{ DB::createHardLink(fs::path(disk_path) / src_path, fs::path(disk_path) / dst_path); -} - +} + void DiskLocal::truncateFile(const String & path, size_t size) { int res = truncate((fs::path(disk_path) / path).string().data(), size); @@ -326,26 +326,26 @@ void DiskLocal::truncateFile(const String & path, size_t size) throwFromErrnoWithPath("Cannot truncate file " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE); } -void DiskLocal::createFile(const String & path) -{ +void DiskLocal::createFile(const String & path) +{ FS::createFile(fs::path(disk_path) / path); -} - -void DiskLocal::setReadOnly(const String & path) -{ +} + +void DiskLocal::setReadOnly(const String & path) +{ fs::permissions(fs::path(disk_path) / path, fs::perms::owner_write | fs::perms::group_write | fs::perms::others_write, 
fs::perm_options::remove); -} - -bool inline isSameDiskType(const IDisk & one, const IDisk & another) -{ - return typeid(one) == typeid(another); -} - -void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) -{ - if (isSameDiskType(*this, *to_disk)) +} + +bool inline isSameDiskType(const IDisk & one, const IDisk & another) +{ + return typeid(one) == typeid(another); +} + +void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) +{ + if (isSameDiskType(*this, *to_disk)) { fs::path to = fs::path(to_disk->getPath()) / to_path; fs::path from = fs::path(disk_path) / from_path; @@ -356,10 +356,10 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. } - else + else copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation. -} - +} + SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const { return std::make_unique<LocalDirectorySyncGuard>(fs::path(disk_path) / path); @@ -380,63 +380,63 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi keep_free_space_bytes = new_keep_free_space_bytes; } -DiskPtr DiskLocalReservation::getDisk(size_t i) const -{ - if (i != 0) - { - throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); - } - return disk; -} - -void DiskLocalReservation::update(UInt64 new_size) -{ - std::lock_guard lock(DiskLocal::reservation_mutex); - disk->reserved_bytes -= size; - size = new_size; - disk->reserved_bytes += size; -} - -DiskLocalReservation::~DiskLocalReservation() -{ - try - { - std::lock_guard lock(DiskLocal::reservation_mutex); - if (disk->reserved_bytes < size) - { - disk->reserved_bytes = 0; +DiskPtr DiskLocalReservation::getDisk(size_t i) const +{ + if (i != 0) + { + throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); + } + return disk; +} + +void DiskLocalReservation::update(UInt64 new_size) +{ + std::lock_guard lock(DiskLocal::reservation_mutex); + disk->reserved_bytes -= size; + size = new_size; + disk->reserved_bytes += size; +} + +DiskLocalReservation::~DiskLocalReservation() +{ + try + { + std::lock_guard lock(DiskLocal::reservation_mutex); + if (disk->reserved_bytes < size) + { + disk->reserved_bytes = 0; LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName()); - } - else - { - disk->reserved_bytes -= size; - } - - if (disk->reservation_count == 0) + } + else + { + disk->reserved_bytes -= size; + } + + if (disk->reservation_count == 0) LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName()); - else - --disk->reservation_count; - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - - -void registerDiskLocal(DiskFactory & factory) -{ - auto creator = [](const String & name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, + else + --disk->reservation_count; + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +void registerDiskLocal(DiskFactory & factory) +{ + auto creator = [](const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, ContextPtr context, const DisksMap & /*map*/) -> DiskPtr { String path; UInt64 keep_free_space_bytes; loadDiskLocalConfig(name, config, config_prefix, context, path, keep_free_space_bytes); - return std::make_shared<DiskLocal>(name, path, keep_free_space_bytes); - }; - factory.registerDiskType("local", creator); -} - -} + return std::make_shared<DiskLocal>(name, path, keep_free_space_bytes); + }; + factory.registerDiskType("local", creator); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h index f7ebbf9416..145211a83e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h @@ -1,125 +1,125 @@ -#pragma once - +#pragma once + #include <common/logger_useful.h> -#include <Disks/IDisk.h> -#include <IO/ReadBufferFromFile.h> -#include <IO/ReadBufferFromFileBase.h> -#include <IO/WriteBufferFromFile.h> +#include <Disks/IDisk.h> +#include <IO/ReadBufferFromFile.h> +#include <IO/ReadBufferFromFileBase.h> +#include <IO/WriteBufferFromFile.h> #include <Poco/Util/AbstractConfiguration.h> - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class DiskLocalReservation; - -class DiskLocal : public IDisk -{ -public: - friend class DiskLocalReservation; - - DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_) - : name(name_), disk_path(path_), keep_free_space_bytes(keep_free_space_bytes_) - { - if (disk_path.back() != '/') + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +class DiskLocalReservation; + +class DiskLocal : public IDisk +{ +public: + friend class DiskLocalReservation; + + DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_) + : name(name_), disk_path(path_), keep_free_space_bytes(keep_free_space_bytes_) + { + if (disk_path.back() != '/') throw Exception("Disk path must end with '/', but '" + disk_path + "' doesn't.", ErrorCodes::LOGICAL_ERROR); - } - - const String & getName() const override { return name; } - - const String & getPath() const override { return disk_path; } - - ReservationPtr reserve(UInt64 bytes) override; - - UInt64 getTotalSpace() const override; - - UInt64 getAvailableSpace() const override; - - UInt64 getUnreservedSpace() const override; - - UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; } - - bool exists(const String & path) const override; - - bool isFile(const String & path) const override; - - bool isDirectory(const String & path) const override; - - size_t getFileSize(const String & path) const override; - - void createDirectory(const String & path) override; - - void createDirectories(const String & path) override; - - void clearDirectory(const String & path) override; - - void moveDirectory(const String & from_path, const String & to_path) override; - - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; - - void createFile(const String & path) override; - - void moveFile(const String & from_path, const String & to_path) override; - - void replaceFile(const String & from_path, const String & to_path) override; - - void copy(const String & from_path, 
const std::shared_ptr<IDisk> & to_disk, const String & to_path) override; - - void listFiles(const String & path, std::vector<String> & file_names) override; - - std::unique_ptr<ReadBufferFromFileBase> readFile( - const String & path, + } + + const String & getName() const override { return name; } + + const String & getPath() const override { return disk_path; } + + ReservationPtr reserve(UInt64 bytes) override; + + UInt64 getTotalSpace() const override; + + UInt64 getAvailableSpace() const override; + + UInt64 getUnreservedSpace() const override; + + UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; } + + bool exists(const String & path) const override; + + bool isFile(const String & path) const override; + + bool isDirectory(const String & path) const override; + + size_t getFileSize(const String & path) const override; + + void createDirectory(const String & path) override; + + void createDirectories(const String & path) override; + + void clearDirectory(const String & path) override; + + void moveDirectory(const String & from_path, const String & to_path) override; + + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + + void createFile(const String & path) override; + + void moveFile(const String & from_path, const String & to_path) override; + + void replaceFile(const String & from_path, const String & to_path) override; + + void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override; + + void listFiles(const String & path, std::vector<String> & file_names) override; + + std::unique_ptr<ReadBufferFromFileBase> readFile( + const String & path, const ReadSettings & settings, size_t estimated_size) const override; - - std::unique_ptr<WriteBufferFromFileBase> writeFile( - const String & path, - size_t buf_size, + + std::unique_ptr<WriteBufferFromFileBase> writeFile( + const String & path, + size_t buf_size, WriteMode mode) override; - + void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; void removeDirectory(const String & path) override; - void removeRecursive(const String & path) override; - - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - - Poco::Timestamp getLastModified(const String & path) override; - - void setReadOnly(const String & path) override; - - void createHardLink(const String & src_path, const String & dst_path) override; - + void removeRecursive(const String & path) override; + + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; + + Poco::Timestamp getLastModified(const String & path) override; + + void setReadOnly(const String & path) override; + + void createHardLink(const String & src_path, const String & dst_path) override; + void truncateFile(const String & path, size_t size) override; DiskType getType() const override { return DiskType::Local; } bool isRemote() const override { return false; } - + bool supportZeroCopyReplication() const override { return false; } SyncGuardPtr getDirectorySyncGuard(const String & path) const override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) override; -private: - bool tryReserve(UInt64 bytes); - -private: - const String name; - const String disk_path; +private: + bool tryReserve(UInt64 bytes); + +private: + const String name; + const String disk_path; std::atomic<UInt64> keep_free_space_bytes; - - UInt64 
reserved_bytes = 0; - UInt64 reservation_count = 0; - - static std::mutex reservation_mutex; + + UInt64 reserved_bytes = 0; + UInt64 reservation_count = 0; + + static std::mutex reservation_mutex; Poco::Logger * log = &Poco::Logger::get("DiskLocal"); -}; - +}; + -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp index a9d81c2761..9407c582c2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp @@ -1,115 +1,115 @@ -#include "DiskLocal.h" -#include "DiskSelector.h" - -#include <IO/WriteHelpers.h> -#include <Common/escapeForFileName.h> -#include <Common/quoteString.h> +#include "DiskLocal.h" +#include "DiskSelector.h" + +#include <IO/WriteHelpers.h> +#include <Common/escapeForFileName.h> +#include <Common/quoteString.h> #include <Common/StringUtils/StringUtils.h> -#include <common/logger_useful.h> -#include <Interpreters/Context.h> - -#include <set> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int EXCESSIVE_ELEMENT_IN_CONFIG; - extern const int UNKNOWN_DISK; -} - +#include <common/logger_useful.h> +#include <Interpreters/Context.h> + +#include <set> + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; + extern const int UNKNOWN_DISK; +} + DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - auto & factory = DiskFactory::instance(); - - constexpr auto default_disk_name = "default"; - bool has_default_disk = false; - for (const auto & disk_name : keys) - { - if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) - throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); - - if (disk_name == default_disk_name) - has_default_disk = true; - - auto disk_config_prefix = config_prefix + "." + disk_name; - +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + auto & factory = DiskFactory::instance(); + + constexpr auto default_disk_name = "default"; + bool has_default_disk = false; + for (const auto & disk_name : keys) + { + if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) + throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + + if (disk_name == default_disk_name) + has_default_disk = true; + + auto disk_config_prefix = config_prefix + "." 
+ disk_name; + disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); - } - if (!has_default_disk) + } + if (!has_default_disk) disks.emplace(default_disk_name, std::make_shared<DiskLocal>(default_disk_name, context->getPath(), 0)); -} - - -DiskSelectorPtr DiskSelector::updateFromConfig( +} + + +DiskSelectorPtr DiskSelector::updateFromConfig( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) const -{ - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - auto & factory = DiskFactory::instance(); - - std::shared_ptr<DiskSelector> result = std::make_shared<DiskSelector>(*this); - - constexpr auto default_disk_name = "default"; +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + auto & factory = DiskFactory::instance(); + + std::shared_ptr<DiskSelector> result = std::make_shared<DiskSelector>(*this); + + constexpr auto default_disk_name = "default"; DisksMap old_disks_minus_new_disks (result->getDisksMap()); - - for (const auto & disk_name : keys) - { - if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) - throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); - + + for (const auto & disk_name : keys) + { + if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) + throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + auto disk_config_prefix = config_prefix + "." + disk_name; - if (result->getDisksMap().count(disk_name) == 0) - { + if (result->getDisksMap().count(disk_name) == 0) + { result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); - } - else - { + } + else + { auto disk = old_disks_minus_new_disks[disk_name]; disk->applyNewSettings(config, context, disk_config_prefix, result->getDisksMap()); - old_disks_minus_new_disks.erase(disk_name); - } - } - - old_disks_minus_new_disks.erase(default_disk_name); - - if (!old_disks_minus_new_disks.empty()) - { - WriteBufferFromOwnString warning; - if (old_disks_minus_new_disks.size() == 1) - writeString("Disk ", warning); - else - writeString("Disks ", warning); - - int index = 0; + old_disks_minus_new_disks.erase(disk_name); + } + } + + old_disks_minus_new_disks.erase(default_disk_name); + + if (!old_disks_minus_new_disks.empty()) + { + WriteBufferFromOwnString warning; + if (old_disks_minus_new_disks.size() == 1) + writeString("Disk ", warning); + else + writeString("Disks ", warning); + + int index = 0; for (const auto & [name, _] : old_disks_minus_new_disks) - { - if (index++ > 0) - writeString(", ", warning); - writeBackQuotedString(name, warning); - } - - writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning); - LOG_WARNING(&Poco::Logger::get("DiskSelector"), warning.str()); - } - - return result; -} - - -DiskPtr DiskSelector::get(const String & name) const -{ - auto it = disks.find(name); - if (it == disks.end()) - throw Exception("Unknown disk " + name, ErrorCodes::UNKNOWN_DISK); - return it->second; -} - -} + { + if (index++ > 0) + writeString(", ", warning); + writeBackQuotedString(name, warning); + } + + writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning); + LOG_WARNING(&Poco::Logger::get("DiskSelector"), 
warning.str()); + } + + return result; +} + + +DiskPtr DiskSelector::get(const String & name) const +{ + auto it = disks.find(name); + if (it == disks.end()) + throw Exception("Unknown disk " + name, ErrorCodes::UNKNOWN_DISK); + return it->second; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h index c662a9de15..5475221544 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h @@ -1,44 +1,44 @@ -#pragma once - -#include <Disks/DiskFactory.h> -#include <Disks/IDisk.h> - -#include <Poco/Util/AbstractConfiguration.h> - -#include <map> - -namespace DB -{ - -class DiskSelector; -using DiskSelectorPtr = std::shared_ptr<const DiskSelector>; - -/// Parse .xml configuration and store information about disks -/// Mostly used for introspection. -class DiskSelector -{ -public: +#pragma once + +#include <Disks/DiskFactory.h> +#include <Disks/IDisk.h> + +#include <Poco/Util/AbstractConfiguration.h> + +#include <map> + +namespace DB +{ + +class DiskSelector; +using DiskSelectorPtr = std::shared_ptr<const DiskSelector>; + +/// Parse .xml configuration and store information about disks +/// Mostly used for introspection. +class DiskSelector +{ +public: DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - DiskSelector(const DiskSelector & from) : disks(from.disks) { } - + DiskSelector(const DiskSelector & from) : disks(from.disks) { } + DiskSelectorPtr updateFromConfig( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context ) const; - - /// Get disk by name - DiskPtr get(const String & name) const; - - /// Get all disks with names - const DisksMap & getDisksMap() const { return disks; } + + /// Get disk by name + DiskPtr get(const String & name) const; + + /// Get all disks with names + const DisksMap & getDisksMap() const { return disks; } void addToDiskMap(const String & name, DiskPtr disk) - { - disks.emplace(name, disk); - } - -private: - DisksMap disks; -}; - -} + { + disks.emplace(name, disk); + } + +private: + DisksMap disks; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp index 7113df561b..df0f921389 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp @@ -1,43 +1,43 @@ -#include "IDisk.h" +#include "IDisk.h" #include "Disks/Executor.h" -#include <IO/ReadBufferFromFileBase.h> -#include <IO/WriteBufferFromFileBase.h> +#include <IO/ReadBufferFromFileBase.h> +#include <IO/WriteBufferFromFileBase.h> #include <IO/copyData.h> -#include <Poco/Logger.h> -#include <common/logger_useful.h> +#include <Poco/Logger.h> +#include <common/logger_useful.h> #include <Common/setThreadName.h> - -namespace DB -{ - + +namespace DB +{ + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } -bool IDisk::isDirectoryEmpty(const String & path) -{ - return !iterateDirectory(path)->isValid(); -} - -void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & to_path) -{ +bool IDisk::isDirectoryEmpty(const String & path) +{ + return !iterateDirectory(path)->isValid(); +} + +void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & 
to_path) +{ LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.", from_disk.getName(), from_disk.getPath(), from_path, to_disk.getName(), to_disk.getPath(), to_path); - - auto in = from_disk.readFile(from_path); - auto out = to_disk.writeFile(to_path); - copyData(*in, *out); + + auto in = from_disk.readFile(from_path); + auto out = to_disk.writeFile(to_path); + copyData(*in, *out); out->finalize(); -} - +} + using ResultsCollector = std::vector<std::future<void>>; void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results) -{ +{ if (from_disk.isFile(from_path)) - { + { auto result = exec.execute( [&from_disk, from_path, &to_disk, to_path]() { @@ -46,18 +46,18 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p }); results.push_back(std::move(result)); - } - else - { + } + else + { fs::path dir_name = fs::path(from_path).parent_path().filename(); fs::path dest(fs::path(to_path) / dir_name); to_disk.createDirectories(dest); - + for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next()) asyncCopy(from_disk, it->path(), to_disk, dest, exec, results); - } -} - + } +} + void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) { auto & exec = to_disk->getExecutor(); @@ -79,7 +79,7 @@ void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_dis void IDisk::truncateFile(const String &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate operation is not implemented for disk of type {}", getType()); -} +} SyncGuardPtr IDisk::getDirectorySyncGuard(const String & /* path */) const { diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h index b62a0436bd..db07987572 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h @@ -1,25 +1,25 @@ -#pragma once - +#pragma once + #include <Interpreters/Context_fwd.h> #include <Interpreters/Context.h> -#include <Core/Defines.h> +#include <Core/Defines.h> #include <common/types.h> -#include <Common/CurrentMetrics.h> -#include <Common/Exception.h> +#include <Common/CurrentMetrics.h> +#include <Common/Exception.h> #include <Disks/Executor.h> #include <Disks/DiskType.h> #include <IO/ReadSettings.h> - -#include <memory> -#include <mutex> -#include <utility> -#include <boost/noncopyable.hpp> -#include <Poco/Timestamp.h> + +#include <memory> +#include <mutex> +#include <utility> +#include <boost/noncopyable.hpp> +#include <Poco/Timestamp.h> #include <filesystem> - + namespace fs = std::filesystem; - + namespace Poco { namespace Util @@ -28,52 +28,52 @@ namespace Poco } } -namespace CurrentMetrics -{ +namespace CurrentMetrics +{ extern const Metric DiskSpaceReservedForMerge; -} - -namespace DB -{ - -class IDiskDirectoryIterator; -using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>; - -class IReservation; -using ReservationPtr = std::unique_ptr<IReservation>; +} + +namespace DB +{ + +class IDiskDirectoryIterator; +using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>; + +class IReservation; +using ReservationPtr = std::unique_ptr<IReservation>; using Reservations = std::vector<ReservationPtr>; - -class ReadBufferFromFileBase; -class WriteBufferFromFileBase; + +class ReadBufferFromFileBase; +class 
WriteBufferFromFileBase; class MMappedFileCache; - -/** - * Mode of opening a file for write. - */ -enum class WriteMode -{ - Rewrite, - Append -}; - -/** - * Provide interface for reservation. - */ -class Space : public std::enable_shared_from_this<Space> -{ -public: - /// Return the name of the space object. - virtual const String & getName() const = 0; - - /// Reserve the specified number of bytes. - virtual ReservationPtr reserve(UInt64 bytes) = 0; - - virtual ~Space() = default; -}; - -using SpacePtr = std::shared_ptr<Space>; - -/** + +/** + * Mode of opening a file for write. + */ +enum class WriteMode +{ + Rewrite, + Append +}; + +/** + * Provide interface for reservation. + */ +class Space : public std::enable_shared_from_this<Space> +{ +public: + /// Return the name of the space object. + virtual const String & getName() const = 0; + + /// Reserve the specified number of bytes. + virtual ReservationPtr reserve(UInt64 bytes) = 0; + + virtual ~Space() = default; +}; + +using SpacePtr = std::shared_ptr<Space>; + +/** * A guard, that should synchronize file's or directory's state * with storage device (e.g. fsync in POSIX) in its destructor. */ @@ -87,105 +87,105 @@ public: using SyncGuardPtr = std::unique_ptr<ISyncGuard>; /** - * A unit of storage persisting data and metadata. - * Abstract underlying storage technology. - * Responsible for: - * - file management; - * - space accounting and reservation. - */ -class IDisk : public Space -{ -public: + * A unit of storage persisting data and metadata. + * Abstract underlying storage technology. + * Responsible for: + * - file management; + * - space accounting and reservation. + */ +class IDisk : public Space +{ +public: /// Default constructor. explicit IDisk(std::unique_ptr<Executor> executor_ = std::make_unique<SyncExecutor>()) : executor(std::move(executor_)) { } - /// Root path for all files stored on the disk. - /// It's not required to be a local filesystem path. - virtual const String & getPath() const = 0; - - /// Total available space on the disk. - virtual UInt64 getTotalSpace() const = 0; - - /// Space currently available on the disk. - virtual UInt64 getAvailableSpace() const = 0; - - /// Space available for reservation (available space minus reserved space). - virtual UInt64 getUnreservedSpace() const = 0; - - /// Amount of bytes which should be kept free on the disk. - virtual UInt64 getKeepingFreeSpace() const { return 0; } - - /// Return `true` if the specified file exists. - virtual bool exists(const String & path) const = 0; - - /// Return `true` if the specified file exists and it's a regular file (not a directory or special file type). - virtual bool isFile(const String & path) const = 0; - - /// Return `true` if the specified file exists and it's a directory. - virtual bool isDirectory(const String & path) const = 0; - - /// Return size of the specified file. - virtual size_t getFileSize(const String & path) const = 0; - - /// Create directory. - virtual void createDirectory(const String & path) = 0; - - /// Create directory and all parent directories if necessary. - virtual void createDirectories(const String & path) = 0; - - /// Remove all files from the directory. Directories are not removed. - virtual void clearDirectory(const String & path) = 0; - - /// Move directory from `from_path` to `to_path`. - virtual void moveDirectory(const String & from_path, const String & to_path) = 0; - - /// Return iterator to the contents of the specified directory. 
- virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0; - - /// Return `true` if the specified directory is empty. - bool isDirectoryEmpty(const String & path); - - /// Create empty file at `path`. - virtual void createFile(const String & path) = 0; - - /// Move the file from `from_path` to `to_path`. - /// If a file with `to_path` path already exists, an exception will be thrown . - virtual void moveFile(const String & from_path, const String & to_path) = 0; - - /// Move the file from `from_path` to `to_path`. - /// If a file with `to_path` path already exists, it will be replaced. - virtual void replaceFile(const String & from_path, const String & to_path) = 0; - - /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`. - virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path); - - /// List files at `path` and add their names to `file_names` - virtual void listFiles(const String & path, std::vector<String> & file_names) = 0; - - /// Open the file for read and return ReadBufferFromFileBase object. - virtual std::unique_ptr<ReadBufferFromFileBase> readFile( - const String & path, + /// Root path for all files stored on the disk. + /// It's not required to be a local filesystem path. + virtual const String & getPath() const = 0; + + /// Total available space on the disk. + virtual UInt64 getTotalSpace() const = 0; + + /// Space currently available on the disk. + virtual UInt64 getAvailableSpace() const = 0; + + /// Space available for reservation (available space minus reserved space). + virtual UInt64 getUnreservedSpace() const = 0; + + /// Amount of bytes which should be kept free on the disk. + virtual UInt64 getKeepingFreeSpace() const { return 0; } + + /// Return `true` if the specified file exists. + virtual bool exists(const String & path) const = 0; + + /// Return `true` if the specified file exists and it's a regular file (not a directory or special file type). + virtual bool isFile(const String & path) const = 0; + + /// Return `true` if the specified file exists and it's a directory. + virtual bool isDirectory(const String & path) const = 0; + + /// Return size of the specified file. + virtual size_t getFileSize(const String & path) const = 0; + + /// Create directory. + virtual void createDirectory(const String & path) = 0; + + /// Create directory and all parent directories if necessary. + virtual void createDirectories(const String & path) = 0; + + /// Remove all files from the directory. Directories are not removed. + virtual void clearDirectory(const String & path) = 0; + + /// Move directory from `from_path` to `to_path`. + virtual void moveDirectory(const String & from_path, const String & to_path) = 0; + + /// Return iterator to the contents of the specified directory. + virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0; + + /// Return `true` if the specified directory is empty. + bool isDirectoryEmpty(const String & path); + + /// Create empty file at `path`. + virtual void createFile(const String & path) = 0; + + /// Move the file from `from_path` to `to_path`. + /// If a file with `to_path` path already exists, an exception will be thrown . + virtual void moveFile(const String & from_path, const String & to_path) = 0; + + /// Move the file from `from_path` to `to_path`. + /// If a file with `to_path` path already exists, it will be replaced. 
+ virtual void replaceFile(const String & from_path, const String & to_path) = 0; + + /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`. + virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path); + + /// List files at `path` and add their names to `file_names` + virtual void listFiles(const String & path, std::vector<String> & file_names) = 0; + + /// Open the file for read and return ReadBufferFromFileBase object. + virtual std::unique_ptr<ReadBufferFromFileBase> readFile( + const String & path, const ReadSettings & settings = ReadSettings{}, size_t estimated_size = 0) const = 0; - - /// Open the file for write and return WriteBufferFromFileBase object. - virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( - const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + + /// Open the file for write and return WriteBufferFromFileBase object. + virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( + const String & path, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, WriteMode mode = WriteMode::Rewrite) = 0; - + /// Remove file. Throws exception if file doesn't exists or it's a directory. virtual void removeFile(const String & path) = 0; - + /// Remove file if it exists. virtual void removeFileIfExists(const String & path) = 0; /// Remove directory. Throws exception if it's not a directory or if directory is not empty. virtual void removeDirectory(const String & path) = 0; - /// Remove file or directory with all children. Use with extra caution. Throws exception if file doesn't exists. - virtual void removeRecursive(const String & path) = 0; - + /// Remove file or directory with all children. Use with extra caution. Throws exception if file doesn't exists. + virtual void removeRecursive(const String & path) = 0; + /// Remove file. Throws exception if file doesn't exists or if directory is not empty. /// Differs from removeFile for S3/HDFS disks /// Second bool param is a flag to remove (true) or keep (false) shared data on S3 @@ -201,22 +201,22 @@ public: /// Second bool param is a flag to remove (true) or keep (false) shared data on S3 virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); } - /// Set last modified time to file or directory at `path`. - virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0; - - /// Get last modified time of file or directory at `path`. - virtual Poco::Timestamp getLastModified(const String & path) = 0; - - /// Set file at `path` as read-only. - virtual void setReadOnly(const String & path) = 0; - - /// Create hardlink from `src_path` to `dst_path`. - virtual void createHardLink(const String & src_path, const String & dst_path) = 0; - + /// Set last modified time to file or directory at `path`. + virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0; + + /// Get last modified time of file or directory at `path`. + virtual Poco::Timestamp getLastModified(const String & path) = 0; + + /// Set file at `path` as read-only. + virtual void setReadOnly(const String & path) = 0; + + /// Create hardlink from `src_path` to `dst_path`. + virtual void createHardLink(const String & src_path, const String & dst_path) = 0; + /// Truncate file to specified size. virtual void truncateFile(const String & path, size_t size); - /// Return disk type - "local", "s3", etc. + /// Return disk type - "local", "s3", etc. 
virtual DiskType getType() const = 0; /// Involves network interaction. @@ -265,78 +265,78 @@ protected: private: std::unique_ptr<Executor> executor; -}; - -using DiskPtr = std::shared_ptr<IDisk>; -using Disks = std::vector<DiskPtr>; - -/** - * Iterator of directory contents on particular disk. - */ -class IDiskDirectoryIterator -{ -public: - /// Iterate to the next file. - virtual void next() = 0; - - /// Return `true` if the iterator points to a valid element. - virtual bool isValid() const = 0; - - /// Path to the file that the iterator currently points to. - virtual String path() const = 0; - - /// Name of the file that the iterator currently points to. - virtual String name() const = 0; - - virtual ~IDiskDirectoryIterator() = default; -}; - -/** - * Information about reserved size on particular disk. - */ -class IReservation : boost::noncopyable -{ -public: - /// Get reservation size. - virtual UInt64 getSize() const = 0; - - /// Get i-th disk where reservation take place. - virtual DiskPtr getDisk(size_t i = 0) const = 0; - - /// Get all disks, used in reservation - virtual Disks getDisks() const = 0; - - /// Changes amount of reserved space. - virtual void update(UInt64 new_size) = 0; - - /// Unreserves reserved space. - virtual ~IReservation() = default; -}; - -/// Return full path to a file on disk. -inline String fullPath(const DiskPtr & disk, const String & path) -{ +}; + +using DiskPtr = std::shared_ptr<IDisk>; +using Disks = std::vector<DiskPtr>; + +/** + * Iterator of directory contents on particular disk. + */ +class IDiskDirectoryIterator +{ +public: + /// Iterate to the next file. + virtual void next() = 0; + + /// Return `true` if the iterator points to a valid element. + virtual bool isValid() const = 0; + + /// Path to the file that the iterator currently points to. + virtual String path() const = 0; + + /// Name of the file that the iterator currently points to. + virtual String name() const = 0; + + virtual ~IDiskDirectoryIterator() = default; +}; + +/** + * Information about reserved size on particular disk. + */ +class IReservation : boost::noncopyable +{ +public: + /// Get reservation size. + virtual UInt64 getSize() const = 0; + + /// Get i-th disk where reservation take place. + virtual DiskPtr getDisk(size_t i = 0) const = 0; + + /// Get all disks, used in reservation + virtual Disks getDisks() const = 0; + + /// Changes amount of reserved space. + virtual void update(UInt64 new_size) = 0; + + /// Unreserves reserved space. + virtual ~IReservation() = default; +}; + +/// Return full path to a file on disk. +inline String fullPath(const DiskPtr & disk, const String & path) +{ return fs::path(disk->getPath()) / path; -} - -/// Return parent path for the specified path. -inline String parentPath(const String & path) -{ +} + +/// Return parent path for the specified path. +inline String parentPath(const String & path) +{ if (path.ends_with('/')) return fs::path(path).parent_path().parent_path() / ""; return fs::path(path).parent_path() / ""; -} - -/// Return file name for the specified path. -inline String fileName(const String & path) -{ +} + +/// Return file name for the specified path. +inline String fileName(const String & path) +{ return fs::path(path).filename(); -} +} /// Return directory path for the specified path. 
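/// Worked examples for the path helpers above, assuming a disk rooted at
/// "/var/lib/app" and a stored file "store/a/b.bin" (both hypothetical):
///
///   fullPath(disk, "store/a/b.bin")  -> "/var/lib/app/store/a/b.bin"
///   parentPath("store/a/b.bin")      -> "store/a/"  (trailing '/' kept)
///   parentPath("store/a/")           -> "store/"    (a trailing '/' steps up one level)
///   fileName("store/a/b.bin")        -> "b.bin"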
inline String directoryPath(const String & path) { return fs::path(path).parent_path() / ""; -} +} } diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp index dbe276f467..586ba81b12 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp @@ -1,19 +1,19 @@ -#include "IVolume.h" - -#include <Common/quoteString.h> - -#include <memory> - -namespace DB -{ -namespace ErrorCodes -{ +#include "IVolume.h" + +#include <Common/quoteString.h> + +#include <memory> + +namespace DB +{ +namespace ErrorCodes +{ extern const int NO_ELEMENTS_IN_CONFIG; extern const int INCONSISTENT_RESERVATIONS; extern const int NO_RESERVATIONS_PROVIDED; extern const int UNKNOWN_VOLUME_TYPE; -} - +} + String volumeTypeToString(VolumeType type) { switch (type) @@ -30,37 +30,37 @@ String volumeTypeToString(VolumeType type) throw Exception("Unknown volume type, please add it to DB::volumeTypeToString", ErrorCodes::UNKNOWN_VOLUME_TYPE); } -IVolume::IVolume( +IVolume::IVolume( String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disk_selector) - : name(std::move(name_)) -{ - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - for (const auto & disk : keys) - { + : name(std::move(name_)) +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + for (const auto & disk : keys) + { if (disk.starts_with("disk")) - { - auto disk_name = config.getString(config_prefix + "." + disk); - disks.push_back(disk_selector->get(disk_name)); - } - } - - if (disks.empty()) + { + auto disk_name = config.getString(config_prefix + "." 
+ disk); + disks.push_back(disk_selector->get(disk_name)); + } + } + + if (disks.empty()) throw Exception("Volume must contain at least one disk", ErrorCodes::NO_ELEMENTS_IN_CONFIG); -} - -UInt64 IVolume::getMaxUnreservedFreeSpace() const -{ - UInt64 res = 0; - for (const auto & disk : disks) - res = std::max(res, disk->getUnreservedSpace()); - return res; -} - +} + +UInt64 IVolume::getMaxUnreservedFreeSpace() const +{ + UInt64 res = 0; + for (const auto & disk : disks) + res = std::max(res, disk->getUnreservedSpace()); + return res; +} + MultiDiskReservation::MultiDiskReservation(Reservations & reservations_, UInt64 size_) : reservations(std::move(reservations_)) , size(size_) @@ -77,7 +77,7 @@ MultiDiskReservation::MultiDiskReservation(Reservations & reservations_, UInt64 throw Exception("Reservations must have same size", ErrorCodes::INCONSISTENT_RESERVATIONS); } } -} +} Disks MultiDiskReservation::getDisks() const { diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h index c02888ae19..c040d9d58e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h @@ -1,78 +1,78 @@ -#pragma once - -#include <Disks/IDisk.h> -#include <Disks/DiskSelector.h> - -#include <Poco/Util/AbstractConfiguration.h> - -namespace DB -{ - -enum class VolumeType -{ - JBOD, +#pragma once + +#include <Disks/IDisk.h> +#include <Disks/DiskSelector.h> + +#include <Poco/Util/AbstractConfiguration.h> + +namespace DB +{ + +enum class VolumeType +{ + JBOD, RAID1, - SINGLE_DISK, - UNKNOWN -}; - + SINGLE_DISK, + UNKNOWN +}; + String volumeTypeToString(VolumeType t); -class IVolume; -using VolumePtr = std::shared_ptr<IVolume>; -using Volumes = std::vector<VolumePtr>; - -/** - * Disks group by some (user) criteria. For example, - * - VolumeJBOD("slow_disks", [d1, d2], 100) - * - VolumeJBOD("fast_disks", [d3, d4], 200) - * - * Here VolumeJBOD is one of implementations of IVolume. - * - * Different of implementations of this interface implement different reserve behaviour — - * VolumeJBOD reserves space on the next disk after the last used, other future implementations - * will reserve, for example, equal spaces on all disks. - */ -class IVolume : public Space -{ -public: +class IVolume; +using VolumePtr = std::shared_ptr<IVolume>; +using Volumes = std::vector<VolumePtr>; + +/** + * Disks group by some (user) criteria. For example, + * - VolumeJBOD("slow_disks", [d1, d2], 100) + * - VolumeJBOD("fast_disks", [d3, d4], 200) + * + * Here VolumeJBOD is one of implementations of IVolume. + * + * Different of implementations of this interface implement different reserve behaviour — + * VolumeJBOD reserves space on the next disk after the last used, other future implementations + * will reserve, for example, equal spaces on all disks. 
+ */ +class IVolume : public Space +{ +public: IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0, bool perform_ttl_move_on_insert_ = true) : disks(std::move(disks_)) , name(name_) , max_data_part_size(max_data_part_size_) , perform_ttl_move_on_insert(perform_ttl_move_on_insert_) - { - } - - IVolume( - String name_, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - DiskSelectorPtr disk_selector - ); - - virtual ReservationPtr reserve(UInt64 bytes) override = 0; - - /// Volume name from config - const String & getName() const override { return name; } - virtual VolumeType getType() const = 0; - - /// Return biggest unreserved space across all disks - UInt64 getMaxUnreservedFreeSpace() const; - + { + } + + IVolume( + String name_, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + DiskSelectorPtr disk_selector + ); + + virtual ReservationPtr reserve(UInt64 bytes) override = 0; + + /// Volume name from config + const String & getName() const override { return name; } + virtual VolumeType getType() const = 0; + + /// Return biggest unreserved space across all disks + UInt64 getMaxUnreservedFreeSpace() const; + DiskPtr getDisk() const { return getDisk(0); } virtual DiskPtr getDisk(size_t i) const { return disks[i]; } - const Disks & getDisks() const { return disks; } - + const Disks & getDisks() const { return disks; } + /// Returns effective value of whether merges are allowed on this volume (true) or not (false). virtual bool areMergesAvoided() const { return false; } /// User setting for enabling and disabling merges on volume. virtual void setAvoidMergesUserOverride(bool /*avoid*/) {} -protected: - Disks disks; - const String name; +protected: + Disks disks; + const String name; public: /// Max size of reservation, zero means unlimited size @@ -80,8 +80,8 @@ public: /// Should a new data part be synchronously moved to a volume according to ttl on insert /// or move this part in background task asynchronously after insert. bool perform_ttl_move_on_insert = true; -}; - +}; + /// Reservation for multiple disks at once. Can be used in RAID1 implementation. 
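/// A reservation sketch, assuming a hypothetical VolumePtr built from config.
/// reserve() hands back an RAII handle; destroying the handle releases the
/// space. The empty-pointer check on failure and the error code are
/// assumptions about the concrete implementation.
void reserveForPart(const VolumePtr & volume, UInt64 part_bytes)
{
    ReservationPtr reservation = volume->reserve(part_bytes);
    if (!reservation)
        throw Exception("Cannot reserve space on volume " + volume->getName(),
                        ErrorCodes::NOT_ENOUGH_SPACE);   /// error code assumed available
    DiskPtr disk = reservation->getDisk();   /// the disk the volume actually picked
    /// ... write the data part to `disk` ...
}   /// reservation released when the handle goes out of scope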
class MultiDiskReservation : public IReservation { @@ -100,4 +100,4 @@ private: UInt64 size; }; -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h index 3a7d9a9393..bade6041ea 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h @@ -1,27 +1,27 @@ -#pragma once - -#include <Disks/IVolume.h> - -namespace DB -{ - -class SingleDiskVolume : public IVolume -{ -public: +#pragma once + +#include <Disks/IVolume.h> + +namespace DB +{ + +class SingleDiskVolume : public IVolume +{ +public: SingleDiskVolume(const String & name_, DiskPtr disk, size_t max_data_part_size_ = 0): IVolume(name_, {disk}, max_data_part_size_) - { - } - - ReservationPtr reserve(UInt64 bytes) override - { - return disks[0]->reserve(bytes); - } - - VolumeType getType() const override { return VolumeType::SINGLE_DISK; } - -}; - -using VolumeSingleDiskPtr = std::shared_ptr<SingleDiskVolume>; + { + } + + ReservationPtr reserve(UInt64 bytes) override + { + return disks[0]->reserve(bytes); + } + + VolumeType getType() const override { return VolumeType::SINGLE_DISK; } + +}; + +using VolumeSingleDiskPtr = std::shared_ptr<SingleDiskVolume>; using VolumesSingleDiskPtr = std::vector<VolumeSingleDiskPtr>; - -} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp index 1c69f1a8eb..f9cafbe545 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp @@ -1,34 +1,34 @@ -#include <DataStreams/NativeBlockInputStream.h> -#include <DataStreams/NativeBlockOutputStream.h> -#include <Formats/FormatFactory.h> - - -namespace DB -{ - -void registerInputFormatNative(FormatFactory & factory) -{ - factory.registerInputFormat("Native", []( - ReadBuffer & buf, - const Block & sample, - UInt64 /* max_block_size */, - FormatFactory::ReadCallback /* callback */, - const FormatSettings &) - { - return std::make_shared<NativeBlockInputStream>(buf, sample, 0); - }); -} - -void registerOutputFormatNative(FormatFactory & factory) -{ - factory.registerOutputFormat("Native", []( - WriteBuffer & buf, - const Block & sample, - FormatFactory::WriteCallback, - const FormatSettings &) - { - return std::make_shared<NativeBlockOutputStream>(buf, 0, sample); - }); -} - -} +#include <DataStreams/NativeBlockInputStream.h> +#include <DataStreams/NativeBlockOutputStream.h> +#include <Formats/FormatFactory.h> + + +namespace DB +{ + +void registerInputFormatNative(FormatFactory & factory) +{ + factory.registerInputFormat("Native", []( + ReadBuffer & buf, + const Block & sample, + UInt64 /* max_block_size */, + FormatFactory::ReadCallback /* callback */, + const FormatSettings &) + { + return std::make_shared<NativeBlockInputStream>(buf, sample, 0); + }); +} + +void registerOutputFormatNative(FormatFactory & factory) +{ + factory.registerOutputFormat("Native", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback, + const FormatSettings &) + { + return std::make_shared<NativeBlockOutputStream>(buf, 0, sample); + }); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp index 
0e31886898..cfc357a055 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp @@ -1,13 +1,13 @@ -#include <Functions/FunctionFactory.h> -#include <Functions/toFixedString.h> - - -namespace DB -{ - -void registerFunctionFixedString(FunctionFactory & factory) -{ - factory.registerFunction<FunctionToFixedString>(); -} - -} +#include <Functions/FunctionFactory.h> +#include <Functions/toFixedString.h> + + +namespace DB +{ + +void registerFunctionFixedString(FunctionFactory & factory) +{ + factory.registerFunction<FunctionToFixedString>(); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp index d8e195329a..b3354c42fb 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp @@ -1,79 +1,79 @@ -#include <unistd.h> -#include <fcntl.h> - -#include <Common/ProfileEvents.h> -#include <Common/formatReadable.h> -#include <IO/MMapReadBufferFromFile.h> - - -namespace ProfileEvents -{ - extern const Event FileOpen; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; -} - - -void MMapReadBufferFromFile::open() -{ - ProfileEvents::increment(ProfileEvents::FileOpen); - - fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC); - - if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); -} - - -std::string MMapReadBufferFromFile::getFileName() const -{ - return file_name; -} - - -MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_) - : file_name(file_name_) -{ - open(); +#include <unistd.h> +#include <fcntl.h> + +#include <Common/ProfileEvents.h> +#include <Common/formatReadable.h> +#include <IO/MMapReadBufferFromFile.h> + + +namespace ProfileEvents +{ + extern const Event FileOpen; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; +} + + +void MMapReadBufferFromFile::open() +{ + ProfileEvents::increment(ProfileEvents::FileOpen); + + fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC); + + if (-1 == fd) + throwFromErrnoWithPath("Cannot open file " + file_name, file_name, + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); +} + + +std::string MMapReadBufferFromFile::getFileName() const +{ + return file_name; +} + + +MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_) + : file_name(file_name_) +{ + open(); mapped.set(fd, offset, length_); init(); -} - - -MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset) - : file_name(file_name_) -{ - open(); +} + + +MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset) + : file_name(file_name_) +{ + open(); mapped.set(fd, offset); init(); -} - - -MMapReadBufferFromFile::~MMapReadBufferFromFile() -{ - if (fd != -1) - close(); /// Exceptions will lead to std::terminate and that's Ok. 
-} - - -void MMapReadBufferFromFile::close() -{ - finish(); - - if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - - fd = -1; - metric_increment.destroy(); -} - -} +} + + +MMapReadBufferFromFile::~MMapReadBufferFromFile() +{ + if (fd != -1) + close(); /// Exceptions will lead to std::terminate and that's Ok. +} + + +void MMapReadBufferFromFile::close() +{ + finish(); + + if (0 != ::close(fd)) + throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + + fd = -1; + metric_increment.destroy(); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h index c2ca6b726f..bc566a0489 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h @@ -1,40 +1,40 @@ -#pragma once - -#include <Common/CurrentMetrics.h> -#include <IO/MMapReadBufferFromFileDescriptor.h> - - -namespace CurrentMetrics -{ - extern const Metric OpenFileForRead; -} - - -namespace DB -{ - -class MMapReadBufferFromFile : public MMapReadBufferFromFileDescriptor -{ -public: - MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_); - - /// Map till end of file. - MMapReadBufferFromFile(const std::string & file_name_, size_t offset); - - ~MMapReadBufferFromFile() override; - - void close(); - - std::string getFileName() const override; - -private: - int fd = -1; - std::string file_name; - - CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead}; - - void open(); -}; - -} - +#pragma once + +#include <Common/CurrentMetrics.h> +#include <IO/MMapReadBufferFromFileDescriptor.h> + + +namespace CurrentMetrics +{ + extern const Metric OpenFileForRead; +} + + +namespace DB +{ + +class MMapReadBufferFromFile : public MMapReadBufferFromFileDescriptor +{ +public: + MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_); + + /// Map till end of file. 
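/// A read-side sketch for the map-till-EOF constructor declared below: map a
/// whole file and drain it into an ordinary buffered writer with copyData.
/// Both paths are illustrative; a real translation unit also needs
/// <IO/WriteBufferFromFile.h> and <IO/copyData.h>.
MMapReadBufferFromFile in("/var/data/table.bin", 0);   /// offset 0, map to EOF
WriteBufferFromFile out("/tmp/table.copy");
copyData(in, out);   /// mmap-backed reads, ordinary buffered writes
out.next();          /// flush the last block before destruction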
+ MMapReadBufferFromFile(const std::string & file_name_, size_t offset); + + ~MMapReadBufferFromFile() override; + + void close(); + + std::string getFileName() const override; + +private: + int fd = -1; + std::string file_name; + + CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead}; + + void open(); +}; + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp index c44c8cad2c..50e0fad026 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp @@ -1,43 +1,43 @@ -#include <Poco/Net/NetException.h> - -#include <IO/ReadBufferFromPocoSocket.h> -#include <Common/Exception.h> -#include <Common/NetException.h> -#include <Common/Stopwatch.h> +#include <Poco/Net/NetException.h> + +#include <IO/ReadBufferFromPocoSocket.h> +#include <Common/Exception.h> +#include <Common/NetException.h> +#include <Common/Stopwatch.h> #include <Common/ProfileEvents.h> #include <Common/CurrentMetrics.h> - - -namespace ProfileEvents -{ - extern const Event NetworkReceiveElapsedMicroseconds; + + +namespace ProfileEvents +{ + extern const Event NetworkReceiveElapsedMicroseconds; extern const Event NetworkReceiveBytes; -} - +} + namespace CurrentMetrics { extern const Metric NetworkReceive; } - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NETWORK_ERROR; - extern const int SOCKET_TIMEOUT; - extern const int CANNOT_READ_FROM_SOCKET; -} - - -bool ReadBufferFromPocoSocket::nextImpl() -{ - ssize_t bytes_read = 0; - Stopwatch watch; - - /// Add more details to exceptions. - try - { + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; + extern const int CANNOT_READ_FROM_SOCKET; +} + + +bool ReadBufferFromPocoSocket::nextImpl() +{ + ssize_t bytes_read = 0; + Stopwatch watch; + + /// Add more details to exceptions. + try + { CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); /// If async_callback is specified, and read will block, run async_callback and try again later. 
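/// Consumer-side sketch, assuming an already-connected Poco::Net::StreamSocket
/// named `socket`; the 100 ms poll window is an arbitrary example value.
ReadBufferFromPocoSocket in(socket);
if (in.poll(100000))   /// readable (or errored) within 100'000 microseconds?
{
    char header[4];
    in.readStrict(header, sizeof(header));   /// throws if the peer sent less
}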
@@ -47,45 +47,45 @@ bool ReadBufferFromPocoSocket::nextImpl() async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description); bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size()); - } - catch (const Poco::Net::NetException & e) - { - throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); - } - catch (const Poco::TimeoutException &) - { - throw NetException("Timeout exceeded while reading from socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT); - } - catch (const Poco::IOException & e) - { - throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); - } - - if (bytes_read < 0) - throw NetException("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); - - /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one - ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + } + catch (const Poco::Net::NetException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + } + catch (const Poco::TimeoutException &) + { + throw NetException("Timeout exceeded while reading from socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT); + } + catch (const Poco::IOException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + } + + if (bytes_read < 0) + throw NetException("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + + /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); - - if (bytes_read) - working_buffer.resize(bytes_read); - else - return false; - - return true; -} - -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) + + if (bytes_read) + working_buffer.resize(bytes_read); + else + return false; + + return true; +} + +ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) : BufferWithOwnMemory<ReadBuffer>(buf_size) , socket(socket_) , peer_address(socket.peerAddress()) , socket_description("socket (" + peer_address.toString() + ")") -{ -} - +{ +} + bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const -{ +{ if (available()) return true; @@ -93,6 +93,6 @@ bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); return res; -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h index d55a009db3..c60aafc7e2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h @@ -1,32 +1,32 @@ -#pragma once - 
+#pragma once + #include <IO/BufferWithOwnMemory.h> #include <IO/ReadBuffer.h> -#include <Poco/Net/Socket.h> - -namespace DB -{ - +#include <Poco/Net/Socket.h> + +namespace DB +{ + using AsyncCallback = std::function<void(int, Poco::Timespan, const std::string &)>; /// Works with the ready Poco::Net::Socket. Blocking operations. -class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer> -{ -protected: - Poco::Net::Socket & socket; - - /** For error messages. It is necessary to receive this address in advance, because, - * for example, if the connection is broken, the address will not be received anymore - * (getpeername will return an error). - */ - Poco::Net::SocketAddress peer_address; - - bool nextImpl() override; - -public: +class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer> +{ +protected: + Poco::Net::Socket & socket; + + /** For error messages. It is necessary to receive this address in advance, because, + * for example, if the connection is broken, the address will not be received anymore + * (getpeername will return an error). + */ + Poco::Net::SocketAddress peer_address; + + bool nextImpl() override; + +public: explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); - + bool poll(size_t timeout_microseconds) const; void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } @@ -34,6 +34,6 @@ public: private: AsyncCallback async_callback; std::string socket_description; -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp index 5eb714c21c..a82e526bf3 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp @@ -1,45 +1,45 @@ #include <IO/TimeoutSetter.h> - -#include <common/logger_useful.h> - -namespace DB -{ +#include <common/logger_useful.h> -TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, + +namespace DB +{ + +TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan send_timeout_, Poco::Timespan receive_timeout_, - bool limit_max_timeout) - : socket(socket_), send_timeout(send_timeout_), receive_timeout(receive_timeout_) -{ - old_send_timeout = socket.getSendTimeout(); - old_receive_timeout = socket.getReceiveTimeout(); - - if (!limit_max_timeout || old_send_timeout > send_timeout) - socket.setSendTimeout(send_timeout); - - if (!limit_max_timeout || old_receive_timeout > receive_timeout) - socket.setReceiveTimeout(receive_timeout); -} - + bool limit_max_timeout) + : socket(socket_), send_timeout(send_timeout_), receive_timeout(receive_timeout_) +{ + old_send_timeout = socket.getSendTimeout(); + old_receive_timeout = socket.getReceiveTimeout(); + + if (!limit_max_timeout || old_send_timeout > send_timeout) + socket.setSendTimeout(send_timeout); + + if (!limit_max_timeout || old_receive_timeout > receive_timeout) + socket.setReceiveTimeout(receive_timeout); +} + TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan timeout_, bool limit_max_timeout) - : TimeoutSetter(socket_, timeout_, timeout_, limit_max_timeout) -{ -} - -TimeoutSetter::~TimeoutSetter() -{ - try - { - socket.setSendTimeout(old_send_timeout); - socket.setReceiveTimeout(old_receive_timeout); - } + : TimeoutSetter(socket_, timeout_, timeout_, limit_max_timeout) +{ +} + +TimeoutSetter::~TimeoutSetter() +{ + 
try + { + socket.setSendTimeout(old_send_timeout); + socket.setReceiveTimeout(old_receive_timeout); + } catch (...) - { + { /// Sometimes caught on Mac OS X. This message can be safely ignored. /// If you are developer using Mac, please debug this error message by yourself. tryLogCurrentException("Client", "TimeoutSetter: Can't reset timeouts"); - } -} + } +} -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h index 3fef4b1c12..31c37ea07a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h @@ -1,30 +1,30 @@ -#pragma once - -#include <Poco/Net/StreamSocket.h> -#include <Poco/Timespan.h> - - -namespace DB -{ -/// Temporarily overrides socket send/receive timeouts and reset them back into destructor -/// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value). -struct TimeoutSetter -{ - TimeoutSetter(Poco::Net::StreamSocket & socket_, +#pragma once + +#include <Poco/Net/StreamSocket.h> +#include <Poco/Timespan.h> + + +namespace DB +{ +/// Temporarily overrides socket send/receive timeouts and reset them back into destructor +/// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value). +struct TimeoutSetter +{ + TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan send_timeout_, Poco::Timespan receive_timeout_, - bool limit_max_timeout = false); - + bool limit_max_timeout = false); + TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan timeout_, bool limit_max_timeout = false); - - ~TimeoutSetter(); - - Poco::Net::StreamSocket & socket; - - Poco::Timespan send_timeout; - Poco::Timespan receive_timeout; - - Poco::Timespan old_send_timeout; - Poco::Timespan old_receive_timeout; -}; -} + + ~TimeoutSetter(); + + Poco::Net::StreamSocket & socket; + + Poco::Timespan send_timeout; + Poco::Timespan receive_timeout; + + Poco::Timespan old_send_timeout; + Poco::Timespan old_receive_timeout; +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp index 4f92572bdf..67cd7ba27d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp @@ -1,105 +1,105 @@ -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> - -#include <Common/ProfileEvents.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> + +#include <Common/ProfileEvents.h> #include <Common/MemoryTracker.h> - -#include <IO/WriteBufferFromFile.h> -#include <IO/WriteHelpers.h> - - -namespace ProfileEvents -{ - extern const Event FileOpen; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; -} - - -WriteBufferFromFile::WriteBufferFromFile( - const std::string & file_name_, - size_t buf_size, - int flags, - mode_t mode, - char * existing_memory, - size_t alignment) - : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) -{ - ProfileEvents::increment(ProfileEvents::FileOpen); - -#ifdef __APPLE__ - bool o_direct = (flags != -1) && (flags & O_DIRECT); - if (o_direct) - flags = flags & ~O_DIRECT; -#endif - - fd = ::open(file_name.c_str(), flags == -1 ? 
O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC : flags | O_CLOEXEC, mode); - - if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); - -#ifdef __APPLE__ - if (o_direct) - { - if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); - } -#endif -} - - -/// Use pre-opened file descriptor. -WriteBufferFromFile::WriteBufferFromFile( + +#include <IO/WriteBufferFromFile.h> +#include <IO/WriteHelpers.h> + + +namespace ProfileEvents +{ + extern const Event FileOpen; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; +} + + +WriteBufferFromFile::WriteBufferFromFile( + const std::string & file_name_, + size_t buf_size, + int flags, + mode_t mode, + char * existing_memory, + size_t alignment) + : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) +{ + ProfileEvents::increment(ProfileEvents::FileOpen); + +#ifdef __APPLE__ + bool o_direct = (flags != -1) && (flags & O_DIRECT); + if (o_direct) + flags = flags & ~O_DIRECT; +#endif + + fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC : flags | O_CLOEXEC, mode); + + if (-1 == fd) + throwFromErrnoWithPath("Cannot open file " + file_name, file_name, + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + +#ifdef __APPLE__ + if (o_direct) + { + if (fcntl(fd, F_NOCACHE, 1) == -1) + throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); + } +#endif +} + + +/// Use pre-opened file descriptor. +WriteBufferFromFile::WriteBufferFromFile( int & fd_, - const std::string & original_file_name, - size_t buf_size, - char * existing_memory, - size_t alignment) - : - WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), - file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) -{ + const std::string & original_file_name, + size_t buf_size, + char * existing_memory, + size_t alignment) + : + WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), + file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) +{ fd_ = -1; -} - - -WriteBufferFromFile::~WriteBufferFromFile() -{ - if (fd < 0) - return; - +} + + +WriteBufferFromFile::~WriteBufferFromFile() +{ + if (fd < 0) + return; + /// FIXME move final flush into the caller MemoryTracker::LockExceptionInThread lock(VariableContext::Global); - + next(); - ::close(fd); -} - - -/// Close file before destruction of object. -void WriteBufferFromFile::close() -{ + ::close(fd); +} + + +/// Close file before destruction of object. 
+void WriteBufferFromFile::close() +{ if (fd < 0) return; - next(); - - if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - - fd = -1; - metric_increment.destroy(); -} - -} + next(); + + if (0 != ::close(fd)) + throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + + fd = -1; + metric_increment.destroy(); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h index d28bc441c1..b7d5863811 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h @@ -1,59 +1,59 @@ -#pragma once - -#include <sys/types.h> - -#include <Common/CurrentMetrics.h> -#include <IO/WriteBufferFromFileDescriptor.h> - - -namespace CurrentMetrics -{ - extern const Metric OpenFileForWrite; -} - - -#ifndef O_DIRECT -#define O_DIRECT 00040000 -#endif - -namespace DB -{ - -/** Accepts path to file and opens it, or pre-opened file descriptor. - * Closes file by himself (thus "owns" a file descriptor). - */ -class WriteBufferFromFile : public WriteBufferFromFileDescriptor -{ -protected: - std::string file_name; - CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; - -public: - WriteBufferFromFile( - const std::string & file_name_, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - int flags = -1, - mode_t mode = 0666, - char * existing_memory = nullptr, - size_t alignment = 0); - - /// Use pre-opened file descriptor. - WriteBufferFromFile( +#pragma once + +#include <sys/types.h> + +#include <Common/CurrentMetrics.h> +#include <IO/WriteBufferFromFileDescriptor.h> + + +namespace CurrentMetrics +{ + extern const Metric OpenFileForWrite; +} + + +#ifndef O_DIRECT +#define O_DIRECT 00040000 +#endif + +namespace DB +{ + +/** Accepts path to file and opens it, or pre-opened file descriptor. + * Closes file by himself (thus "owns" a file descriptor). + */ +class WriteBufferFromFile : public WriteBufferFromFileDescriptor +{ +protected: + std::string file_name; + CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; + +public: + WriteBufferFromFile( + const std::string & file_name_, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + int flags = -1, + mode_t mode = 0666, + char * existing_memory = nullptr, + size_t alignment = 0); + + /// Use pre-opened file descriptor. + WriteBufferFromFile( int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. - const std::string & original_file_name = {}, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - char * existing_memory = nullptr, - size_t alignment = 0); - - ~WriteBufferFromFile() override; - - /// Close file before destruction of object. - void close(); - - std::string getFileName() const override - { - return file_name; - } -}; - -} + const std::string & original_file_name = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0); + + ~WriteBufferFromFile() override; + + /// Close file before destruction of object. 
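/// Writer sketch for this class, with the path and payload as assumptions.
/// Calling close() explicitly lets a failed close surface as an exception,
/// which the destructor cannot throw.
WriteBufferFromFile out("/tmp/example.txt");
out.write("hello\n", 6);   /// buffered: nothing reaches the file yet
out.sync();                /// next() + fsync, so the data is durable
out.close();               /// descriptor closed, errors reported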
+ void close(); + + std::string getFileName() const override + { + return file_name; + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp index 4c5e620c0e..2b9cbb88cd 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp @@ -1,11 +1,11 @@ -#include <IO/WriteBufferFromFileBase.h> - -namespace DB -{ - -WriteBufferFromFileBase::WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment) - : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment) -{ -} - -} +#include <IO/WriteBufferFromFileBase.h> + +namespace DB +{ + +WriteBufferFromFileBase::WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment) + : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment) +{ +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h index fc43dbcd2b..d35b69a7df 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h @@ -1,22 +1,22 @@ -#pragma once - -#include <string> -#include <fcntl.h> - -#include <IO/WriteBuffer.h> -#include <IO/BufferWithOwnMemory.h> - -namespace DB -{ - -class WriteBufferFromFileBase : public BufferWithOwnMemory<WriteBuffer> -{ -public: - WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment); - ~WriteBufferFromFileBase() override = default; - - void sync() override = 0; - virtual std::string getFileName() const = 0; -}; - -} +#pragma once + +#include <string> +#include <fcntl.h> + +#include <IO/WriteBuffer.h> +#include <IO/BufferWithOwnMemory.h> + +namespace DB +{ + +class WriteBufferFromFileBase : public BufferWithOwnMemory<WriteBuffer> +{ +public: + WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment); + ~WriteBufferFromFileBase() override = default; + + void sync() override = 0; + virtual std::string getFileName() const = 0; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp index 52fab90351..cd265653bb 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp @@ -1,137 +1,137 @@ -#include <unistd.h> -#include <errno.h> -#include <cassert> +#include <unistd.h> +#include <errno.h> +#include <cassert> #include <sys/types.h> #include <sys/stat.h> - -#include <Common/Exception.h> -#include <Common/ProfileEvents.h> -#include <Common/CurrentMetrics.h> -#include <Common/Stopwatch.h> + +#include <Common/Exception.h> +#include <Common/ProfileEvents.h> +#include <Common/CurrentMetrics.h> +#include <Common/Stopwatch.h> #include <Common/MemoryTracker.h> - -#include <IO/WriteBufferFromFileDescriptor.h> -#include <IO/WriteHelpers.h> - - -namespace ProfileEvents -{ - extern const Event WriteBufferFromFileDescriptorWrite; - extern const Event WriteBufferFromFileDescriptorWriteFailed; - extern const Event WriteBufferFromFileDescriptorWriteBytes; - extern const Event 
DiskWriteElapsedMicroseconds; -} - -namespace CurrentMetrics -{ - extern const Metric Write; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - extern const int CANNOT_FSYNC; - extern const int CANNOT_SEEK_THROUGH_FILE; - extern const int CANNOT_TRUNCATE_FILE; + +#include <IO/WriteBufferFromFileDescriptor.h> +#include <IO/WriteHelpers.h> + + +namespace ProfileEvents +{ + extern const Event WriteBufferFromFileDescriptorWrite; + extern const Event WriteBufferFromFileDescriptorWriteFailed; + extern const Event WriteBufferFromFileDescriptorWriteBytes; + extern const Event DiskWriteElapsedMicroseconds; +} + +namespace CurrentMetrics +{ + extern const Metric Write; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; + extern const int CANNOT_FSYNC; + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int CANNOT_TRUNCATE_FILE; extern const int CANNOT_FSTAT; -} - - -void WriteBufferFromFileDescriptor::nextImpl() -{ - if (!offset()) - return; - - Stopwatch watch; - - size_t bytes_written = 0; - while (bytes_written != offset()) - { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWrite); - - ssize_t res = 0; - { - CurrentMetrics::Increment metric_increment{CurrentMetrics::Write}; - res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written); - } - - if ((-1 == res || 0 == res) && errno != EINTR) - { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); - } - - if (res > 0) - bytes_written += res; - } - - ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written); -} - - -/// Name or some description of file. -std::string WriteBufferFromFileDescriptor::getFileName() const -{ - return "(fd = " + toString(fd) + ")"; -} - - -WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( - int fd_, - size_t buf_size, - char * existing_memory, - size_t alignment) - : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {} - - -WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() -{ +} + + +void WriteBufferFromFileDescriptor::nextImpl() +{ + if (!offset()) + return; + + Stopwatch watch; + + size_t bytes_written = 0; + while (bytes_written != offset()) + { + ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWrite); + + ssize_t res = 0; + { + CurrentMetrics::Increment metric_increment{CurrentMetrics::Write}; + res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written); + } + + if ((-1 == res || 0 == res) && errno != EINTR) + { + ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); + throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + } + + if (res > 0) + bytes_written += res; + } + + ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written); +} + + +/// Name or some description of file. 
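/// The write loop above is the classic POSIX pattern: retry on EINTR, keep
/// accumulating partial writes, fail on anything else. Distilled (error type
/// chosen for the sketch; needs <unistd.h>, <cerrno>, <stdexcept>):
static void writeAll(int fd, const char * data, size_t size)
{
    size_t written = 0;
    while (written != size)
    {
        ssize_t res = ::write(fd, data + written, size - written);
        if (res > 0)
            written += res;   /// partial write: continue from the new offset
        else if (errno != EINTR)
            throw std::runtime_error("write failed");   /// non-retryable error
        /// res <= 0 with EINTR: interrupted by a signal, just retry
    }
}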
+std::string WriteBufferFromFileDescriptor::getFileName() const +{ + return "(fd = " + toString(fd) + ")"; +} + + +WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( + int fd_, + size_t buf_size, + char * existing_memory, + size_t alignment) + : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {} + + +WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() +{ if (fd < 0) - { + { assert(!offset() && "attempt to write after close"); return; - } + } /// FIXME move final flush into the caller MemoryTracker::LockExceptionInThread lock(VariableContext::Global); next(); -} - - -void WriteBufferFromFileDescriptor::sync() -{ - /// If buffer has pending data - write it. - next(); - - /// Request OS to sync data with storage medium. - int res = fsync(fd); - if (-1 == res) - throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); -} - - -off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) -{ - off_t res = lseek(fd, offset, whence); - if (-1 == res) - throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - return res; -} - - -void WriteBufferFromFileDescriptor::truncate(off_t length) -{ - int res = ftruncate(fd, length); - if (-1 == res) - throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); -} - +} + + +void WriteBufferFromFileDescriptor::sync() +{ + /// If buffer has pending data - write it. + next(); + + /// Request OS to sync data with storage medium. + int res = fsync(fd); + if (-1 == res) + throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); +} + + +off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) +{ + off_t res = lseek(fd, offset, whence); + if (-1 == res) + throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + return res; +} + + +void WriteBufferFromFileDescriptor::truncate(off_t length) +{ + int res = ftruncate(fd, length); + if (-1 == res) + throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); +} + off_t WriteBufferFromFileDescriptor::size() { @@ -140,6 +140,6 @@ off_t WriteBufferFromFileDescriptor::size() if (-1 == res) throwFromErrnoWithPath("Cannot execute fstat " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSTAT); return buf.st_size; -} +} } diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h index ff64661faa..18c0ac64f6 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h @@ -1,51 +1,51 @@ -#pragma once - -#include <IO/WriteBufferFromFileBase.h> - - -namespace DB -{ - -/** Use ready file descriptor. Does not open or close a file. - */ -class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase -{ -protected: - int fd; - - void nextImpl() override; - - /// Name or some description of file. - std::string getFileName() const override; - -public: - WriteBufferFromFileDescriptor( - int fd_ = -1, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - char * existing_memory = nullptr, - size_t alignment = 0); - - /** Could be used before initialization if needed 'fd' was not passed to constructor. 
- * It's not possible to change 'fd' during work. - */ - void setFD(int fd_) - { - fd = fd_; - } - - ~WriteBufferFromFileDescriptor() override; - - int getFD() const - { - return fd; - } - - void sync() override; - - off_t seek(off_t offset, int whence); - void truncate(off_t length); +#pragma once + +#include <IO/WriteBufferFromFileBase.h> + + +namespace DB +{ + +/** Use ready file descriptor. Does not open or close a file. + */ +class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase +{ +protected: + int fd; + + void nextImpl() override; + + /// Name or some description of file. + std::string getFileName() const override; + +public: + WriteBufferFromFileDescriptor( + int fd_ = -1, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0); + + /** Could be used before initialization if needed 'fd' was not passed to constructor. + * It's not possible to change 'fd' during work. + */ + void setFD(int fd_) + { + fd = fd_; + } + + ~WriteBufferFromFileDescriptor() override; + + int getFD() const + { + return fd; + } + + void sync() override; + + off_t seek(off_t offset, int whence); + void truncate(off_t length); off_t size(); -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp index 49aed03c03..3d9c70f039 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp @@ -1,29 +1,29 @@ -#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h> - -namespace ProfileEvents -{ - extern const Event CannotWriteToWriteBufferDiscard; -} - -namespace DB -{ - -void WriteBufferFromFileDescriptorDiscardOnFailure::nextImpl() -{ - size_t bytes_written = 0; - while (bytes_written != offset()) - { - ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written); - - if ((-1 == res || 0 == res) && errno != EINTR) - { - ProfileEvents::increment(ProfileEvents::CannotWriteToWriteBufferDiscard); - break; /// Discard - } - - if (res > 0) - bytes_written += res; - } -} - -} +#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h> + +namespace ProfileEvents +{ + extern const Event CannotWriteToWriteBufferDiscard; +} + +namespace DB +{ + +void WriteBufferFromFileDescriptorDiscardOnFailure::nextImpl() +{ + size_t bytes_written = 0; + while (bytes_written != offset()) + { + ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written); + + if ((-1 == res || 0 == res) && errno != EINTR) + { + ProfileEvents::increment(ProfileEvents::CannotWriteToWriteBufferDiscard); + break; /// Discard + } + + if (res > 0) + bytes_written += res; + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h index 9c621095c2..53e01c3cb2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h @@ -1,23 +1,23 @@ -#pragma once - -#include <IO/WriteBufferFromFileDescriptor.h> - - -namespace DB -{ - -/** Write to file descriptor but drop the data if 
write would block or fail. - * To use within signal handler. Motivating example: a signal handler invoked during execution of malloc - * should not block because some mutex (or even worse - a spinlock) may be held. - */ -class WriteBufferFromFileDescriptorDiscardOnFailure : public WriteBufferFromFileDescriptor -{ -protected: - void nextImpl() override; - -public: - using WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor; - ~WriteBufferFromFileDescriptorDiscardOnFailure() override {} -}; - -} +#pragma once + +#include <IO/WriteBufferFromFileDescriptor.h> + + +namespace DB +{ + +/** Write to file descriptor but drop the data if write would block or fail. + * To use within signal handler. Motivating example: a signal handler invoked during execution of malloc + * should not block because some mutex (or even worse - a spinlock) may be held. + */ +class WriteBufferFromFileDescriptorDiscardOnFailure : public WriteBufferFromFileDescriptor +{ +protected: + void nextImpl() override; + +public: + using WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor; + ~WriteBufferFromFileDescriptorDiscardOnFailure() override {} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp index 3db8d785c6..a0e4de4c83 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp @@ -1,89 +1,89 @@ -#include <Poco/Net/NetException.h> - -#include <IO/WriteBufferFromPocoSocket.h> - -#include <Common/Exception.h> -#include <Common/NetException.h> -#include <Common/Stopwatch.h> +#include <Poco/Net/NetException.h> + +#include <IO/WriteBufferFromPocoSocket.h> + +#include <Common/Exception.h> +#include <Common/NetException.h> +#include <Common/Stopwatch.h> #include <Common/MemoryTracker.h> #include <Common/ProfileEvents.h> #include <Common/CurrentMetrics.h> - - -namespace ProfileEvents -{ - extern const Event NetworkSendElapsedMicroseconds; + + +namespace ProfileEvents +{ + extern const Event NetworkSendElapsedMicroseconds; extern const Event NetworkSendBytes; -} - +} + namespace CurrentMetrics { extern const Metric NetworkSend; } - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NETWORK_ERROR; - extern const int SOCKET_TIMEOUT; - extern const int CANNOT_WRITE_TO_SOCKET; -} - - -void WriteBufferFromPocoSocket::nextImpl() -{ - if (!offset()) - return; - - Stopwatch watch; - - size_t bytes_written = 0; - while (bytes_written < offset()) - { - ssize_t res = 0; - - /// Add more details to exceptions. - try - { + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; + extern const int CANNOT_WRITE_TO_SOCKET; +} + + +void WriteBufferFromPocoSocket::nextImpl() +{ + if (!offset()) + return; + + Stopwatch watch; + + size_t bytes_written = 0; + while (bytes_written < offset()) + { + ssize_t res = 0; + + /// Add more details to exceptions. 
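Editorial note on the discard-on-failure loop restored above: ::write may return a short count or fail with EINTR when a signal interrupts it, so nextImpl keeps retrying and only gives up (dropping the remainder) on a real error, which is what makes it usable inside a signal handler. A minimal standalone sketch of the same pattern, assuming POSIX ::write; writeAllOrDiscard is a hypothetical helper, not a function from this codebase:

// Sketch of the discard-on-failure write loop (assumes POSIX ::write).
// writeAllOrDiscard is illustrative only, not part of the codebase.
#include <unistd.h>
#include <cerrno>
#include <cstddef>

// Returns the number of bytes actually written; on a non-EINTR error the
// remainder is silently discarded, mirroring the nextImpl above.
size_t writeAllOrDiscard(int fd, const char * data, size_t size)
{
    size_t bytes_written = 0;
    while (bytes_written != size)
    {
        ssize_t res = ::write(fd, data + bytes_written, size - bytes_written);

        if ((res == -1 || res == 0) && errno != EINTR)
            break;  /// Real error: drop the rest instead of throwing (signal-handler safe).

        if (res > 0)
            bytes_written += static_cast<size_t>(res);
    }
    return bytes_written;
}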
+ try + { CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkSend); - res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written); - } - catch (const Poco::Net::NetException & e) - { - throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); - } - catch (const Poco::TimeoutException &) - { - throw NetException("Timeout exceeded while writing to socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT); - } - catch (const Poco::IOException & e) - { - throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); - } - - if (res < 0) - throw NetException("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET); - - bytes_written += res; - } - - ProfileEvents::increment(ProfileEvents::NetworkSendElapsedMicroseconds, watch.elapsedMicroseconds()); + res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written); + } + catch (const Poco::Net::NetException & e) + { + throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + } + catch (const Poco::TimeoutException &) + { + throw NetException("Timeout exceeded while writing to socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT); + } + catch (const Poco::IOException & e) + { + throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + } + + if (res < 0) + throw NetException("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET); + + bytes_written += res; + } + + ProfileEvents::increment(ProfileEvents::NetworkSendElapsedMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::NetworkSendBytes, bytes_written); -} - -WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) - : BufferWithOwnMemory<WriteBuffer>(buf_size), socket(socket_), peer_address(socket.peerAddress()) -{ -} - -WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() -{ +} + +WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) + : BufferWithOwnMemory<WriteBuffer>(buf_size), socket(socket_), peer_address(socket.peerAddress()) +{ +} + +WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() +{ /// FIXME move final flush into the caller MemoryTracker::LockExceptionInThread lock(VariableContext::Global); next(); -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h index bb0a8bd980..6f5142086b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h @@ -1,34 +1,34 @@ -#pragma once - -#include <Poco/Net/Socket.h> - -#include <IO/WriteBuffer.h> -#include <IO/BufferWithOwnMemory.h> - - -namespace DB -{ - -/** Works with the ready Poco::Net::Socket. Blocking operations. - */ -class WriteBufferFromPocoSocket : public BufferWithOwnMemory<WriteBuffer> -{ -protected: - Poco::Net::Socket & socket; - - /** For error messages. 
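The header comment that follows explains why WriteBufferFromPocoSocket captures peer_address at construction: once the connection breaks, getpeername() starts failing, so the address must be saved while the socket is still healthy if it is to appear in error messages. A standalone POSIX illustration of that failure mode; describeSocketPeer is a hypothetical helper, assumed IPv4 for brevity:

// Why the peer address is captured eagerly: after a disconnect,
// getpeername() returns an error and the address is unrecoverable.
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string>

std::string describeSocketPeer(int fd)
{
    sockaddr_in addr{};
    socklen_t len = sizeof(addr);
    if (getpeername(fd, reinterpret_cast<sockaddr *>(&addr), &len) != 0)
        return "unknown peer";  /// Broken connection: the address is no longer retrievable.

    char buf[INET_ADDRSTRLEN];
    inet_ntop(AF_INET, &addr.sin_addr, buf, sizeof(buf));
    return std::string(buf) + ":" + std::to_string(ntohs(addr.sin_port));
}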
It is necessary to receive this address in advance, because, - * for example, if the connection is broken, the address will not be received anymore - * (getpeername will return an error). - */ - Poco::Net::SocketAddress peer_address; - - - void nextImpl() override; - -public: - WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); - - ~WriteBufferFromPocoSocket() override; -}; - -} +#pragma once + +#include <Poco/Net/Socket.h> + +#include <IO/WriteBuffer.h> +#include <IO/BufferWithOwnMemory.h> + + +namespace DB +{ + +/** Works with the ready Poco::Net::Socket. Blocking operations. + */ +class WriteBufferFromPocoSocket : public BufferWithOwnMemory<WriteBuffer> +{ +protected: + Poco::Net::Socket & socket; + + /** For error messages. It is necessary to receive this address in advance, because, + * for example, if the connection is broken, the address will not be received anymore + * (getpeername will return an error). + */ + Poco::Net::SocketAddress peer_address; + + + void nextImpl() override; + +public: + WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + + ~WriteBufferFromPocoSocket() override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h index 512d168ffc..dc94c2981b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h @@ -1,69 +1,69 @@ -#pragma once - -#include <array> - -#include <Common/SipHash.h> -#include <Common/Arena.h> -#include <Common/HashTable/Hash.h> -#include <Common/memcpySmall.h> -#include <Common/assert_cast.h> -#include <Core/Defines.h> -#include <common/StringRef.h> -#include <Columns/IColumn.h> -#include <Columns/ColumnsNumber.h> -#include <Columns/ColumnFixedString.h> -#include <Columns/ColumnLowCardinality.h> - +#pragma once + +#include <array> + +#include <Common/SipHash.h> +#include <Common/Arena.h> +#include <Common/HashTable/Hash.h> +#include <Common/memcpySmall.h> +#include <Common/assert_cast.h> +#include <Core/Defines.h> +#include <common/StringRef.h> +#include <Columns/IColumn.h> +#include <Columns/ColumnsNumber.h> +#include <Columns/ColumnFixedString.h> +#include <Columns/ColumnLowCardinality.h> + #if defined(__SSSE3__) && !defined(MEMORY_SANITIZER) #include <tmmintrin.h> #endif - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -using Sizes = std::vector<size_t>; - -/// When packing the values of nullable columns at a given row, we have to -/// store the fact that these values are nullable or not. This is achieved -/// by encoding this information as a bitmap. Let S be the size in bytes of -/// a packed values binary blob and T the number of bytes we may place into -/// this blob, the size that the bitmap shall occupy in the blob is equal to: -/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for -/// each value of S, the corresponding value of T, and the bitmap size: -/// -/// 32,28,4 -/// 16,14,2 -/// 8,7,1 -/// 4,3,1 -/// 2,1,1 -/// - -namespace -{ - -template <typename T> -constexpr auto getBitmapSize() -{ - return - (sizeof(T) == 32) ? - 4 : - (sizeof(T) == 16) ? - 2 : - ((sizeof(T) == 8) ? - 1 : - ((sizeof(T) == 4) ? - 1 : - ((sizeof(T) == 2) ? 
- 1 : - 0))); -} - -} - + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +using Sizes = std::vector<size_t>; + +/// When packing the values of nullable columns at a given row, we have to +/// store the fact that these values are nullable or not. This is achieved +/// by encoding this information as a bitmap. Let S be the size in bytes of +/// a packed values binary blob and T the number of bytes we may place into +/// this blob, the size that the bitmap shall occupy in the blob is equal to: +/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for +/// each value of S, the corresponding value of T, and the bitmap size: +/// +/// 32,28,4 +/// 16,14,2 +/// 8,7,1 +/// 4,3,1 +/// 2,1,1 +/// + +namespace +{ + +template <typename T> +constexpr auto getBitmapSize() +{ + return + (sizeof(T) == 32) ? + 4 : + (sizeof(T) == 16) ? + 2 : + ((sizeof(T) == 8) ? + 1 : + ((sizeof(T) == 4) ? + 1 : + ((sizeof(T) == 2) ? + 1 : + 0))); +} + +} + template<typename T, size_t step> void fillFixedBatch(size_t num_rows, const T * source, T * dest) { @@ -104,7 +104,7 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S /// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the /// binary blob. Keys are placed starting from the longest one. -template <typename T> +template <typename T> void packFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, PaddedPODArray<T> & out) { size_t offset = 0; @@ -116,194 +116,194 @@ void packFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S } template <typename T> -using KeysNullMap = std::array<UInt8, getBitmapSize<T>()>; - -/// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the -/// binary blob, they are disposed in it consecutively. -template <typename T, bool has_low_cardinality = false> -static inline T ALWAYS_INLINE packFixed( - size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr, - const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr) -{ +using KeysNullMap = std::array<UInt8, getBitmapSize<T>()>; + +/// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the +/// binary blob, they are disposed in it consecutively. 
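The arithmetic behind the S/T table above can be checked mechanically: a blob of S bytes splits into T payload bytes plus a ceil(T/8)-byte null bitmap, so S = T + ceil(T/8) must hold exactly for every row. A small self-contained check; the function names are illustrative, not from the codebase:

// Self-contained check of S = T + ceil(T/8) for the blob sizes listed above.
#include <cstddef>

constexpr size_t bitmapBytes(size_t payload_bytes)
{
    return (payload_bytes + 7) / 8;  /// ceil(T / 8)
}

constexpr bool blobSplitsExactly(size_t blob, size_t payload)
{
    return payload + bitmapBytes(payload) == blob;
}

/// The (S, T, bitmap) rows from the comment: 32/28/4, 16/14/2, 8/7/1, 4/3/1, 2/1/1.
static_assert(blobSplitsExactly(32, 28) && bitmapBytes(28) == 4);
static_assert(blobSplitsExactly(16, 14) && bitmapBytes(14) == 2);
static_assert(blobSplitsExactly(8, 7)   && bitmapBytes(7)  == 1);
static_assert(blobSplitsExactly(4, 3)   && bitmapBytes(3)  == 1);
static_assert(blobSplitsExactly(2, 1)   && bitmapBytes(1)  == 1);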
+template <typename T, bool has_low_cardinality = false> +static inline T ALWAYS_INLINE packFixed( + size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr, + const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr) +{ T key{}; char * bytes = reinterpret_cast<char *>(&key); - size_t offset = 0; - - for (size_t j = 0; j < keys_size; ++j) - { - size_t index = i; - const IColumn * column = key_columns[j]; - if constexpr (has_low_cardinality) - { - if (const IColumn * positions = (*low_cardinality_positions)[j]) - { - switch ((*low_cardinality_sizes)[j]) - { - case sizeof(UInt8): index = assert_cast<const ColumnUInt8 *>(positions)->getElement(i); break; - case sizeof(UInt16): index = assert_cast<const ColumnUInt16 *>(positions)->getElement(i); break; - case sizeof(UInt32): index = assert_cast<const ColumnUInt32 *>(positions)->getElement(i); break; - case sizeof(UInt64): index = assert_cast<const ColumnUInt64 *>(positions)->getElement(i); break; - default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); - } - } - } - - switch (key_sizes[j]) - { - case 1: - { - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1); - offset += 1; - } - break; - case 2: - if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size. - { - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2); - offset += 2; - } - break; - case 4: - if constexpr (sizeof(T) >= 4) - { - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4); - offset += 4; - } - break; - case 8: - if constexpr (sizeof(T) >= 8) - { - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8); - offset += 8; - } - break; - default: - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]); - offset += key_sizes[j]; - } - } - - return key; -} - -/// Similar as above but supports nullable values. 
-template <typename T> -static inline T ALWAYS_INLINE packFixed( - size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - const KeysNullMap<T> & bitmap) -{ - union - { - T key; - char bytes[sizeof(key)] = {}; - }; - - size_t offset = 0; - - static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value; - static constexpr bool has_bitmap = bitmap_size > 0; - - if (has_bitmap) - { - memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8)); - offset += bitmap_size; - } - - for (size_t j = 0; j < keys_size; ++j) - { - bool is_null; - - if (!has_bitmap) - is_null = false; - else - { - size_t bucket = j / 8; - size_t off = j % 8; - is_null = ((bitmap[bucket] >> off) & 1) == 1; - } - - if (is_null) - continue; - - switch (key_sizes[j]) - { - case 1: - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1); - offset += 1; - break; - case 2: - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2); - offset += 2; - break; - case 4: - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4); - offset += 4; - break; - case 8: - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8); - offset += 8; - break; - default: - memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]); - offset += key_sizes[j]; - } - } - - return key; -} - - -/// Hash a set of keys into a UInt128 value. -static inline UInt128 ALWAYS_INLINE hash128( - size_t i, size_t keys_size, const ColumnRawPtrs & key_columns) -{ - UInt128 key; - SipHash hash; - - for (size_t j = 0; j < keys_size; ++j) - key_columns[j]->updateHashWithValue(i, hash); - + size_t offset = 0; + + for (size_t j = 0; j < keys_size; ++j) + { + size_t index = i; + const IColumn * column = key_columns[j]; + if constexpr (has_low_cardinality) + { + if (const IColumn * positions = (*low_cardinality_positions)[j]) + { + switch ((*low_cardinality_sizes)[j]) + { + case sizeof(UInt8): index = assert_cast<const ColumnUInt8 *>(positions)->getElement(i); break; + case sizeof(UInt16): index = assert_cast<const ColumnUInt16 *>(positions)->getElement(i); break; + case sizeof(UInt32): index = assert_cast<const ColumnUInt32 *>(positions)->getElement(i); break; + case sizeof(UInt64): index = assert_cast<const ColumnUInt64 *>(positions)->getElement(i); break; + default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); + } + } + } + + switch (key_sizes[j]) + { + case 1: + { + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1); + offset += 1; + } + break; + case 2: + if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size. 
+ { + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2); + offset += 2; + } + break; + case 4: + if constexpr (sizeof(T) >= 4) + { + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4); + offset += 4; + } + break; + case 8: + if constexpr (sizeof(T) >= 8) + { + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8); + offset += 8; + } + break; + default: + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]); + offset += key_sizes[j]; + } + } + + return key; +} + +/// Similar as above but supports nullable values. +template <typename T> +static inline T ALWAYS_INLINE packFixed( + size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + const KeysNullMap<T> & bitmap) +{ + union + { + T key; + char bytes[sizeof(key)] = {}; + }; + + size_t offset = 0; + + static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value; + static constexpr bool has_bitmap = bitmap_size > 0; + + if (has_bitmap) + { + memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8)); + offset += bitmap_size; + } + + for (size_t j = 0; j < keys_size; ++j) + { + bool is_null; + + if (!has_bitmap) + is_null = false; + else + { + size_t bucket = j / 8; + size_t off = j % 8; + is_null = ((bitmap[bucket] >> off) & 1) == 1; + } + + if (is_null) + continue; + + switch (key_sizes[j]) + { + case 1: + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1); + offset += 1; + break; + case 2: + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2); + offset += 2; + break; + case 4: + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4); + offset += 4; + break; + case 8: + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8); + offset += 8; + break; + default: + memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]); + offset += key_sizes[j]; + } + } + + return key; +} + + +/// Hash a set of keys into a UInt128 value. +static inline UInt128 ALWAYS_INLINE hash128( + size_t i, size_t keys_size, const ColumnRawPtrs & key_columns) +{ + UInt128 key; + SipHash hash; + + for (size_t j = 0; j < keys_size; ++j) + key_columns[j]->updateHashWithValue(i, hash); + hash.get128(key); - - return key; -} - - -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. -static inline StringRef * ALWAYS_INLINE placeKeysInPool( - size_t keys_size, StringRefs & keys, Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - char * place = pool.alloc(keys[j].size); - memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size); - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef)); - memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast<StringRef *>(res); -} - - -/** Serialize keys into a continuous chunk of memory. 
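To make the bitmap-then-values layout of the nullable packFixed above concrete, here is a toy packer for three nullable one-byte keys in a 4-byte blob: one bitmap byte holds the three null flags, then the non-null values follow consecutively (NULL keys contribute no payload bytes). packThreeNullableBytes is illustrative only, and the asserts assume a little-endian host:

// Toy version of the nullable packFixed layout: leading null bitmap,
// then the values of the non-null keys packed consecutively.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <optional>

uint32_t packThreeNullableBytes(const std::optional<uint8_t> (&keys)[3])
{
    uint32_t blob = 0;
    char * bytes = reinterpret_cast<char *>(&blob);

    uint8_t bitmap = 0;
    for (size_t j = 0; j < 3; ++j)
        if (!keys[j])
            bitmap |= uint8_t(1u << j);  /// bit j set <=> key j is NULL

    size_t offset = 0;
    std::memcpy(bytes + offset, &bitmap, 1);
    offset += 1;

    for (size_t j = 0; j < 3; ++j)
        if (keys[j])  /// NULL keys are skipped: only a bitmap bit, no payload.
        {
            std::memcpy(bytes + offset, &*keys[j], 1);
            offset += 1;
        }

    return blob;
}

int main()
{
    std::optional<uint8_t> keys[3] = {uint8_t{7}, std::nullopt, uint8_t{9}};
    uint32_t blob = packThreeNullableBytes(keys);
    assert((blob & 0xFF) == 0b010);      /// only key 1 is NULL (little-endian byte 0)
    assert(((blob >> 8) & 0xFF) == 7);   /// first non-null value
    assert(((blob >> 16) & 0xFF) == 9);  /// second non-null value
    return 0;
}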
- */ -static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( - size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool) -{ - const char * begin = nullptr; - - size_t sum_size = 0; - for (size_t j = 0; j < keys_size; ++j) - sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size; - - return {begin, sum_size}; -} - - + + return key; +} + + +/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. +static inline StringRef * ALWAYS_INLINE placeKeysInPool( + size_t keys_size, StringRefs & keys, Arena & pool) +{ + for (size_t j = 0; j < keys_size; ++j) + { + char * place = pool.alloc(keys[j].size); + memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size); + keys[j].data = place; + } + + /// Place the StringRefs on the newly copied keys in the pool. + char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef)); + memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef)); + + return reinterpret_cast<StringRef *>(res); +} + + +/** Serialize keys into a continuous chunk of memory. + */ +static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( + size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool) +{ + const char * begin = nullptr; + + size_t sum_size = 0; + for (size_t j = 0; j < keys_size; ++j) + sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size; + + return {begin, sum_size}; +} + + /** Pack elements with shuffle instruction. * See the explanation in ColumnsHashing.h */ @@ -333,7 +333,7 @@ static T inline packFixedShuffle( T out; __builtin_memcpy(&out, &res, sizeof(T)); return out; -} +} #endif } diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h index e64abbb16a..d7b8ebca83 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h @@ -1,383 +1,383 @@ -#pragma once - -#include <mutex> -#include <memory> -#include <functional> - -#include <common/logger_useful.h> - -#include <common/StringRef.h> -#include <Common/Arena.h> -#include <Common/HashTable/FixedHashMap.h> -#include <Common/HashTable/HashMap.h> -#include <Common/HashTable/TwoLevelHashMap.h> -#include <Common/HashTable/StringHashMap.h> -#include <Common/HashTable/TwoLevelStringHashMap.h> - -#include <Common/ThreadPool.h> -#include <Common/ColumnsHashing.h> -#include <Common/assert_cast.h> -#include <Common/filesystemHelpers.h> - -#include <DataStreams/IBlockStream_fwd.h> -#include <DataStreams/SizeLimits.h> - +#pragma once + +#include <mutex> +#include <memory> +#include <functional> + +#include <common/logger_useful.h> + +#include <common/StringRef.h> +#include <Common/Arena.h> +#include <Common/HashTable/FixedHashMap.h> +#include <Common/HashTable/HashMap.h> +#include <Common/HashTable/TwoLevelHashMap.h> +#include <Common/HashTable/StringHashMap.h> +#include <Common/HashTable/TwoLevelStringHashMap.h> + +#include <Common/ThreadPool.h> +#include <Common/ColumnsHashing.h> +#include <Common/assert_cast.h> +#include <Common/filesystemHelpers.h> + +#include <DataStreams/IBlockStream_fwd.h> +#include <DataStreams/SizeLimits.h> + #include <Disks/SingleDiskVolume.h> -#include <Interpreters/AggregateDescription.h> -#include <Interpreters/AggregationCommon.h> +#include <Interpreters/AggregateDescription.h> 
+#include <Interpreters/AggregationCommon.h> //#include <Interpreters/JIT/compileFunction.h> - -#include <Columns/ColumnString.h> -#include <Columns/ColumnFixedString.h> -#include <Columns/ColumnAggregateFunction.h> -#include <Columns/ColumnVector.h> -#include <Columns/ColumnNullable.h> -#include <Columns/ColumnLowCardinality.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; -} - -class IBlockOutputStream; - -/** Different data structures that can be used for aggregation - * For efficiency, the aggregation data itself is put into the pool. - * Data and pool ownership (states of aggregate functions) - * is acquired later - in `convertToBlocks` function, by the ColumnAggregateFunction object. - * - * Most data structures exist in two versions: normal and two-level (TwoLevel). - * A two-level hash table works a little slower with a small number of different keys, - * but with a large number of different keys scales better, because it allows - * parallelize some operations (merging, post-processing) in a natural way. - * - * To ensure efficient work over a wide range of conditions, - * first single-level hash tables are used, - * and when the number of different keys is large enough, - * they are converted to two-level ones. - * - * PS. There are many different approaches to the effective implementation of parallel and distributed aggregation, - * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons. - */ - -using AggregatedDataWithoutKey = AggregateDataPtr; - + +#include <Columns/ColumnString.h> +#include <Columns/ColumnFixedString.h> +#include <Columns/ColumnAggregateFunction.h> +#include <Columns/ColumnVector.h> +#include <Columns/ColumnNullable.h> +#include <Columns/ColumnLowCardinality.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; +} + +class IBlockOutputStream; + +/** Different data structures that can be used for aggregation + * For efficiency, the aggregation data itself is put into the pool. + * Data and pool ownership (states of aggregate functions) + * is acquired later - in `convertToBlocks` function, by the ColumnAggregateFunction object. + * + * Most data structures exist in two versions: normal and two-level (TwoLevel). + * A two-level hash table works a little slower with a small number of different keys, + * but with a large number of different keys scales better, because it allows + * parallelize some operations (merging, post-processing) in a natural way. + * + * To ensure efficient work over a wide range of conditions, + * first single-level hash tables are used, + * and when the number of different keys is large enough, + * they are converted to two-level ones. + * + * PS. There are many different approaches to the effective implementation of parallel and distributed aggregation, + * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons. 
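A sketch of the single-level/two-level idea described in the comment above: a two-level table is just N sub-tables selected by a slice of the hash, so merging two aggregation states can proceed bucket-by-bucket, and in parallel, because bucket i of one table only ever meets bucket i of the other. A minimal illustration, assuming std::unordered_map sub-tables rather than the real TwoLevelHashMap:

// Minimal two-level map: 256 sub-tables addressed by the top byte of the hash.
// Mirrors the idea only, not the real TwoLevelHashMap implementation.
#include <array>
#include <cstddef>
#include <functional>
#include <unordered_map>

template <typename Key, typename Mapped>
struct ToyTwoLevelMap
{
    static constexpr size_t NUM_BUCKETS = 256;
    std::array<std::unordered_map<Key, Mapped>, NUM_BUCKETS> impls;

    static size_t bucket(const Key & key)
    {
        /// Use the high bits so keys stay spread across sub-tables.
        return std::hash<Key>{}(key) >> (sizeof(size_t) * 8 - 8);
    }

    Mapped & operator[](const Key & key) { return impls[bucket(key)][key]; }

    /// Merging parallelizes naturally: bucket i only interacts with bucket i.
    void mergeFrom(const ToyTwoLevelMap & other)
    {
        for (size_t i = 0; i < NUM_BUCKETS; ++i)
            for (const auto & [k, v] : other.impls[i])
                impls[i][k] += v;  /// assumes Mapped supports += (e.g. a sum state)
    }
};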
+ */ + +using AggregatedDataWithoutKey = AggregateDataPtr; + using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize<UInt8, AggregateDataPtr>; using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap<UInt16, AggregateDataPtr>; - -using AggregatedDataWithUInt32Key = HashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>; -using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>; - -using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>; - -using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>; - -using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>; + +using AggregatedDataWithUInt32Key = HashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>; +using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>; + +using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>; + +using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>; + +using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>; using AggregatedDataWithKeys256 = HashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>; - -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>; -using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>; - -using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>; - -using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>; - -using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>; + +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>; + +using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>; + +using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>; + +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>; using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>; - -/** Variants with better hash function, using more than 32 bits for hash. - * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, - * but we keep in memory and merge only sub-partition of them simultaneously. - * TODO We need to switch for better hash function not only for external aggregation, - * but also for huge aggregation results on machines with terabytes of RAM. - */ - -using AggregatedDataWithUInt64KeyHash64 = HashMap<UInt64, AggregateDataPtr, DefaultHash<UInt64>>; -using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash<StringRef, AggregateDataPtr, StringRefHash64>; -using AggregatedDataWithKeys128Hash64 = HashMap<UInt128, AggregateDataPtr, UInt128Hash>; + +/** Variants with better hash function, using more than 32 bits for hash. + * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, + * but we keep in memory and merge only sub-partition of them simultaneously. + * TODO We need to switch for better hash function not only for external aggregation, + * but also for huge aggregation results on machines with terabytes of RAM. 
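The "far greater than 4 billion keys" remark above is a birthday-bound argument: with m possible hash values and n keys, roughly n^2 / (2m) colliding pairs are expected, so 32 hash bits are hopeless at that scale while 64 bits still keep collisions rare. A quick back-of-the-envelope check:

// Expected number of colliding pairs ~ n^2 / (2m), the birthday approximation.
#include <cstdio>

int main()
{
    double n = 4e9;                       /// keys in an external aggregation
    double m32 = 4294967296.0;            /// 2^32 hash values
    double m64 = 18446744073709551616.0;  /// 2^64 hash values

    std::printf("expected collisions, 32-bit hash: %.3g\n", n * n / (2.0 * m32));  /// ~1.9e9: useless
    std::printf("expected collisions, 64-bit hash: %.3g\n", n * n / (2.0 * m64));  /// ~0.43: rare
    return 0;
}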
+ */ + +using AggregatedDataWithUInt64KeyHash64 = HashMap<UInt64, AggregateDataPtr, DefaultHash<UInt64>>; +using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash<StringRef, AggregateDataPtr, StringRefHash64>; +using AggregatedDataWithKeys128Hash64 = HashMap<UInt128, AggregateDataPtr, UInt128Hash>; using AggregatedDataWithKeys256Hash64 = HashMap<UInt256, AggregateDataPtr, UInt256Hash>; - -template <typename Base> -struct AggregationDataWithNullKey : public Base -{ - using Base::Base; - - bool & hasNullKeyData() { return has_null_key_data; } - AggregateDataPtr & getNullKeyData() { return null_key_data; } - bool hasNullKeyData() const { return has_null_key_data; } - const AggregateDataPtr & getNullKeyData() const { return null_key_data; } - size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); } - bool empty() const { return Base::empty() && !has_null_key_data; } - void clear() - { - Base::clear(); - has_null_key_data = false; - } - void clearAndShrink() - { - Base::clearAndShrink(); - has_null_key_data = false; - } - -private: - bool has_null_key_data = false; - AggregateDataPtr null_key_data = nullptr; -}; - -template <typename Base> -struct AggregationDataWithNullKeyTwoLevel : public Base -{ - using Base::impls; - + +template <typename Base> +struct AggregationDataWithNullKey : public Base +{ + using Base::Base; + + bool & hasNullKeyData() { return has_null_key_data; } + AggregateDataPtr & getNullKeyData() { return null_key_data; } + bool hasNullKeyData() const { return has_null_key_data; } + const AggregateDataPtr & getNullKeyData() const { return null_key_data; } + size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); } + bool empty() const { return Base::empty() && !has_null_key_data; } + void clear() + { + Base::clear(); + has_null_key_data = false; + } + void clearAndShrink() + { + Base::clearAndShrink(); + has_null_key_data = false; + } + +private: + bool has_null_key_data = false; + AggregateDataPtr null_key_data = nullptr; +}; + +template <typename Base> +struct AggregationDataWithNullKeyTwoLevel : public Base +{ + using Base::impls; + AggregationDataWithNullKeyTwoLevel() = default; - - template <typename Other> - explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other) - { - impls[0].hasNullKeyData() = other.hasNullKeyData(); - impls[0].getNullKeyData() = other.getNullKeyData(); - } - - bool & hasNullKeyData() { return impls[0].hasNullKeyData(); } - AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); } - bool hasNullKeyData() const { return impls[0].hasNullKeyData(); } - const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); } -}; - -template <typename ... Types> -using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>; -template <typename ... 
Types> -using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>; - -using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>; -using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>; - -using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>; -using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>; - -using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel< - TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>, - TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; - -using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< - TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>; - -using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< - TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>, - TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; - - -/// For the case where there is one numeric key. -/// FieldType is UInt8/16/32/64 for any type with corresponding bit width. -template <typename FieldType, typename TData, - bool consecutive_keys_optimization = true> -struct AggregationMethodOneNumber -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - - Data data; - + + template <typename Other> + explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other) + { + impls[0].hasNullKeyData() = other.hasNullKeyData(); + impls[0].getNullKeyData() = other.getNullKeyData(); + } + + bool & hasNullKeyData() { return impls[0].hasNullKeyData(); } + AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); } + bool hasNullKeyData() const { return impls[0].hasNullKeyData(); } + const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); } +}; + +template <typename ... Types> +using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>; +template <typename ... Types> +using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>; + +using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>; +using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>; + +using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>; +using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>; + +using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; + +using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>; + +using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; + + +/// For the case where there is one numeric key. +/// FieldType is UInt8/16/32/64 for any type with corresponding bit width. 
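AggregationDataWithNullKey above stores the state for the NULL key in a side slot next to the table, since the hash tables used here cannot hold a NULL key directly; size() and empty() then account for that extra slot. A toy equivalent of the wrapper, using std::unordered_map as the base purely for illustration:

// Toy equivalent of AggregationDataWithNullKey: the NULL key's mapped value
// lives beside the table, not inside it.
#include <cstddef>
#include <unordered_map>

template <typename Base>
struct WithNullKey : Base
{
    using Base::Base;

    bool & hasNullKeyData() { return has_null_key_data; }
    typename Base::mapped_type & getNullKeyData() { return null_key_data; }

    size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); }
    bool empty() const { return Base::empty() && !has_null_key_data; }

private:
    bool has_null_key_data = false;
    typename Base::mapped_type null_key_data{};
};

/// Usage sketch: WithNullKey<std::unordered_map<uint64_t, int>> counts;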
+template <typename FieldType, typename TData, + bool consecutive_keys_optimization = true> +struct AggregationMethodOneNumber +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + AggregationMethodOneNumber() = default; - - template <typename Other> - AggregationMethodOneNumber(const Other & other) : data(other.data) {} - - /// To use one `Method` in different threads, use different `State`. - using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type, - Mapped, FieldType, consecutive_keys_optimization>; - - /// Use optimization for low cardinality. - static const bool low_cardinality_optimization = false; - + + template <typename Other> + AggregationMethodOneNumber(const Other & other) : data(other.data) {} + + /// To use one `Method` in different threads, use different `State`. + using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type, + Mapped, FieldType, consecutive_keys_optimization>; + + /// Use optimization for low cardinality. + static const bool low_cardinality_optimization = false; + /// Shuffle key columns before `insertKeyIntoColumns` call if needed. std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } - // Insert the key from the hash table into columns. + // Insert the key from the hash table into columns. static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/) - { + { const auto * key_holder = reinterpret_cast<const char *>(&key); auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]); - column->insertRawData<sizeof(FieldType)>(key_holder); - } -}; - - -/// For the case where there is one string key. -template <typename TData> -struct AggregationMethodString -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - - Data data; - + column->insertRawData<sizeof(FieldType)>(key_holder); + } +}; + + +/// For the case where there is one string key. 
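The consecutive_keys_optimization flag on AggregationMethodOneNumber above enables a one-entry cache: in sorted-ish input the same key often repeats back-to-back, so remembering the last (key, state) pair skips the hash lookup entirely. (For UInt8/UInt16 the variants below disable it, because a FixedHashMap lookup is already almost free.) A sketch of the idea; the real logic lives in ColumnsHashing, and CachedLookup is illustrative only:

// One-entry "last key" cache, the idea behind consecutive_keys_optimization.
#include <cstdint>
#include <unordered_map>

struct CachedLookup
{
    std::unordered_map<uint64_t, int> map;

    uint64_t last_key = 0;
    int * last_mapped = nullptr;

    int & findOrInsert(uint64_t key)
    {
        if (last_mapped && key == last_key)
            return *last_mapped;  /// hit: repeated key, no hash computed

        int & mapped = map[key];  /// miss: full hash table lookup
        /// References into unordered_map stay valid across rehash, so caching is safe.
        last_key = key;
        last_mapped = &mapped;
        return mapped;
    }
};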
+template <typename TData> +struct AggregationMethodString +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + AggregationMethodString() = default; - - template <typename Other> - AggregationMethodString(const Other & other) : data(other.data) {} - - using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>; - - static const bool low_cardinality_optimization = false; - + + template <typename Other> + AggregationMethodString(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>; + + static const bool low_cardinality_optimization = false; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &) - { + { static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size); - } -}; - - -/// Same as above but without cache -template <typename TData> -struct AggregationMethodStringNoCache -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - - Data data; - + } +}; + + +/// Same as above but without cache +template <typename TData> +struct AggregationMethodStringNoCache +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + AggregationMethodStringNoCache() = default; - - template <typename Other> - AggregationMethodStringNoCache(const Other & other) : data(other.data) {} - - using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>; - - static const bool low_cardinality_optimization = false; - + + template <typename Other> + AggregationMethodStringNoCache(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>; + + static const bool low_cardinality_optimization = false; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &) - { + { static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size); - } -}; - - -/// For the case where there is one fixed-length string key. -template <typename TData> -struct AggregationMethodFixedString -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - - Data data; - + } +}; + + +/// For the case where there is one fixed-length string key. 
+template <typename TData> +struct AggregationMethodFixedString +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + AggregationMethodFixedString() = default; - - template <typename Other> - AggregationMethodFixedString(const Other & other) : data(other.data) {} - - using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>; - - static const bool low_cardinality_optimization = false; - + + template <typename Other> + AggregationMethodFixedString(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>; + + static const bool low_cardinality_optimization = false; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &) - { + { static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size); - } -}; - -/// Same as above but without cache -template <typename TData> -struct AggregationMethodFixedStringNoCache -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - - Data data; - + } +}; + +/// Same as above but without cache +template <typename TData> +struct AggregationMethodFixedStringNoCache +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + AggregationMethodFixedStringNoCache() = default; - - template <typename Other> - AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {} - - using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>; - - static const bool low_cardinality_optimization = false; - + + template <typename Other> + AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>; + + static const bool low_cardinality_optimization = false; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &) - { + { static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size); - } -}; - - -/// Single low cardinality column. -template <typename SingleColumnMethod> -struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod -{ - using Base = SingleColumnMethod; - using BaseState = typename Base::State; - - using Data = typename Base::Data; - using Key = typename Base::Key; - using Mapped = typename Base::Mapped; - - using Base::data; - - AggregationMethodSingleLowCardinalityColumn() = default; - - template <typename Other> - explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {} - - using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn<BaseState, Mapped, true>; - - static const bool low_cardinality_optimization = true; - + } +}; + + +/// Single low cardinality column. 
+template <typename SingleColumnMethod> +struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod +{ + using Base = SingleColumnMethod; + using BaseState = typename Base::State; + + using Data = typename Base::Data; + using Key = typename Base::Key; + using Mapped = typename Base::Mapped; + + using Base::data; + + AggregationMethodSingleLowCardinalityColumn() = default; + + template <typename Other> + explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {} + + using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn<BaseState, Mapped, true>; + + static const bool low_cardinality_optimization = true; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } - static void insertKeyIntoColumns(const Key & key, + static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns_low_cardinality, const Sizes & /*key_sizes*/) - { + { auto * col = assert_cast<ColumnLowCardinality *>(key_columns_low_cardinality[0]); - - if constexpr (std::is_same_v<Key, StringRef>) - { - col->insertData(key.data, key.size); - } - else - { - col->insertData(reinterpret_cast<const char *>(&key), sizeof(key)); - } - } -}; - - -/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits. -template <typename TData, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true> -struct AggregationMethodKeysFixed -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - static constexpr bool has_nullable_keys = has_nullable_keys_; - static constexpr bool has_low_cardinality = has_low_cardinality_; - - Data data; - + + if constexpr (std::is_same_v<Key, StringRef>) + { + col->insertData(key.data, key.size); + } + else + { + col->insertData(reinterpret_cast<const char *>(&key), sizeof(key)); + } + } +}; + + +/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits. +template <typename TData, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true> +struct AggregationMethodKeysFixed +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + static constexpr bool has_nullable_keys = has_nullable_keys_; + static constexpr bool has_low_cardinality = has_low_cardinality_; + + Data data; + AggregationMethodKeysFixed() = default; - - template <typename Other> - AggregationMethodKeysFixed(const Other & other) : data(other.data) {} - + + template <typename Other> + AggregationMethodKeysFixed(const Other & other) : data(other.data) {} + using State = ColumnsHashing::HashMethodKeysFixed< typename Data::value_type, Key, @@ -385,707 +385,707 @@ struct AggregationMethodKeysFixed has_nullable_keys, has_low_cardinality, use_cache>; - - static const bool low_cardinality_optimization = false; - + + static const bool low_cardinality_optimization = false; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes) - { + { return State::shuffleKeyColumns(key_columns, key_sizes); } static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & key_sizes) { - size_t keys_size = key_columns.size(); - - static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0; - /// In any hash key value, column values to be read start just after the bitmap, if it exists. 
- size_t pos = bitmap_size; - - for (size_t i = 0; i < keys_size; ++i) - { - IColumn * observed_column; - ColumnUInt8 * null_map; - - bool column_nullable = false; - if constexpr (has_nullable_keys) - column_nullable = isColumnNullable(*key_columns[i]); - - /// If we have a nullable column, get its nested column and its null map. - if (column_nullable) - { - ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[i]); - observed_column = &nullable_col.getNestedColumn(); - null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn()); - } - else - { + size_t keys_size = key_columns.size(); + + static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0; + /// In any hash key value, column values to be read start just after the bitmap, if it exists. + size_t pos = bitmap_size; + + for (size_t i = 0; i < keys_size; ++i) + { + IColumn * observed_column; + ColumnUInt8 * null_map; + + bool column_nullable = false; + if constexpr (has_nullable_keys) + column_nullable = isColumnNullable(*key_columns[i]); + + /// If we have a nullable column, get its nested column and its null map. + if (column_nullable) + { + ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[i]); + observed_column = &nullable_col.getNestedColumn(); + null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn()); + } + else + { observed_column = key_columns[i]; - null_map = nullptr; - } - - bool is_null = false; - if (column_nullable) - { - /// The current column is nullable. Check if the value of the - /// corresponding key is nullable. Update the null map accordingly. - size_t bucket = i / 8; - size_t offset = i % 8; - UInt8 val = (reinterpret_cast<const UInt8 *>(&key)[bucket] >> offset) & 1; - null_map->insertValue(val); - is_null = val == 1; - } - - if (has_nullable_keys && is_null) - observed_column->insertDefault(); - else - { - size_t size = key_sizes[i]; - observed_column->insertData(reinterpret_cast<const char *>(&key) + pos, size); - pos += size; - } - } - } -}; - - -/** Aggregates by concatenating serialized key values. - * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. - * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. - * Therefore, when aggregating by several strings, there is no ambiguity. - */ -template <typename TData> -struct AggregationMethodSerialized -{ - using Data = TData; - using Key = typename Data::key_type; - using Mapped = typename Data::mapped_type; - - Data data; - + null_map = nullptr; + } + + bool is_null = false; + if (column_nullable) + { + /// The current column is nullable. Check if the value of the + /// corresponding key is nullable. Update the null map accordingly. + size_t bucket = i / 8; + size_t offset = i % 8; + UInt8 val = (reinterpret_cast<const UInt8 *>(&key)[bucket] >> offset) & 1; + null_map->insertValue(val); + is_null = val == 1; + } + + if (has_nullable_keys && is_null) + observed_column->insertDefault(); + else + { + size_t size = key_sizes[i]; + observed_column->insertData(reinterpret_cast<const char *>(&key) + pos, size); + pos += size; + } + } + } +}; + + +/** Aggregates by concatenating serialized key values. + * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. 
+ * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. + * Therefore, when aggregating by several strings, there is no ambiguity. + */ +template <typename TData> +struct AggregationMethodSerialized +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + AggregationMethodSerialized() = default; - - template <typename Other> - AggregationMethodSerialized(const Other & other) : data(other.data) {} - - using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>; - - static const bool low_cardinality_optimization = false; - + + template <typename Other> + AggregationMethodSerialized(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>; + + static const bool low_cardinality_optimization = false; + std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; } static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &) - { + { const auto * pos = key.data; - for (auto & column : key_columns) - pos = column->deserializeAndInsertFromArena(pos); - } -}; - - -class Aggregator; - -using ColumnsHashing::HashMethodContext; -using ColumnsHashing::HashMethodContextPtr; - -struct AggregatedDataVariants : private boost::noncopyable -{ - /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way: - * - when aggregating, states are created in the pool using IAggregateFunction::create (inside - `placement new` of arbitrary structure); - * - they must then be destroyed using IAggregateFunction::destroy (inside - calling the destructor of arbitrary structure); - * - if aggregation is complete, then, in the Aggregator::convertToBlocks function, pointers to the states of aggregate functions - * are written to ColumnAggregateFunction; ColumnAggregateFunction "acquires ownership" of them, that is - calls `destroy` in its destructor. - * - if during the aggregation, before call to Aggregator::convertToBlocks, an exception was thrown, - * then the states of aggregate functions must still be destroyed, - * otherwise, for complex states (eg, AggregateFunctionUniq), there will be memory leaks; - * - in this case, to destroy states, the destructor calls Aggregator::destroyAggregateStates method, - * but only if the variable aggregator (see below) is not nullptr; - * - that is, until you transfer ownership of the aggregate function states in the ColumnAggregateFunction, set the variable `aggregator`, - * so that when an exception occurs, the states are correctly destroyed. - * - * PS. This can be corrected by making a pool that knows about which states of aggregate functions and in which order are put in it, and knows how to destroy them. - * But this can hardly be done simply because it is planned to put variable-length strings into the same pool. - * In this case, the pool will not be able to know with what offsets objects are stored. 
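The "no ambiguity" property of AggregationMethodSerialized above comes from self-delimiting encodings: each value embeds its own length, so the concatenation of several keys still parses uniquely, and ("ab","c") can never collide with ("a","bc"). A toy length-prefixed version; serializeKeys is illustrative, not the codebase's serializeValueIntoArena:

// Toy self-delimiting key serialization: length-prefixed strings, so the
// concatenation of several keys parses back unambiguously.
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

std::string serializeKeys(const std::vector<std::string> & keys)
{
    std::string out;
    for (const auto & key : keys)
    {
        uint32_t len = static_cast<uint32_t>(key.size());
        out.append(reinterpret_cast<const char *>(&len), sizeof(len));  /// length first...
        out.append(key);                                                /// ...then the bytes
    }
    return out;
}

int main()
{
    /// Without lengths, ("ab","c") and ("a","bc") would both serialize to "abc".
    assert(serializeKeys({"ab", "c"}) != serializeKeys({"a", "bc"}));
    return 0;
}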
- */ + for (auto & column : key_columns) + pos = column->deserializeAndInsertFromArena(pos); + } +}; + + +class Aggregator; + +using ColumnsHashing::HashMethodContext; +using ColumnsHashing::HashMethodContextPtr; + +struct AggregatedDataVariants : private boost::noncopyable +{ + /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way: + * - when aggregating, states are created in the pool using IAggregateFunction::create (inside - `placement new` of arbitrary structure); + * - they must then be destroyed using IAggregateFunction::destroy (inside - calling the destructor of arbitrary structure); + * - if aggregation is complete, then, in the Aggregator::convertToBlocks function, pointers to the states of aggregate functions + * are written to ColumnAggregateFunction; ColumnAggregateFunction "acquires ownership" of them, that is - calls `destroy` in its destructor. + * - if during the aggregation, before call to Aggregator::convertToBlocks, an exception was thrown, + * then the states of aggregate functions must still be destroyed, + * otherwise, for complex states (eg, AggregateFunctionUniq), there will be memory leaks; + * - in this case, to destroy states, the destructor calls Aggregator::destroyAggregateStates method, + * but only if the variable aggregator (see below) is not nullptr; + * - that is, until you transfer ownership of the aggregate function states in the ColumnAggregateFunction, set the variable `aggregator`, + * so that when an exception occurs, the states are correctly destroyed. + * + * PS. This can be corrected by making a pool that knows about which states of aggregate functions and in which order are put in it, and knows how to destroy them. + * But this can hardly be done simply because it is planned to put variable-length strings into the same pool. + * In this case, the pool will not be able to know with what offsets objects are stored. + */ const Aggregator * aggregator = nullptr; - - size_t keys_size{}; /// Number of keys. NOTE do we need this field? - Sizes key_sizes; /// Dimensions of keys, if keys of fixed length - - /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction. - Arenas aggregates_pools; - Arena * aggregates_pool{}; /// The pool that is currently used for allocation. - - /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by. 
- */ - AggregatedDataWithoutKey without_key = nullptr; - - // Disable consecutive key optimization for Uint8/16, because they use a FixedHashMap - // and the lookup there is almost free, so we don't need to cache the last lookup result - std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>> key8; - std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>> key16; - - std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>> key32; - std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>> key64; - std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>> key_string; - std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>> key_fixed_string; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt16Key, false, false, false>> keys16; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32Key>> keys32; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64Key>> keys64; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>> keys128; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>> keys256; - std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>> serialized; - - std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level; - std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level; - std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_string_two_level; - std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_fixed_string_two_level; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32KeyTwoLevel>> keys32_two_level; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64KeyTwoLevel>> keys64_two_level; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>> keys128_two_level; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>> keys256_two_level; - std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level; - - std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyHash64>> key64_hash64; - std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyHash64>> key_string_hash64; - std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyHash64>> key_fixed_string_hash64; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128Hash64>> keys128_hash64; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256Hash64>> keys256_hash64; - std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64; - - /// Support for nullable keys. - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level; - - /// Support for low cardinality. 
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>> low_cardinality_key8; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>> low_cardinality_key16; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key32; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key64; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_string; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_fixed_string; - - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key32_two_level; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key64_two_level; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_string_two_level; - std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_fixed_string_two_level; - - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, false, true>> low_cardinality_keys128; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, false, true>> low_cardinality_keys256; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, false, true>> low_cardinality_keys128_two_level; - std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, false, true>> low_cardinality_keys256_two_level; - - /// In this and similar macros, the option without_key is not considered. 
- #define APPLY_FOR_AGGREGATED_VARIANTS(M) \ - M(key8, false) \ - M(key16, false) \ - M(key32, false) \ - M(key64, false) \ - M(key_string, false) \ - M(key_fixed_string, false) \ - M(keys16, false) \ - M(keys32, false) \ - M(keys64, false) \ - M(keys128, false) \ - M(keys256, false) \ - M(serialized, false) \ - M(key32_two_level, true) \ - M(key64_two_level, true) \ - M(key_string_two_level, true) \ - M(key_fixed_string_two_level, true) \ - M(keys32_two_level, true) \ - M(keys64_two_level, true) \ - M(keys128_two_level, true) \ - M(keys256_two_level, true) \ - M(serialized_two_level, true) \ - M(key64_hash64, false) \ - M(key_string_hash64, false) \ - M(key_fixed_string_hash64, false) \ - M(keys128_hash64, false) \ - M(keys256_hash64, false) \ - M(serialized_hash64, false) \ - M(nullable_keys128, false) \ - M(nullable_keys256, false) \ - M(nullable_keys128_two_level, true) \ - M(nullable_keys256_two_level, true) \ - M(low_cardinality_key8, false) \ - M(low_cardinality_key16, false) \ - M(low_cardinality_key32, false) \ - M(low_cardinality_key64, false) \ - M(low_cardinality_keys128, false) \ - M(low_cardinality_keys256, false) \ - M(low_cardinality_key_string, false) \ - M(low_cardinality_key_fixed_string, false) \ - M(low_cardinality_key32_two_level, true) \ - M(low_cardinality_key64_two_level, true) \ - M(low_cardinality_keys128_two_level, true) \ - M(low_cardinality_keys256_two_level, true) \ - M(low_cardinality_key_string_two_level, true) \ - M(low_cardinality_key_fixed_string_two_level, true) \ - - enum class Type - { - EMPTY = 0, - without_key, - - #define M(NAME, IS_TWO_LEVEL) NAME, - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - }; - Type type = Type::EMPTY; - - AggregatedDataVariants() : aggregates_pools(1, std::make_shared<Arena>()), aggregates_pool(aggregates_pools.back().get()) {} - bool empty() const { return type == Type::EMPTY; } - void invalidate() { type = Type::EMPTY; } - - ~AggregatedDataVariants(); - - void init(Type type_) - { - switch (type_) - { - case Type::EMPTY: break; - case Type::without_key: break; - - #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break; - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - - type = type_; - } - - /// Number of rows (different keys). - size_t size() const - { - switch (type) - { - case Type::EMPTY: return 0; - case Type::without_key: return 1; - - #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return NAME->data.size() + (without_key != nullptr); - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); - } - - /// The size without taking into account the row in which data is written for the calculation of TOTALS. 
- size_t sizeWithoutOverflowRow() const - { - switch (type) - { - case Type::EMPTY: return 0; - case Type::without_key: return 1; - - #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return NAME->data.size(); - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); - } - - const char * getMethodName() const - { - switch (type) - { - case Type::EMPTY: return "EMPTY"; - case Type::without_key: return "without_key"; - - #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return #NAME; - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); - } - - bool isTwoLevel() const - { - switch (type) - { - case Type::EMPTY: return false; - case Type::without_key: return false; - - #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return IS_TWO_LEVEL; - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); - } - - #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ - M(key32) \ - M(key64) \ - M(key_string) \ - M(key_fixed_string) \ - M(keys32) \ - M(keys64) \ - M(keys128) \ - M(keys256) \ - M(serialized) \ - M(nullable_keys128) \ - M(nullable_keys256) \ - M(low_cardinality_key32) \ - M(low_cardinality_key64) \ - M(low_cardinality_keys128) \ - M(low_cardinality_keys256) \ - M(low_cardinality_key_string) \ - M(low_cardinality_key_fixed_string) \ - - #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ - M(key8) \ - M(key16) \ - M(keys16) \ - M(key64_hash64) \ - M(key_string_hash64)\ - M(key_fixed_string_hash64) \ - M(keys128_hash64) \ - M(keys256_hash64) \ - M(serialized_hash64) \ - M(low_cardinality_key8) \ - M(low_cardinality_key16) \ - - #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \ - APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ - APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ - - bool isConvertibleToTwoLevel() const - { - switch (type) - { - #define M(NAME) \ - case Type::NAME: return true; - - APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) - - #undef M - default: - return false; - } - } - - void convertToTwoLevel(); - - #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \ - M(key32_two_level) \ - M(key64_two_level) \ - M(key_string_two_level) \ - M(key_fixed_string_two_level) \ - M(keys32_two_level) \ - M(keys64_two_level) \ - M(keys128_two_level) \ - M(keys256_two_level) \ - M(serialized_two_level) \ - M(nullable_keys128_two_level) \ - M(nullable_keys256_two_level) \ - M(low_cardinality_key32_two_level) \ - M(low_cardinality_key64_two_level) \ - M(low_cardinality_keys128_two_level) \ - M(low_cardinality_keys256_two_level) \ - M(low_cardinality_key_string_two_level) \ - M(low_cardinality_key_fixed_string_two_level) \ - - #define APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) \ - M(low_cardinality_key8) \ - M(low_cardinality_key16) \ - M(low_cardinality_key32) \ - M(low_cardinality_key64) \ - M(low_cardinality_keys128) \ - M(low_cardinality_keys256) \ - M(low_cardinality_key_string) \ - M(low_cardinality_key_fixed_string) \ - M(low_cardinality_key32_two_level) \ - M(low_cardinality_key64_two_level) \ - M(low_cardinality_keys128_two_level) \ - M(low_cardinality_keys256_two_level) \ - M(low_cardinality_key_string_two_level) \ + + size_t keys_size{}; /// Number of keys. NOTE do we need this field? + Sizes key_sizes; /// Dimensions of keys, if keys of fixed length + + /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction. + Arenas aggregates_pools; + Arena * aggregates_pool{}; /// The pool that is currently used for allocation. 
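The ownership protocol described in the comment above boils down to: the `aggregator` pointer doubles as a flag saying "the states in the pool are still mine to destroy". A toy model of that pattern, with hypothetical `Variants`/`State` names; the real destructor walks the hash tables and calls IAggregateFunction::destroy:

#include <vector>

struct State { ~State() = default; };    // stand-in for an aggregate function state

struct Aggregator;

struct Variants                           // plays the role of AggregatedDataVariants
{
    // Non-null means "the states below are still owned here and must be
    // destroyed on unwinding"; see the field documented above.
    const Aggregator * aggregator = nullptr;
    std::vector<State *> states;          // really: entries in the hash tables / arena

    ~Variants();
};

struct Aggregator
{
    void destroyAggregateStates(Variants & v) const
    {
        for (State * s : v.states)
            s->~State();                  // IAggregateFunction::destroy in the real code
        v.states.clear();
    }
};

Variants::~Variants()
{
    // If an exception unwinds before ownership moved into
    // ColumnAggregateFunction, this is the last chance to destroy the
    // states and avoid leaking complex ones (e.g. AggregateFunctionUniq).
    if (aggregator)
        aggregator->destroyAggregateStates(*this);
}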
+ + /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by. + */ + AggregatedDataWithoutKey without_key = nullptr; + + // Disable consecutive key optimization for Uint8/16, because they use a FixedHashMap + // and the lookup there is almost free, so we don't need to cache the last lookup result + std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>> key8; + std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>> key16; + + std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>> key32; + std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>> key64; + std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>> key_string; + std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>> key_fixed_string; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt16Key, false, false, false>> keys16; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32Key>> keys32; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64Key>> keys64; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>> keys128; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>> keys256; + std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>> serialized; + + std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level; + std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level; + std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_string_two_level; + std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_fixed_string_two_level; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32KeyTwoLevel>> keys32_two_level; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64KeyTwoLevel>> keys64_two_level; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>> keys128_two_level; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>> keys256_two_level; + std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level; + + std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyHash64>> key64_hash64; + std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyHash64>> key_string_hash64; + std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyHash64>> key_fixed_string_hash64; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128Hash64>> keys128_hash64; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256Hash64>> keys256_hash64; + std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64; + + /// Support for nullable keys. + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level; + + /// Support for low cardinality. 
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>> low_cardinality_key8; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>> low_cardinality_key16; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key32; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key64; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_string; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_fixed_string; + + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key32_two_level; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key64_two_level; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_string_two_level; + std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_fixed_string_two_level; + + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, false, true>> low_cardinality_keys128; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, false, true>> low_cardinality_keys256; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, false, true>> low_cardinality_keys128_two_level; + std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, false, true>> low_cardinality_keys256_two_level; + + /// In this and similar macros, the option without_key is not considered. 
+ #define APPLY_FOR_AGGREGATED_VARIANTS(M) \ + M(key8, false) \ + M(key16, false) \ + M(key32, false) \ + M(key64, false) \ + M(key_string, false) \ + M(key_fixed_string, false) \ + M(keys16, false) \ + M(keys32, false) \ + M(keys64, false) \ + M(keys128, false) \ + M(keys256, false) \ + M(serialized, false) \ + M(key32_two_level, true) \ + M(key64_two_level, true) \ + M(key_string_two_level, true) \ + M(key_fixed_string_two_level, true) \ + M(keys32_two_level, true) \ + M(keys64_two_level, true) \ + M(keys128_two_level, true) \ + M(keys256_two_level, true) \ + M(serialized_two_level, true) \ + M(key64_hash64, false) \ + M(key_string_hash64, false) \ + M(key_fixed_string_hash64, false) \ + M(keys128_hash64, false) \ + M(keys256_hash64, false) \ + M(serialized_hash64, false) \ + M(nullable_keys128, false) \ + M(nullable_keys256, false) \ + M(nullable_keys128_two_level, true) \ + M(nullable_keys256_two_level, true) \ + M(low_cardinality_key8, false) \ + M(low_cardinality_key16, false) \ + M(low_cardinality_key32, false) \ + M(low_cardinality_key64, false) \ + M(low_cardinality_keys128, false) \ + M(low_cardinality_keys256, false) \ + M(low_cardinality_key_string, false) \ + M(low_cardinality_key_fixed_string, false) \ + M(low_cardinality_key32_two_level, true) \ + M(low_cardinality_key64_two_level, true) \ + M(low_cardinality_keys128_two_level, true) \ + M(low_cardinality_keys256_two_level, true) \ + M(low_cardinality_key_string_two_level, true) \ + M(low_cardinality_key_fixed_string_two_level, true) \ + + enum class Type + { + EMPTY = 0, + without_key, + + #define M(NAME, IS_TWO_LEVEL) NAME, + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + }; + Type type = Type::EMPTY; + + AggregatedDataVariants() : aggregates_pools(1, std::make_shared<Arena>()), aggregates_pool(aggregates_pools.back().get()) {} + bool empty() const { return type == Type::EMPTY; } + void invalidate() { type = Type::EMPTY; } + + ~AggregatedDataVariants(); + + void init(Type type_) + { + switch (type_) + { + case Type::EMPTY: break; + case Type::without_key: break; + + #define M(NAME, IS_TWO_LEVEL) \ + case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break; + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + } + + type = type_; + } + + /// Number of rows (different keys). + size_t size() const + { + switch (type) + { + case Type::EMPTY: return 0; + case Type::without_key: return 1; + + #define M(NAME, IS_TWO_LEVEL) \ + case Type::NAME: return NAME->data.size() + (without_key != nullptr); + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); + } + + /// The size without taking into account the row in which data is written for the calculation of TOTALS. 
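APPLY_FOR_AGGREGATED_VARIANTS above is an X-macro: the variant list is written once and expanded into the enum, init(), size(), getMethodName(), isTwoLevel(), and the rest, so adding a variant touches a single place. The pattern in miniature, with two made-up variants:

#include <iostream>

#define APPLY_FOR_VARIANTS(M) \
    M(key_small, false)       \
    M(key_big,   true)

enum class Type
{
    EMPTY = 0,
#define M(NAME, IS_TWO_LEVEL) NAME,
    APPLY_FOR_VARIANTS(M)
#undef M
};

const char * methodName(Type type)
{
    switch (type)
    {
        case Type::EMPTY: return "EMPTY";
#define M(NAME, IS_TWO_LEVEL) case Type::NAME: return #NAME;
        APPLY_FOR_VARIANTS(M)
#undef M
    }
    return "unknown";
}

bool isTwoLevel(Type type)
{
    switch (type)
    {
        case Type::EMPTY: return false;
#define M(NAME, IS_TWO_LEVEL) case Type::NAME: return IS_TWO_LEVEL;
        APPLY_FOR_VARIANTS(M)
#undef M
    }
    return false;
}

int main()
{
    std::cout << methodName(Type::key_big) << ' ' << isTwoLevel(Type::key_big) << '\n'; // key_big 1
}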
+ size_t sizeWithoutOverflowRow() const + { + switch (type) + { + case Type::EMPTY: return 0; + case Type::without_key: return 1; + + #define M(NAME, IS_TWO_LEVEL) \ + case Type::NAME: return NAME->data.size(); + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); + } + + const char * getMethodName() const + { + switch (type) + { + case Type::EMPTY: return "EMPTY"; + case Type::without_key: return "without_key"; + + #define M(NAME, IS_TWO_LEVEL) \ + case Type::NAME: return #NAME; + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); + } + + bool isTwoLevel() const + { + switch (type) + { + case Type::EMPTY: return false; + case Type::without_key: return false; + + #define M(NAME, IS_TWO_LEVEL) \ + case Type::NAME: return IS_TWO_LEVEL; + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); + } + + #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ + M(key32) \ + M(key64) \ + M(key_string) \ + M(key_fixed_string) \ + M(keys32) \ + M(keys64) \ + M(keys128) \ + M(keys256) \ + M(serialized) \ + M(nullable_keys128) \ + M(nullable_keys256) \ + M(low_cardinality_key32) \ + M(low_cardinality_key64) \ + M(low_cardinality_keys128) \ + M(low_cardinality_keys256) \ + M(low_cardinality_key_string) \ + M(low_cardinality_key_fixed_string) \ + + #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ + M(key8) \ + M(key16) \ + M(keys16) \ + M(key64_hash64) \ + M(key_string_hash64)\ + M(key_fixed_string_hash64) \ + M(keys128_hash64) \ + M(keys256_hash64) \ + M(serialized_hash64) \ + M(low_cardinality_key8) \ + M(low_cardinality_key16) \ + + #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \ + APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ + APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ + + bool isConvertibleToTwoLevel() const + { + switch (type) + { + #define M(NAME) \ + case Type::NAME: return true; + + APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) + + #undef M + default: + return false; + } + } + + void convertToTwoLevel(); + + #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \ + M(key32_two_level) \ + M(key64_two_level) \ + M(key_string_two_level) \ + M(key_fixed_string_two_level) \ + M(keys32_two_level) \ + M(keys64_two_level) \ + M(keys128_two_level) \ + M(keys256_two_level) \ + M(serialized_two_level) \ + M(nullable_keys128_two_level) \ + M(nullable_keys256_two_level) \ + M(low_cardinality_key32_two_level) \ + M(low_cardinality_key64_two_level) \ + M(low_cardinality_keys128_two_level) \ + M(low_cardinality_keys256_two_level) \ + M(low_cardinality_key_string_two_level) \ + M(low_cardinality_key_fixed_string_two_level) \ + + #define APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) \ + M(low_cardinality_key8) \ + M(low_cardinality_key16) \ + M(low_cardinality_key32) \ + M(low_cardinality_key64) \ + M(low_cardinality_keys128) \ + M(low_cardinality_keys256) \ + M(low_cardinality_key_string) \ + M(low_cardinality_key_fixed_string) \ + M(low_cardinality_key32_two_level) \ + M(low_cardinality_key64_two_level) \ + M(low_cardinality_keys128_two_level) \ + M(low_cardinality_keys256_two_level) \ + M(low_cardinality_key_string_two_level) \ M(low_cardinality_key_fixed_string_two_level) - + bool isLowCardinality() const - { - switch (type) - { - #define M(NAME) \ - case Type::NAME: return true; - - APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) - #undef M - default: - return false; - } - } - - static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings) - { - switch (type) - { - case Type::without_key: return nullptr; - - 
#define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: \ - { \ - using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \ - using T ## NAME = typename TPtr ## NAME ::element_type; \ - return T ## NAME ::State::createContext(settings); \ - } - - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - - default: - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - } - } -}; - -using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>; -using ManyAggregatedDataVariants = std::vector<AggregatedDataVariantsPtr>; -using ManyAggregatedDataVariantsPtr = std::shared_ptr<ManyAggregatedDataVariants>; - + { + switch (type) + { + #define M(NAME) \ + case Type::NAME: return true; + + APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) + #undef M + default: + return false; + } + } + + static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings) + { + switch (type) + { + case Type::without_key: return nullptr; + + #define M(NAME, IS_TWO_LEVEL) \ + case Type::NAME: \ + { \ + using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \ + using T ## NAME = typename TPtr ## NAME ::element_type; \ + return T ## NAME ::State::createContext(settings); \ + } + + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + + default: + throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + } + } +}; + +using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>; +using ManyAggregatedDataVariants = std::vector<AggregatedDataVariantsPtr>; +using ManyAggregatedDataVariantsPtr = std::shared_ptr<ManyAggregatedDataVariants>; + class CompiledAggregateFunctionsHolder; -/** How are "total" values calculated with WITH TOTALS? - * (For more details, see TotalsHavingTransform.) - * - * In the absence of group_by_overflow_mode = 'any', the data is aggregated as usual, but the states of the aggregate functions are not finalized. - * Later, the aggregate function states for all rows (passed through HAVING) are merged into one - this will be TOTALS. - * - * If there is group_by_overflow_mode = 'any', the data is aggregated as usual, except for the keys that did not fit in max_rows_to_group_by. - * For these keys, the data is aggregated into one additional row - see below under the names `overflow_row`, `overflows`... - * Later, the aggregate function states for all rows (passed through HAVING) are merged into one, - * also overflow_row is added or not added (depending on the totals_mode setting) also - this will be TOTALS. - */ - - -/** Aggregates the source of the blocks. - */ +/** How are "total" values calculated with WITH TOTALS? + * (For more details, see TotalsHavingTransform.) + * + * In the absence of group_by_overflow_mode = 'any', the data is aggregated as usual, but the states of the aggregate functions are not finalized. + * Later, the aggregate function states for all rows (passed through HAVING) are merged into one - this will be TOTALS. + * + * If there is group_by_overflow_mode = 'any', the data is aggregated as usual, except for the keys that did not fit in max_rows_to_group_by. + * For these keys, the data is aggregated into one additional row - see below under the names `overflow_row`, `overflows`... + * Later, the aggregate function states for all rows (passed through HAVING) are merged into one, + * also overflow_row is added or not added (depending on the totals_mode setting) also - this will be TOTALS. + */ + + +/** Aggregates the source of the blocks. 
+ */ class Aggregator final -{ -public: - struct Params - { - /// Data structure of source blocks. - Block src_header; - /// Data structure of intermediate blocks before merge. - Block intermediate_header; - - /// What to count. - const ColumnNumbers keys; - const AggregateDescriptions aggregates; - const size_t keys_size; - const size_t aggregates_size; - - /// The settings of approximate calculation of GROUP BY. - const bool overflow_row; /// Do we need to put into AggregatedDataVariants::without_key aggregates for keys that are not in max_rows_to_group_by. - const size_t max_rows_to_group_by; - const OverflowMode group_by_overflow_mode; - - /// Two-level aggregation settings (used for a large number of keys). - /** With how many keys or the size of the aggregation state in bytes, - * two-level aggregation begins to be used. Enough to reach of at least one of the thresholds. - * 0 - the corresponding threshold is not specified. - */ - size_t group_by_two_level_threshold; - size_t group_by_two_level_threshold_bytes; - - /// Settings to flush temporary data to the filesystem (external aggregation). - const size_t max_bytes_before_external_group_by; /// 0 - do not use external aggregation. - - /// Return empty result when aggregating without keys on empty set. - bool empty_result_for_aggregation_by_empty_set; - +{ +public: + struct Params + { + /// Data structure of source blocks. + Block src_header; + /// Data structure of intermediate blocks before merge. + Block intermediate_header; + + /// What to count. + const ColumnNumbers keys; + const AggregateDescriptions aggregates; + const size_t keys_size; + const size_t aggregates_size; + + /// The settings of approximate calculation of GROUP BY. + const bool overflow_row; /// Do we need to put into AggregatedDataVariants::without_key aggregates for keys that are not in max_rows_to_group_by. + const size_t max_rows_to_group_by; + const OverflowMode group_by_overflow_mode; + + /// Two-level aggregation settings (used for a large number of keys). + /** With how many keys or the size of the aggregation state in bytes, + * two-level aggregation begins to be used. Enough to reach of at least one of the thresholds. + * 0 - the corresponding threshold is not specified. + */ + size_t group_by_two_level_threshold; + size_t group_by_two_level_threshold_bytes; + + /// Settings to flush temporary data to the filesystem (external aggregation). + const size_t max_bytes_before_external_group_by; /// 0 - do not use external aggregation. + + /// Return empty result when aggregating without keys on empty set. + bool empty_result_for_aggregation_by_empty_set; + VolumePtr tmp_volume; - - /// Settings is used to determine cache size. No threads are created. - size_t max_threads; - - const size_t min_free_disk_space; + + /// Settings is used to determine cache size. No threads are created. 
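Per the two-level comment in Params above, conversion starts as soon as either threshold is reached, and a threshold of 0 means "not specified". A sketch of that decision using the same field names; the function name and the exact >= comparison are illustrative, since the real check sits inside the execution path:

#include <cstddef>

// A threshold of 0 is "not specified" and never triggers conversion;
// otherwise reaching either threshold is enough.
bool shouldConvertToTwoLevel(
    std::size_t result_keys,
    std::size_t result_bytes,
    std::size_t group_by_two_level_threshold,
    std::size_t group_by_two_level_threshold_bytes)
{
    return (group_by_two_level_threshold
                && result_keys >= group_by_two_level_threshold)
        || (group_by_two_level_threshold_bytes
                && result_bytes >= group_by_two_level_threshold_bytes);
}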
+ size_t max_threads; + + const size_t min_free_disk_space; bool compile_aggregate_expressions; size_t min_count_to_compile_aggregate_expression; - Params( - const Block & src_header_, - const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, - bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_, - size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_, - size_t max_bytes_before_external_group_by_, - bool empty_result_for_aggregation_by_empty_set_, + Params( + const Block & src_header_, + const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, + bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_, + size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_, + size_t max_bytes_before_external_group_by_, + bool empty_result_for_aggregation_by_empty_set_, VolumePtr tmp_volume_, size_t max_threads_, size_t min_free_disk_space_, bool compile_aggregate_expressions_, size_t min_count_to_compile_aggregate_expression_, const Block & intermediate_header_ = {}) - : src_header(src_header_), + : src_header(src_header_), intermediate_header(intermediate_header_), - keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()), - overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_), - group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_), - max_bytes_before_external_group_by(max_bytes_before_external_group_by_), - empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_), - tmp_volume(tmp_volume_), max_threads(max_threads_), + keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()), + overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_), + group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_), + max_bytes_before_external_group_by(max_bytes_before_external_group_by_), + empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_), + tmp_volume(tmp_volume_), max_threads(max_threads_), min_free_disk_space(min_free_disk_space_), compile_aggregate_expressions(compile_aggregate_expressions_), min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_) - { - } - - /// Only parameters that matter during merge. - Params(const Block & intermediate_header_, - const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_) + { + } + + /// Only parameters that matter during merge. 
+ Params(const Block & intermediate_header_, + const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_) : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0) - { - intermediate_header = intermediate_header_; - } - - static Block getHeader( - const Block & src_header, - const Block & intermediate_header, - const ColumnNumbers & keys, - const AggregateDescriptions & aggregates, - bool final); - - Block getHeader(bool final) const - { - return getHeader(src_header, intermediate_header, keys, aggregates, final); - } - - /// Returns keys and aggregated for EXPLAIN query - void explain(WriteBuffer & out, size_t indent) const; + { + intermediate_header = intermediate_header_; + } + + static Block getHeader( + const Block & src_header, + const Block & intermediate_header, + const ColumnNumbers & keys, + const AggregateDescriptions & aggregates, + bool final); + + Block getHeader(bool final) const + { + return getHeader(src_header, intermediate_header, keys, aggregates, final); + } + + /// Returns keys and aggregated for EXPLAIN query + void explain(WriteBuffer & out, size_t indent) const; void explain(JSONBuilder::JSONMap & map) const; - }; - + }; + explicit Aggregator(const Params & params_); - - using AggregateColumns = std::vector<ColumnRawPtrs>; - using AggregateColumnsData = std::vector<ColumnAggregateFunction::Container *>; - using AggregateColumnsConstData = std::vector<const ColumnAggregateFunction::Container *>; + + using AggregateColumns = std::vector<ColumnRawPtrs>; + using AggregateColumnsData = std::vector<ColumnAggregateFunction::Container *>; + using AggregateColumnsConstData = std::vector<const ColumnAggregateFunction::Container *>; using AggregateFunctionsPlainPtrs = std::vector<const IAggregateFunction *>; - - /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break'). - bool executeOnBlock(const Block & block, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block + + /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break'). + bool executeOnBlock(const Block & block, AggregatedDataVariants & result, + ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block bool & no_more_keys) const; - - bool executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block + + bool executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result, + ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block bool & no_more_keys) const; - + /// Used for aggregate projection. bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const; - /** Convert the aggregation data structure into a block. - * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block. - * - * If final = false, then ColumnAggregateFunction is created as the aggregation columns with the state of the calculations, - * which can then be combined with other states (for distributed query processing). 
- * If final = true, then columns with ready values are created as aggregate columns. - */ - BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; - - ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const; - - using BucketToBlocks = std::map<Int32, BlocksList>; - /// Merge partially aggregated blocks separated to buckets into one data structure. - void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads); - - /// Merge several partially aggregated blocks into one. - /// Precondition: for all blocks block.info.is_overflows flag must be the same. - /// (either all blocks are from overflow data or none blocks are). - /// The resulting block has the same value of is_overflows flag. - Block mergeBlocks(BlocksList & blocks, bool final); - - /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used. - * This is needed to simplify merging of that data with other results, that are already two-level. - */ + /** Convert the aggregation data structure into a block. + * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block. + * + * If final = false, then ColumnAggregateFunction is created as the aggregation columns with the state of the calculations, + * which can then be combined with other states (for distributed query processing). + * If final = true, then columns with ready values are created as aggregate columns. + */ + BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; + + ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const; + + using BucketToBlocks = std::map<Int32, BlocksList>; + /// Merge partially aggregated blocks separated to buckets into one data structure. + void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads); + + /// Merge several partially aggregated blocks into one. + /// Precondition: for all blocks block.info.is_overflows flag must be the same. + /// (either all blocks are from overflow data or none blocks are). + /// The resulting block has the same value of is_overflows flag. + Block mergeBlocks(BlocksList & blocks, bool final); + + /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used. + * This is needed to simplify merging of that data with other results, that are already two-level. + */ std::vector<Block> convertBlockToTwoLevel(const Block & block) const; - - /// For external aggregation. + + /// For external aggregation. void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) const; void writeToTemporaryFile(AggregatedDataVariants & data_variants) const; - - bool hasTemporaryFiles() const { return !temporary_files.empty(); } - - struct TemporaryFiles - { - std::vector<std::unique_ptr<Poco::TemporaryFile>> files; - size_t sum_size_uncompressed = 0; - size_t sum_size_compressed = 0; - mutable std::mutex mutex; - - bool empty() const - { - std::lock_guard lock(mutex); - return files.empty(); - } - }; - - const TemporaryFiles & getTemporaryFiles() const { return temporary_files; } - - /// Get data structure of the result. 
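The final/non-final distinction in convertToBlocks is the backbone of distributed GROUP BY: with final = false the exported columns hold aggregation states that can still be merged, and only final = true produces ready values. A toy average state showing why shipping states, not values, is required:

#include <cstddef>
#include <iostream>

// Toy counterpart of an aggregate function state (avg): partial states can
// be merged across nodes; the value is computed only at finalization.
struct AvgState
{
    double sum = 0;
    std::size_t count = 0;

    void add(double x) { sum += x; ++count; }
    void merge(const AvgState & other) { sum += other.sum; count += other.count; } // final = false path
    double finalize() const { return count ? sum / count : 0; }                    // final = true path
};

int main()
{
    AvgState shard1, shard2;               // partial aggregation on two nodes
    shard1.add(1); shard1.add(2);
    shard2.add(3);

    AvgState total = shard1;               // states are shipped and merged...
    total.merge(shard2);
    std::cout << total.finalize() << '\n'; // 2 -- ...and finalized exactly once
}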
- Block getHeader(bool final) const; - + + bool hasTemporaryFiles() const { return !temporary_files.empty(); } + + struct TemporaryFiles + { + std::vector<std::unique_ptr<Poco::TemporaryFile>> files; + size_t sum_size_uncompressed = 0; + size_t sum_size_compressed = 0; + mutable std::mutex mutex; + + bool empty() const + { + std::lock_guard lock(mutex); + return files.empty(); + } + }; + + const TemporaryFiles & getTemporaryFiles() const { return temporary_files; } + + /// Get data structure of the result. + Block getHeader(bool final) const; + private: - friend struct AggregatedDataVariants; - friend class ConvertingAggregatedToChunksTransform; - friend class ConvertingAggregatedToChunksSource; - friend class AggregatingInOrderTransform; - - Params params; - - AggregatedDataVariants::Type method_chosen; - Sizes key_sizes; - - HashMethodContextPtr aggregation_state_cache; - - AggregateFunctionsPlainPtrs aggregate_functions; - - /** This array serves two purposes. - * + friend struct AggregatedDataVariants; + friend class ConvertingAggregatedToChunksTransform; + friend class ConvertingAggregatedToChunksSource; + friend class AggregatingInOrderTransform; + + Params params; + + AggregatedDataVariants::Type method_chosen; + Sizes key_sizes; + + HashMethodContextPtr aggregation_state_cache; + + AggregateFunctionsPlainPtrs aggregate_functions; + + /** This array serves two purposes. + * * Function arguments are collected side by side, and they do not need to be collected from different places. Also the array is made zero-terminated. - * The inner loop (for the case without_key) is almost twice as compact; performance gain of about 30%. - */ - struct AggregateFunctionInstruction - { + * The inner loop (for the case without_key) is almost twice as compact; performance gain of about 30%. + */ + struct AggregateFunctionInstruction + { const IAggregateFunction * that{}; size_t state_offset{}; const IColumn ** arguments{}; const IAggregateFunction * batch_that{}; const IColumn ** batch_arguments{}; const UInt64 * offsets{}; - }; - - using AggregateFunctionInstructions = std::vector<AggregateFunctionInstruction>; - using NestedColumnsHolder = std::vector<std::vector<const IColumn *>>; - - Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions. - size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. - - // add info to track alignment requirement - // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) - size_t align_aggregate_states = 1; - - bool all_aggregates_has_trivial_destructor = false; - - /// How many RAM were used to process the query before processing the first block. - Int64 memory_usage_before_aggregation = 0; - - Poco::Logger * log = &Poco::Logger::get("Aggregator"); - - /// For external aggregation. + }; + + using AggregateFunctionInstructions = std::vector<AggregateFunctionInstruction>; + using NestedColumnsHolder = std::vector<std::vector<const IColumn *>>; + + Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions. + size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. + + // add info to track alignment requirement + // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... 
vn) + size_t align_aggregate_states = 1; + + bool all_aggregates_has_trivial_destructor = false; + + /// How many RAM were used to process the query before processing the first block. + Int64 memory_usage_before_aggregation = 0; + + Poco::Logger * log = &Poco::Logger::get("Aggregator"); + + /// For external aggregation. mutable TemporaryFiles temporary_files; - + #if USE_EMBEDDED_COMPILER std::shared_ptr<CompiledAggregateFunctionsHolder> compiled_aggregate_functions_holder; #endif @@ -1096,240 +1096,240 @@ private: */ void compileAggregateFunctionsIfNeeded(); - /** Select the aggregation method based on the number and types of keys. */ - AggregatedDataVariants::Type chooseAggregationMethod(); - - /** Create states of aggregate functions for one key. - */ + /** Select the aggregation method based on the number and types of keys. */ + AggregatedDataVariants::Type chooseAggregationMethod(); + + /** Create states of aggregate functions for one key. + */ template <bool skip_compiled_aggregate_functions = false> - void createAggregateStates(AggregateDataPtr & aggregate_data) const; - - /** Call `destroy` methods for states of aggregate functions. - * Used in the exception handler for aggregation, since RAII in this case is not applicable. - */ + void createAggregateStates(AggregateDataPtr & aggregate_data) const; + + /** Call `destroy` methods for states of aggregate functions. + * Used in the exception handler for aggregation, since RAII in this case is not applicable. + */ void destroyAllAggregateStates(AggregatedDataVariants & result) const; - - - /// Process one data block, aggregate the data into a hash table. - template <typename Method> - void executeImpl( - Method & method, - Arena * aggregates_pool, - size_t rows, - ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions, - bool no_more_keys, - AggregateDataPtr overflow_row) const; - - /// Specialization for a particular value no_more_keys. + + + /// Process one data block, aggregate the data into a hash table. + template <typename Method> + void executeImpl( + Method & method, + Arena * aggregates_pool, + size_t rows, + ColumnRawPtrs & key_columns, + AggregateFunctionInstruction * aggregate_instructions, + bool no_more_keys, + AggregateDataPtr overflow_row) const; + + /// Specialization for a particular value no_more_keys. template <bool no_more_keys, bool use_compiled_functions, typename Method> void executeImplBatch( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - size_t rows, - AggregateFunctionInstruction * aggregate_instructions, - AggregateDataPtr overflow_row) const; - - /// For case when there are no keys (all aggregate into one row). + Method & method, + typename Method::State & state, + Arena * aggregates_pool, + size_t rows, + AggregateFunctionInstruction * aggregate_instructions, + AggregateDataPtr overflow_row) const; + + /// For case when there are no keys (all aggregate into one row). 
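executeImplBatch above takes no_more_keys (and use_compiled_functions) as template rather than runtime parameters, so each combination gets its own instantiation of the hot per-row loop with the branch folded out at compile time. The same technique in miniature, using if constexpr:

#include <cstddef>

template <bool no_more_keys>
std::size_t processBatch(const int * keys, std::size_t rows)
{
    std::size_t inserted = 0;
    for (std::size_t i = 0; i < rows; ++i)
    {
        if constexpr (no_more_keys)
            (void)keys[i];   // the "insert new key" branch is compiled out entirely
        else
            ++inserted;      // stand-in for emplacing the key into the hash table
    }
    return inserted;
}

// The runtime flag is resolved once, outside the per-row loop:
std::size_t process(const int * keys, std::size_t rows, bool no_more_keys)
{
    return no_more_keys ? processBatch<true>(keys, rows)
                        : processBatch<false>(keys, rows);
}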
template <bool use_compiled_functions> void executeWithoutKeyImpl( - AggregatedDataWithoutKey & res, - size_t rows, - AggregateFunctionInstruction * aggregate_instructions, + AggregatedDataWithoutKey & res, + size_t rows, + AggregateFunctionInstruction * aggregate_instructions, Arena * arena) const; - - static void executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, - size_t row_begin, - size_t row_end, - AggregateFunctionInstruction * aggregate_instructions, - Arena * arena); - - template <typename Method> - void writeToTemporaryFileImpl( - AggregatedDataVariants & data_variants, - Method & method, + + static void executeOnIntervalWithoutKeyImpl( + AggregatedDataWithoutKey & res, + size_t row_begin, + size_t row_end, + AggregateFunctionInstruction * aggregate_instructions, + Arena * arena); + + template <typename Method> + void writeToTemporaryFileImpl( + AggregatedDataVariants & data_variants, + Method & method, IBlockOutputStream & out) const; - - /// Merge NULL key data from hash table `src` into `dst`. - template <typename Method, typename Table> - void mergeDataNullKey( - Table & table_dst, - Table & table_src, - Arena * arena) const; - - /// Merge data from hash table `src` into `dst`. + + /// Merge NULL key data from hash table `src` into `dst`. + template <typename Method, typename Table> + void mergeDataNullKey( + Table & table_dst, + Table & table_src, + Arena * arena) const; + + /// Merge data from hash table `src` into `dst`. template <typename Method, bool use_compiled_functions, typename Table> - void mergeDataImpl( - Table & table_dst, - Table & table_src, - Arena * arena) const; - - /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`. - template <typename Method, typename Table> - void mergeDataNoMoreKeysImpl( - Table & table_dst, - AggregatedDataWithoutKey & overflows, - Table & table_src, - Arena * arena) const; - - /// Same, but ignores the rest of the keys. - template <typename Method, typename Table> - void mergeDataOnlyExistingKeysImpl( - Table & table_dst, - Table & table_src, - Arena * arena) const; - - void mergeWithoutKeyDataImpl( - ManyAggregatedDataVariants & non_empty_data) const; - - template <typename Method> - void mergeSingleLevelDataImpl( - ManyAggregatedDataVariants & non_empty_data) const; - - template <typename Method, typename Table> - void convertToBlockImpl( - Method & method, - Table & data, - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - Arena * arena, - bool final) const; - - template <typename Mapped> - void insertAggregatesIntoColumns( - Mapped & mapped, - MutableColumns & final_aggregate_columns, - Arena * arena) const; - + void mergeDataImpl( + Table & table_dst, + Table & table_src, + Arena * arena) const; + + /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`. + template <typename Method, typename Table> + void mergeDataNoMoreKeysImpl( + Table & table_dst, + AggregatedDataWithoutKey & overflows, + Table & table_src, + Arena * arena) const; + + /// Same, but ignores the rest of the keys. 
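mergeDataNoMoreKeysImpl, declared above, merges src into dst only for keys dst already contains; states for unseen keys are folded into the single overflow row. A std::map toy version of that contract (the real code operates on the specialized hash tables and aggregate states):

#include <iostream>
#include <map>
#include <string>

using Table = std::map<std::string, long>;  // key -> toy "state" (a running sum)

void mergeNoMoreKeys(Table & dst, const Table & src, long & overflows)
{
    for (const auto & [key, value] : src)
    {
        auto it = dst.find(key);
        if (it != dst.end())
            it->second += value;  // key already admitted: merge the states
        else
            overflows += value;   // unseen key: folded into the overflow row
    }
}

int main()
{
    Table dst{{"a", 1}};
    Table src{{"a", 2}, {"b", 5}};
    long overflows = 0;
    mergeNoMoreKeys(dst, src, overflows);
    std::cout << dst["a"] << ' ' << overflows << '\n';  // 3 5
}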
+ template <typename Method, typename Table> + void mergeDataOnlyExistingKeysImpl( + Table & table_dst, + Table & table_src, + Arena * arena) const; + + void mergeWithoutKeyDataImpl( + ManyAggregatedDataVariants & non_empty_data) const; + + template <typename Method> + void mergeSingleLevelDataImpl( + ManyAggregatedDataVariants & non_empty_data) const; + + template <typename Method, typename Table> + void convertToBlockImpl( + Method & method, + Table & data, + MutableColumns & key_columns, + AggregateColumnsData & aggregate_columns, + MutableColumns & final_aggregate_columns, + Arena * arena, + bool final) const; + + template <typename Mapped> + void insertAggregatesIntoColumns( + Mapped & mapped, + MutableColumns & final_aggregate_columns, + Arena * arena) const; + template <typename Method, bool use_compiled_functions, typename Table> - void convertToBlockImplFinal( - Method & method, - Table & data, + void convertToBlockImplFinal( + Method & method, + Table & data, std::vector<IColumn *> key_columns, - MutableColumns & final_aggregate_columns, - Arena * arena) const; - - template <typename Method, typename Table> - void convertToBlockImplNotFinal( - Method & method, - Table & data, + MutableColumns & final_aggregate_columns, + Arena * arena) const; + + template <typename Method, typename Table> + void convertToBlockImplNotFinal( + Method & method, + Table & data, std::vector<IColumn *> key_columns, - AggregateColumnsData & aggregate_columns) const; - - template <typename Filler> - Block prepareBlockAndFill( - AggregatedDataVariants & data_variants, - bool final, - size_t rows, - Filler && filler) const; - - template <typename Method> - Block convertOneBucketToBlock( - AggregatedDataVariants & data_variants, - Method & method, + AggregateColumnsData & aggregate_columns) const; + + template <typename Filler> + Block prepareBlockAndFill( + AggregatedDataVariants & data_variants, + bool final, + size_t rows, + Filler && filler) const; + + template <typename Method> + Block convertOneBucketToBlock( + AggregatedDataVariants & data_variants, + Method & method, Arena * arena, - bool final, - size_t bucket) const; - - Block mergeAndConvertOneBucketToBlock( - ManyAggregatedDataVariants & variants, - Arena * arena, - bool final, - size_t bucket, - std::atomic<bool> * is_cancelled = nullptr) const; - - Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; - Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const; - BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; - - template <typename Method> - BlocksList prepareBlocksAndFillTwoLevelImpl( - AggregatedDataVariants & data_variants, - Method & method, - bool final, - ThreadPool * thread_pool) const; - - template <bool no_more_keys, typename Method, typename Table> - void mergeStreamsImplCase( - Block & block, - Arena * aggregates_pool, - Method & method, - Table & data, - AggregateDataPtr overflow_row) const; - - template <typename Method, typename Table> - void mergeStreamsImpl( - Block & block, - Arena * aggregates_pool, - Method & method, - Table & data, - AggregateDataPtr overflow_row, - bool no_more_keys) const; - - void mergeWithoutKeyStreamsImpl( - Block & block, - AggregatedDataVariants & result) const; - - template <typename Method> - void mergeBucketImpl( - ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> * is_cancelled = nullptr) 
const; - - template <typename Method> - void convertBlockToTwoLevelImpl( - Method & method, - Arena * pool, - ColumnRawPtrs & key_columns, - const Block & source, - std::vector<Block> & destinations) const; - - template <typename Method, typename Table> - void destroyImpl(Table & table) const; - - void destroyWithoutKey( - AggregatedDataVariants & result) const; - - - /** Checks constraints on the maximum number of keys for aggregation. - * If it is exceeded, then, depending on the group_by_overflow_mode, either - * - throws an exception; - * - returns false, which means that execution must be aborted; - * - sets the variable no_more_keys to true. - */ - bool checkLimits(size_t result_size, bool & no_more_keys) const; - - void prepareAggregateInstructions( - Columns columns, - AggregateColumns & aggregate_columns, - Columns & materialized_columns, - AggregateFunctionInstructions & instructions, + bool final, + size_t bucket) const; + + Block mergeAndConvertOneBucketToBlock( + ManyAggregatedDataVariants & variants, + Arena * arena, + bool final, + size_t bucket, + std::atomic<bool> * is_cancelled = nullptr) const; + + Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; + Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const; + BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; + + template <typename Method> + BlocksList prepareBlocksAndFillTwoLevelImpl( + AggregatedDataVariants & data_variants, + Method & method, + bool final, + ThreadPool * thread_pool) const; + + template <bool no_more_keys, typename Method, typename Table> + void mergeStreamsImplCase( + Block & block, + Arena * aggregates_pool, + Method & method, + Table & data, + AggregateDataPtr overflow_row) const; + + template <typename Method, typename Table> + void mergeStreamsImpl( + Block & block, + Arena * aggregates_pool, + Method & method, + Table & data, + AggregateDataPtr overflow_row, + bool no_more_keys) const; + + void mergeWithoutKeyStreamsImpl( + Block & block, + AggregatedDataVariants & result) const; + + template <typename Method> + void mergeBucketImpl( + ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> * is_cancelled = nullptr) const; + + template <typename Method> + void convertBlockToTwoLevelImpl( + Method & method, + Arena * pool, + ColumnRawPtrs & key_columns, + const Block & source, + std::vector<Block> & destinations) const; + + template <typename Method, typename Table> + void destroyImpl(Table & table) const; + + void destroyWithoutKey( + AggregatedDataVariants & result) const; + + + /** Checks constraints on the maximum number of keys for aggregation. + * If it is exceeded, then, depending on the group_by_overflow_mode, either + * - throws an exception; + * - returns false, which means that execution must be aborted; + * - sets the variable no_more_keys to true. 
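The checkLimits contract spelled out in this comment, reduced to a sketch: depending on group_by_overflow_mode it throws, signals abort, or switches to no_more_keys mode. The OverflowMode values and the exact comparison against max_rows_to_group_by are stand-ins here:

#include <cstddef>
#include <stdexcept>

enum class OverflowMode { THROW, BREAK, ANY };  // mirrors the real enum's intent

// Returns false to signal "abort execution", throws, or flips no_more_keys
// so that only already-seen keys keep aggregating.
bool checkLimits(std::size_t result_size, std::size_t max_rows_to_group_by,
                 OverflowMode mode, bool & no_more_keys)
{
    if (!max_rows_to_group_by || result_size <= max_rows_to_group_by)
        return true;

    switch (mode)
    {
        case OverflowMode::THROW:
            throw std::runtime_error("Limit for rows to GROUP BY exceeded");
        case OverflowMode::BREAK:
            return false;
        case OverflowMode::ANY:
            no_more_keys = true;
            return true;
    }
    return true;
}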
+ */ + bool checkLimits(size_t result_size, bool & no_more_keys) const; + + void prepareAggregateInstructions( + Columns columns, + AggregateColumns & aggregate_columns, + Columns & materialized_columns, + AggregateFunctionInstructions & instructions, NestedColumnsHolder & nested_columns_holder) const; - + void addSingleKeyToAggregateColumns( const AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const; - + void addArenasToAggregateColumns( const AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const; - void createStatesAndFillKeyColumnsWithSingleKey( - AggregatedDataVariants & data_variants, - Columns & key_columns, size_t key_row, + void createStatesAndFillKeyColumnsWithSingleKey( + AggregatedDataVariants & data_variants, + Columns & key_columns, size_t key_row, MutableColumns & final_key_columns) const; -}; - - -/** Get the aggregation variant by its type. */ -template <typename Method> Method & getDataVariant(AggregatedDataVariants & variants); - -#define M(NAME, IS_TWO_LEVEL) \ - template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant<decltype(AggregatedDataVariants::NAME)::element_type>(AggregatedDataVariants & variants) { return *variants.NAME; } - -APPLY_FOR_AGGREGATED_VARIANTS(M) - -#undef M - -} +}; + + +/** Get the aggregation variant by its type. */ +template <typename Method> Method & getDataVariant(AggregatedDataVariants & variants); + +#define M(NAME, IS_TWO_LEVEL) \ + template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant<decltype(AggregatedDataVariants::NAME)::element_type>(AggregatedDataVariants & variants) { return *variants.NAME; } + +APPLY_FOR_AGGREGATED_VARIANTS(M) + +#undef M + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp index f1329573c8..3591303893 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp @@ -1,71 +1,71 @@ -#include <Interpreters/ClientInfo.h> -#include <IO/ReadBuffer.h> -#include <IO/WriteBuffer.h> -#include <IO/ReadHelpers.h> -#include <IO/WriteHelpers.h> -#include <Core/Defines.h> -#include <common/getFQDNOrHostName.h> -#include <unistd.h> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - -void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) const -{ - if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR); - - writeBinary(UInt8(query_kind), out); - if (empty()) - return; - - writeBinary(initial_user, out); - writeBinary(initial_query_id, out); - writeBinary(initial_address.toString(), out); - +#include <Interpreters/ClientInfo.h> +#include <IO/ReadBuffer.h> +#include <IO/WriteBuffer.h> +#include <IO/ReadHelpers.h> +#include <IO/WriteHelpers.h> +#include <Core/Defines.h> +#include <common/getFQDNOrHostName.h> +#include <unistd.h> + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) const +{ + if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) + throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", 
ErrorCodes::LOGICAL_ERROR); + + writeBinary(UInt8(query_kind), out); + if (empty()) + return; + + writeBinary(initial_user, out); + writeBinary(initial_query_id, out); + writeBinary(initial_address.toString(), out); + if (server_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME) writeBinary(initial_query_start_time_microseconds, out); - writeBinary(UInt8(interface), out); - - if (interface == Interface::TCP) - { - writeBinary(os_user, out); - writeBinary(client_hostname, out); - writeBinary(client_name, out); - writeVarUInt(client_version_major, out); - writeVarUInt(client_version_minor, out); + writeBinary(UInt8(interface), out); + + if (interface == Interface::TCP) + { + writeBinary(os_user, out); + writeBinary(client_hostname, out); + writeBinary(client_name, out); + writeVarUInt(client_version_major, out); + writeVarUInt(client_version_minor, out); writeVarUInt(client_tcp_protocol_version, out); - } - else if (interface == Interface::HTTP) - { - writeBinary(UInt8(http_method), out); - writeBinary(http_user_agent, out); + } + else if (interface == Interface::HTTP) + { + writeBinary(UInt8(http_method), out); + writeBinary(http_user_agent, out); if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO) writeBinary(forwarded_for, out); if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_REFERER_IN_CLIENT_INFO) writeBinary(http_referer, out); - } - - if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO) - writeBinary(quota_key, out); - + } + + if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO) + writeBinary(quota_key, out); + if (server_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH) writeVarUInt(distributed_depth, out); - if (interface == Interface::TCP) - { - if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) - writeVarUInt(client_version_patch, out); - } + if (interface == Interface::TCP) + { + if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) + writeVarUInt(client_version_patch, out); + } if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { @@ -86,74 +86,74 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) writeBinary(uint8_t(0), out); } } -} - - -void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) -{ - if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR); - - UInt8 read_query_kind = 0; - readBinary(read_query_kind, in); - query_kind = QueryKind(read_query_kind); - if (empty()) - return; - - readBinary(initial_user, in); - readBinary(initial_query_id, in); - - String initial_address_string; - readBinary(initial_address_string, in); - initial_address = Poco::Net::SocketAddress(initial_address_string); - +} + + +void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) +{ + if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) + throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR); + + UInt8 read_query_kind = 0; + readBinary(read_query_kind, in); + query_kind = QueryKind(read_query_kind); + if (empty()) + return; + + readBinary(initial_user, in); + readBinary(initial_query_id, in); + + String initial_address_string; + readBinary(initial_address_string, in); + initial_address = 
Poco::Net::SocketAddress(initial_address_string); + if (client_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME) { readBinary(initial_query_start_time_microseconds, in); initial_query_start_time = initial_query_start_time_microseconds / 1000000; } - UInt8 read_interface = 0; - readBinary(read_interface, in); - interface = Interface(read_interface); - - if (interface == Interface::TCP) - { - readBinary(os_user, in); - readBinary(client_hostname, in); - readBinary(client_name, in); - readVarUInt(client_version_major, in); - readVarUInt(client_version_minor, in); + UInt8 read_interface = 0; + readBinary(read_interface, in); + interface = Interface(read_interface); + + if (interface == Interface::TCP) + { + readBinary(os_user, in); + readBinary(client_hostname, in); + readBinary(client_name, in); + readVarUInt(client_version_major, in); + readVarUInt(client_version_minor, in); readVarUInt(client_tcp_protocol_version, in); - } - else if (interface == Interface::HTTP) - { - UInt8 read_http_method = 0; - readBinary(read_http_method, in); - http_method = HTTPMethod(read_http_method); - - readBinary(http_user_agent, in); + } + else if (interface == Interface::HTTP) + { + UInt8 read_http_method = 0; + readBinary(read_http_method, in); + http_method = HTTPMethod(read_http_method); + + readBinary(http_user_agent, in); if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO) readBinary(forwarded_for, in); if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_REFERER_IN_CLIENT_INFO) readBinary(http_referer, in); - } - - if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO) - readBinary(quota_key, in); - + } + + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO) + readBinary(quota_key, in); + if (client_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH) readVarUInt(distributed_depth, in); - if (interface == Interface::TCP) - { - if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) - readVarUInt(client_version_patch, in); - else + if (interface == Interface::TCP) + { + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) + readVarUInt(client_version_patch, in); + else client_version_patch = client_tcp_protocol_version; - } + } if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { @@ -167,32 +167,32 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) readBinary(client_trace_context.trace_flags, in); } } -} - - -void ClientInfo::setInitialQuery() -{ - query_kind = QueryKind::INITIAL_QUERY; - fillOSUserHostNameAndVersionInfo(); +} + + +void ClientInfo::setInitialQuery() +{ + query_kind = QueryKind::INITIAL_QUERY; + fillOSUserHostNameAndVersionInfo(); client_name = "ClickHouse " + client_name; -} - - -void ClientInfo::fillOSUserHostNameAndVersionInfo() -{ - os_user.resize(256, '\0'); - if (0 == getlogin_r(os_user.data(), os_user.size() - 1)) - os_user.resize(strlen(os_user.c_str())); - else - os_user.clear(); /// Don't mind if we cannot determine user login. 
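// A minimal standalone sketch (not part of the diff): the buffer idiom used by
// fillOSUserHostNameAndVersionInfo above. Oversize a string, let getlogin_r()
// write into it, shrink to the NUL-terminated length on success, and treat
// failure as "login unknown" rather than as an error.
#include <cstring>
#include <string>
#include <unistd.h>

std::string currentOsUser()
{
    std::string user(256, '\0');
    if (0 == getlogin_r(user.data(), user.size() - 1))
        user.resize(strlen(user.c_str()));  // keep only the actual login name
    else
        user.clear();                       // unknown login is acceptable
    return user;
}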
- - client_hostname = getFQDNOrHostName(); - - client_version_major = DBMS_VERSION_MAJOR; - client_version_minor = DBMS_VERSION_MINOR; - client_version_patch = DBMS_VERSION_PATCH; +} + + +void ClientInfo::fillOSUserHostNameAndVersionInfo() +{ + os_user.resize(256, '\0'); + if (0 == getlogin_r(os_user.data(), os_user.size() - 1)) + os_user.resize(strlen(os_user.c_str())); + else + os_user.clear(); /// Don't mind if we cannot determine user login. + + client_hostname = getFQDNOrHostName(); + + client_version_major = DBMS_VERSION_MAJOR; + client_version_minor = DBMS_VERSION_MINOR; + client_version_patch = DBMS_VERSION_PATCH; client_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION; -} - - -} +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp index 7259c8a456..e5959273f0 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp @@ -1,87 +1,87 @@ -#include <Interpreters/Cluster.h> -#include <common/SimpleCache.h> -#include <Common/DNSResolver.h> -#include <Common/escapeForFileName.h> -#include <Common/isLocalAddress.h> -#include <Common/parseAddress.h> +#include <Interpreters/Cluster.h> +#include <common/SimpleCache.h> +#include <Common/DNSResolver.h> +#include <Common/escapeForFileName.h> +#include <Common/isLocalAddress.h> +#include <Common/parseAddress.h> #include <Common/Config/AbstractConfigurationComparison.h> #include <Core/Settings.h> -#include <IO/WriteHelpers.h> -#include <IO/ReadHelpers.h> -#include <Poco/Util/AbstractConfiguration.h> -#include <Poco/Util/Application.h> +#include <IO/WriteHelpers.h> +#include <IO/ReadHelpers.h> +#include <Poco/Util/AbstractConfiguration.h> +#include <Poco/Util/Application.h> #include <common/range.h> #include <boost/range/algorithm_ext/erase.hpp> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int EXCESSIVE_ELEMENT_IN_CONFIG; - extern const int LOGICAL_ERROR; - extern const int SHARD_HAS_NO_CONNECTIONS; - extern const int SYNTAX_ERROR; -} - -namespace -{ - -/// Default shard weight. -constexpr UInt32 default_weight = 1; - -inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::SocketAddress & resolved_address, UInt16 clickhouse_port) -{ - /// If there is replica, for which: - /// - its port is the same that the server is listening; - /// - its host is resolved to set of addresses, one of which is the same as one of addresses of network interfaces of the server machine*; - /// then we must go to this shard without any inter-process communication. - /// - /// * - this criteria is somewhat approximate. - /// - /// Also, replica is considered non-local, if it has default database set - /// (only reason is to avoid query rewrite). - - return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port); -} - + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; + extern const int LOGICAL_ERROR; + extern const int SHARD_HAS_NO_CONNECTIONS; + extern const int SYNTAX_ERROR; +} + +namespace +{ + +/// Default shard weight. 
+constexpr UInt32 default_weight = 1; + +inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::SocketAddress & resolved_address, UInt16 clickhouse_port) +{ + /// If there is replica, for which: + /// - its port is the same that the server is listening; + /// - its host is resolved to set of addresses, one of which is the same as one of addresses of network interfaces of the server machine*; + /// then we must go to this shard without any inter-process communication. + /// + /// * - this criteria is somewhat approximate. + /// + /// Also, replica is considered non-local, if it has default database set + /// (only reason is to avoid query rewrite). + + return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port); +} + void concatInsertPath(std::string & insert_path, const std::string & dir_name) { if (insert_path.empty()) insert_path = dir_name; else insert_path += "," + dir_name; -} - } -/// Implementation of Cluster::Address class - -std::optional<Poco::Net::SocketAddress> Cluster::Address::getResolvedAddress() const -{ - try - { - return DNSResolver::instance().resolveAddress(host_name, port); - } - catch (...) - { - /// Failure in DNS resolution in cluster initialization is Ok. - tryLogCurrentException("Cluster"); - return {}; - } -} - - -bool Cluster::Address::isLocal(UInt16 clickhouse_port) const -{ - if (auto resolved = getResolvedAddress()) - return isLocalImpl(*this, *resolved, clickhouse_port); - return false; -} - - -Cluster::Address::Address( +} + +/// Implementation of Cluster::Address class + +std::optional<Poco::Net::SocketAddress> Cluster::Address::getResolvedAddress() const +{ + try + { + return DNSResolver::instance().resolveAddress(host_name, port); + } + catch (...) + { + /// Failure in DNS resolution in cluster initialization is Ok. + tryLogCurrentException("Cluster"); + return {}; + } +} + + +bool Cluster::Address::isLocal(UInt16 clickhouse_port) const +{ + if (auto resolved = getResolvedAddress()) + return isLocalImpl(*this, *resolved, clickhouse_port); + return false; +} + + +Cluster::Address::Address( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & cluster_, @@ -92,27 +92,27 @@ Cluster::Address::Address( , cluster_secret(cluster_secret_) , shard_index(shard_index_) , replica_index(replica_index_) -{ - host_name = config.getString(config_prefix + ".host"); - port = static_cast<UInt16>(config.getInt(config_prefix + ".port")); - if (config.has(config_prefix + ".user")) - user_specified = true; - - user = config.getString(config_prefix + ".user", "default"); - password = config.getString(config_prefix + ".password", ""); - default_database = config.getString(config_prefix + ".default_database", ""); - secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable; - priority = config.getInt(config_prefix + ".priority", 1); - const char * port_type = secure == Protocol::Secure::Enable ? 
"tcp_port_secure" : "tcp_port"; - is_local = isLocal(config.getInt(port_type, 0)); +{ + host_name = config.getString(config_prefix + ".host"); + port = static_cast<UInt16>(config.getInt(config_prefix + ".port")); + if (config.has(config_prefix + ".user")) + user_specified = true; + + user = config.getString(config_prefix + ".user", "default"); + password = config.getString(config_prefix + ".password", ""); + default_database = config.getString(config_prefix + ".default_database", ""); + secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable; + priority = config.getInt(config_prefix + ".priority", 1); + const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; + is_local = isLocal(config.getInt(port_type, 0)); /// By default compression is disabled if address looks like localhost. /// NOTE: it's still enabled when interacting with servers on different port, but we don't want to complicate the logic. compression = config.getBool(config_prefix + ".compression", !is_local) ? Protocol::Compression::Enable : Protocol::Compression::Disable; -} - - +} + + Cluster::Address::Address( const String & host_port_, const String & user_, @@ -124,7 +124,7 @@ Cluster::Address::Address( UInt32 shard_index_, UInt32 replica_index_) : user(user_), password(password_) -{ +{ bool can_be_local = true; std::pair<std::string, UInt16> parsed_host_port; if (!treat_local_port_as_remote) @@ -147,158 +147,158 @@ Cluster::Address::Address( parsed_host_port = parseAddress(host_port_, clickhouse_port); } } - host_name = parsed_host_port.first; - port = parsed_host_port.second; - secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable; - priority = priority_; + host_name = parsed_host_port.first; + port = parsed_host_port.second; + secure = secure_ ? 
Protocol::Secure::Enable : Protocol::Secure::Disable; + priority = priority_; is_local = can_be_local && isLocal(clickhouse_port); shard_index = shard_index_; replica_index = replica_index_; -} - - -String Cluster::Address::toString() const -{ - return toString(host_name, port); -} - -String Cluster::Address::toString(const String & host_name, UInt16 port) -{ - return escapeForFileName(host_name) + ':' + DB::toString(port); -} - -String Cluster::Address::readableString() const -{ - String res; - - /// If it looks like IPv6 address add braces to avoid ambiguity in ipv6_host:port notation - if (host_name.find_first_of(':') != std::string::npos && !host_name.empty() && host_name.back() != ']') - res += '[' + host_name + ']'; - else - res += host_name; - - res += ':' + DB::toString(port); - return res; -} - -std::pair<String, UInt16> Cluster::Address::fromString(const String & host_port_string) -{ - auto pos = host_port_string.find_last_of(':'); - if (pos == std::string::npos) - throw Exception("Incorrect <host>:<port> format " + host_port_string, ErrorCodes::SYNTAX_ERROR); - - return {unescapeForFileName(host_port_string.substr(0, pos)), parse<UInt16>(host_port_string.substr(pos + 1))}; -} - - -String Cluster::Address::toFullString(bool use_compact_format) const -{ - if (use_compact_format) - { - if (shard_index == 0 || replica_index == 0) - // shard_num/replica_num like in system.clusters table - throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR); - +} + + +String Cluster::Address::toString() const +{ + return toString(host_name, port); +} + +String Cluster::Address::toString(const String & host_name, UInt16 port) +{ + return escapeForFileName(host_name) + ':' + DB::toString(port); +} + +String Cluster::Address::readableString() const +{ + String res; + + /// If it looks like IPv6 address add braces to avoid ambiguity in ipv6_host:port notation + if (host_name.find_first_of(':') != std::string::npos && !host_name.empty() && host_name.back() != ']') + res += '[' + host_name + ']'; + else + res += host_name; + + res += ':' + DB::toString(port); + return res; +} + +std::pair<String, UInt16> Cluster::Address::fromString(const String & host_port_string) +{ + auto pos = host_port_string.find_last_of(':'); + if (pos == std::string::npos) + throw Exception("Incorrect <host>:<port> format " + host_port_string, ErrorCodes::SYNTAX_ERROR); + + return {unescapeForFileName(host_port_string.substr(0, pos)), parse<UInt16>(host_port_string.substr(pos + 1))}; +} + + +String Cluster::Address::toFullString(bool use_compact_format) const +{ + if (use_compact_format) + { + if (shard_index == 0 || replica_index == 0) + // shard_num/replica_num like in system.clusters table + throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR); + return fmt::format("shard{}_replica{}", shard_index, replica_index); - } - else - { - return - escapeForFileName(user) - + (password.empty() ? "" : (':' + escapeForFileName(password))) + '@' - + escapeForFileName(host_name) + ':' + std::to_string(port) - + (default_database.empty() ? "" : ('#' + escapeForFileName(default_database))) - + ((secure == Protocol::Secure::Enable) ? 
"+secure" : ""); - } -} - -Cluster::Address Cluster::Address::fromFullString(const String & full_string) -{ - const char * address_begin = full_string.data(); - const char * address_end = address_begin + full_string.size(); - - const char * user_pw_end = strchr(full_string.data(), '@'); - + } + else + { + return + escapeForFileName(user) + + (password.empty() ? "" : (':' + escapeForFileName(password))) + '@' + + escapeForFileName(host_name) + ':' + std::to_string(port) + + (default_database.empty() ? "" : ('#' + escapeForFileName(default_database))) + + ((secure == Protocol::Secure::Enable) ? "+secure" : ""); + } +} + +Cluster::Address Cluster::Address::fromFullString(const String & full_string) +{ + const char * address_begin = full_string.data(); + const char * address_end = address_begin + full_string.size(); + + const char * user_pw_end = strchr(full_string.data(), '@'); + /// parsing with the new shard{shard_index}[_replica{replica_index}] format if (!user_pw_end && full_string.starts_with("shard")) - { - const char * underscore = strchr(full_string.data(), '_'); - - Address address; - address.shard_index = parse<UInt32>(address_begin + strlen("shard")); - address.replica_index = underscore ? parse<UInt32>(underscore + strlen("_replica")) : 0; - - return address; - } - else - { - /// parsing with the old user[:password]@host:port#default_database format - /// This format is appeared to be inconvenient for the following reasons: - /// - credentials are exposed in file name; - /// - the file name can be too long. - - Protocol::Secure secure = Protocol::Secure::Disable; - const char * secure_tag = "+secure"; + { + const char * underscore = strchr(full_string.data(), '_'); + + Address address; + address.shard_index = parse<UInt32>(address_begin + strlen("shard")); + address.replica_index = underscore ? parse<UInt32>(underscore + strlen("_replica")) : 0; + + return address; + } + else + { + /// parsing with the old user[:password]@host:port#default_database format + /// This format is appeared to be inconvenient for the following reasons: + /// - credentials are exposed in file name; + /// - the file name can be too long. + + Protocol::Secure secure = Protocol::Secure::Disable; + const char * secure_tag = "+secure"; if (full_string.ends_with(secure_tag)) - { - address_end -= strlen(secure_tag); - secure = Protocol::Secure::Enable; - } - - const char * colon = strchr(full_string.data(), ':'); - if (!user_pw_end || !colon) - throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR); - - const bool has_pw = colon < user_pw_end; - const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon; - if (!host_end) - throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR); - - const char * has_db = strchr(full_string.data(), '#'); - const char * port_end = has_db ? has_db : address_end; - - Address address; - address.secure = secure; - address.port = parse<UInt16>(host_end + 1, port_end - (host_end + 1)); - address.host_name = unescapeForFileName(std::string(user_pw_end + 1, host_end)); - address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end)); - address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string(); - address.default_database = has_db ? 
unescapeForFileName(std::string(has_db + 1, address_end)) : std::string(); - // address.priority ignored - return address; - } -} - - -/// Implementation of Clusters class - + { + address_end -= strlen(secure_tag); + secure = Protocol::Secure::Enable; + } + + const char * colon = strchr(full_string.data(), ':'); + if (!user_pw_end || !colon) + throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR); + + const bool has_pw = colon < user_pw_end; + const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon; + if (!host_end) + throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR); + + const char * has_db = strchr(full_string.data(), '#'); + const char * port_end = has_db ? has_db : address_end; + + Address address; + address.secure = secure; + address.port = parse<UInt16>(host_end + 1, port_end - (host_end + 1)); + address.host_name = unescapeForFileName(std::string(user_pw_end + 1, host_end)); + address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end)); + address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string(); + address.default_database = has_db ? unescapeForFileName(std::string(has_db + 1, address_end)) : std::string(); + // address.priority ignored + return address; + } +} + + +/// Implementation of Clusters class + Clusters::Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix) -{ +{ updateClusters(config, settings, config_prefix); -} - - -ClusterPtr Clusters::getCluster(const std::string & cluster_name) const -{ - std::lock_guard lock(mutex); - - auto it = impl.find(cluster_name); - return (it != impl.end()) ? it->second : nullptr; -} - - -void Clusters::setCluster(const String & cluster_name, const std::shared_ptr<Cluster> & cluster) -{ - std::lock_guard lock(mutex); - impl[cluster_name] = cluster; -} - - +} + + +ClusterPtr Clusters::getCluster(const std::string & cluster_name) const +{ + std::lock_guard lock(mutex); + + auto it = impl.find(cluster_name); + return (it != impl.end()) ? it->second : nullptr; +} + + +void Clusters::setCluster(const String & cluster_name, const std::shared_ptr<Cluster> & cluster) +{ + std::lock_guard lock(mutex); + impl[cluster_name] = cluster; +} + + void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config) -{ +{ Poco::Util::AbstractConfiguration::Keys new_config_keys; new_config.keys(config_prefix, new_config_keys); - + /// If old config is set, we will update only clusters with updated config. /// In this case, we first need to find clusters that were deleted from config. Poco::Util::AbstractConfiguration::Keys deleted_keys; @@ -314,11 +314,11 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf old_config_keys.begin(), old_config_keys.end(), new_config_keys.begin(), new_config_keys.end(), std::back_inserter(deleted_keys)); } - std::lock_guard lock(mutex); - + std::lock_guard lock(mutex); + /// If old config is set, remove deleted clusters from impl, otherwise just clear it. 
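// A minimal standalone sketch (assumed names, not part of the diff): the
// incremental-update idea in Clusters::updateClusters. Diff the sorted key
// sets of the old and new configuration to find clusters that were deleted,
// then erase only those, so unchanged clusters keep their existing objects.
#include <algorithm>
#include <iterator>
#include <map>
#include <string>
#include <vector>

void eraseDeletedKeys(std::map<std::string, int> & impl,
                      std::vector<std::string> old_keys,
                      std::vector<std::string> new_keys)
{
    std::sort(old_keys.begin(), old_keys.end());  // set_difference needs sorted ranges
    std::sort(new_keys.begin(), new_keys.end());

    std::vector<std::string> deleted_keys;
    std::set_difference(old_keys.begin(), old_keys.end(),
                        new_keys.begin(), new_keys.end(),
                        std::back_inserter(deleted_keys));

    for (const auto & key : deleted_keys)
        impl.erase(key);  // clusters that vanished from the config
}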
if (old_config) - { + { for (const auto & key : deleted_keys) impl.erase(key); } @@ -327,181 +327,181 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf for (const auto & key : new_config_keys) { - if (key.find('.') != String::npos) - throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR); - + if (key.find('.') != String::npos) + throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR); + /// If old config is set and cluster config wasn't changed, don't update this cluster. if (!old_config || !isSameConfiguration(new_config, *old_config, config_prefix + "." + key)) impl[key] = std::make_shared<Cluster>(new_config, settings, config_prefix, key); - } -} - -Clusters::Impl Clusters::getContainer() const -{ - std::lock_guard lock(mutex); - /// The following line copies container of shared_ptrs to return value under lock - return impl; -} - - -/// Implementation of `Cluster` class - + } +} + +Clusters::Impl Clusters::getContainer() const +{ + std::lock_guard lock(mutex); + /// The following line copies container of shared_ptrs to return value under lock + return impl; +} + + +/// Implementation of `Cluster` class + Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix_, const String & cluster_name) : name(cluster_name) -{ +{ auto config_prefix = config_prefix_ + "." + cluster_name; - Poco::Util::AbstractConfiguration::Keys config_keys; + Poco::Util::AbstractConfiguration::Keys config_keys; config.keys(config_prefix, config_keys); - + config_prefix += "."; secret = config.getString(config_prefix + "secret", ""); boost::range::remove_erase(config_keys, "secret"); - if (config_keys.empty()) + if (config_keys.empty()) throw Exception("No cluster elements (shard, node) specified in config at path " + config_prefix, ErrorCodes::SHARD_HAS_NO_CONNECTIONS); - - UInt32 current_shard_num = 1; - for (const auto & key : config_keys) - { + + UInt32 current_shard_num = 1; + for (const auto & key : config_keys) + { if (key.starts_with("node")) - { - /// Shard without replicas. - - Addresses addresses; - - const auto & prefix = config_prefix + key; - const auto weight = config.getInt(prefix + ".weight", default_weight); - + { + /// Shard without replicas. 
+ + Addresses addresses; + + const auto & prefix = config_prefix + key; + const auto weight = config.getInt(prefix + ".weight", default_weight); + addresses.emplace_back(config, prefix, cluster_name, secret, current_shard_num, 1); - const auto & address = addresses.back(); - - ShardInfo info; - info.shard_num = current_shard_num; - info.weight = weight; - - if (address.is_local) - info.local_addresses.push_back(address); - + const auto & address = addresses.back(); + + ShardInfo info; + info.shard_num = current_shard_num; + info.weight = weight; + + if (address.is_local) + info.local_addresses.push_back(address); + auto pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, - address.host_name, address.port, - address.default_database, address.user, address.password, + settings.distributed_connections_pool_size, + address.host_name, address.port, + address.default_database, address.user, address.password, address.cluster, address.cluster_secret, - "server", address.compression, - address.secure, address.priority); - - info.pool = std::make_shared<ConnectionPoolWithFailover>( - ConnectionPoolPtrs{pool}, settings.load_balancing); - info.per_replica_pools = {std::move(pool)}; - - if (weight) - slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); - - shards_info.emplace_back(std::move(info)); - addresses_with_failover.emplace_back(std::move(addresses)); - } + "server", address.compression, + address.secure, address.priority); + + info.pool = std::make_shared<ConnectionPoolWithFailover>( + ConnectionPoolPtrs{pool}, settings.load_balancing); + info.per_replica_pools = {std::move(pool)}; + + if (weight) + slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); + + shards_info.emplace_back(std::move(info)); + addresses_with_failover.emplace_back(std::move(addresses)); + } else if (key.starts_with("shard")) - { - /// Shard with replicas. - - Poco::Util::AbstractConfiguration::Keys replica_keys; - config.keys(config_prefix + key, replica_keys); - - addresses_with_failover.emplace_back(); - Addresses & replica_addresses = addresses_with_failover.back(); - UInt32 current_replica_num = 1; - - const auto & partial_prefix = config_prefix + key + "."; - const auto weight = config.getUInt(partial_prefix + ".weight", default_weight); - - bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false); - + { + /// Shard with replicas. 
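// A minimal standalone sketch (assumed names, not part of the diff): what the
// slot_to_shard table built via insert(end, weight, shards_info.size()) gives
// you. Each shard of weight W occupies W consecutive slots, so indexing the
// table with (key % slot_count) picks shards proportionally to their weight.
#include <cstddef>
#include <cstdint>
#include <vector>

struct WeightedShardMap
{
    std::vector<size_t> slot_to_shard;

    void addShard(uint32_t weight, size_t shard_index)
    {
        if (weight)  // zero-weight shards receive no slots, hence no rows
            slot_to_shard.insert(slot_to_shard.end(), weight, shard_index);
    }

    size_t pick(uint64_t sharding_key) const
    {
        return slot_to_shard[sharding_key % slot_to_shard.size()];
    }
};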
+ + Poco::Util::AbstractConfiguration::Keys replica_keys; + config.keys(config_prefix + key, replica_keys); + + addresses_with_failover.emplace_back(); + Addresses & replica_addresses = addresses_with_failover.back(); + UInt32 current_replica_num = 1; + + const auto & partial_prefix = config_prefix + key + "."; + const auto weight = config.getUInt(partial_prefix + ".weight", default_weight); + + bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false); + ShardInfoInsertPathForInternalReplication insert_paths; /// "_all_replicas" is a marker that will be replaced with all replicas /// (for creating connections in the Distributed engine) insert_paths.compact = fmt::format("shard{}_all_replicas", current_shard_num); - - for (const auto & replica_key : replica_keys) - { + + for (const auto & replica_key : replica_keys) + { if (replica_key.starts_with("weight") ||replica_key.starts_with("internal_replication")) - continue; - + continue; + if (replica_key.starts_with("replica")) - { + { replica_addresses.emplace_back(config, partial_prefix + replica_key, cluster_name, secret, current_shard_num, current_replica_num); - ++current_replica_num; - - if (internal_replication) - { + ++current_replica_num; + + if (internal_replication) + { auto dir_name = replica_addresses.back().toFullString(/* use_compact_format= */ false); if (!replica_addresses.back().is_local) concatInsertPath(insert_paths.prefer_localhost_replica, dir_name); concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name); - } - } - else - throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - Addresses shard_local_addresses; - - ConnectionPoolPtrs all_replicas_pools; - all_replicas_pools.reserve(replica_addresses.size()); - - for (const auto & replica : replica_addresses) - { + } + } + else + throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + Addresses shard_local_addresses; + + ConnectionPoolPtrs all_replicas_pools; + all_replicas_pools.reserve(replica_addresses.size()); + + for (const auto & replica : replica_addresses) + { auto replica_pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, - replica.host_name, replica.port, - replica.default_database, replica.user, replica.password, + settings.distributed_connections_pool_size, + replica.host_name, replica.port, + replica.default_database, replica.user, replica.password, replica.cluster, replica.cluster_secret, - "server", replica.compression, - replica.secure, replica.priority); - - all_replicas_pools.emplace_back(replica_pool); - if (replica.is_local) - shard_local_addresses.push_back(replica); - } - - ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>( - all_replicas_pools, settings.load_balancing, - settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap); - - if (weight) - slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); - - shards_info.push_back({ + "server", replica.compression, + replica.secure, replica.priority); + + all_replicas_pools.emplace_back(replica_pool); + if (replica.is_local) + shard_local_addresses.push_back(replica); + } + + ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>( + all_replicas_pools, settings.load_balancing, + settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap); + + 
if (weight) + slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); + + shards_info.push_back({ std::move(insert_paths), - current_shard_num, - weight, - std::move(shard_local_addresses), - std::move(shard_pool), - std::move(all_replicas_pools), - internal_replication - }); - } - else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - - ++current_shard_num; - } - - if (addresses_with_failover.empty()) - throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); - - initMisc(); -} - - + current_shard_num, + weight, + std::move(shard_local_addresses), + std::move(shard_pool), + std::move(all_replicas_pools), + internal_replication + }); + } + else + throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + ++current_shard_num; + } + + if (addresses_with_failover.empty()) + throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + + initMisc(); +} + + Cluster::Cluster( const Settings & settings, const std::vector<std::vector<String>> & names, @@ -512,13 +512,13 @@ Cluster::Cluster( bool treat_local_port_as_remote, bool secure, Int64 priority) -{ - UInt32 current_shard_num = 1; - - for (const auto & shard : names) - { - Addresses current; - for (const auto & replica : shard) +{ + UInt32 current_shard_num = 1; + + for (const auto & shard : names) + { + Addresses current; + for (const auto & replica : shard) current.emplace_back( replica, username, @@ -529,163 +529,163 @@ Cluster::Cluster( priority, current_shard_num, current.size() + 1); - - addresses_with_failover.emplace_back(current); - - Addresses shard_local_addresses; - ConnectionPoolPtrs all_replicas; - all_replicas.reserve(current.size()); - - for (const auto & replica : current) - { + + addresses_with_failover.emplace_back(current); + + Addresses shard_local_addresses; + ConnectionPoolPtrs all_replicas; + all_replicas.reserve(current.size()); + + for (const auto & replica : current) + { auto replica_pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, - replica.host_name, replica.port, - replica.default_database, replica.user, replica.password, + settings.distributed_connections_pool_size, + replica.host_name, replica.port, + replica.default_database, replica.user, replica.password, replica.cluster, replica.cluster_secret, - "server", replica.compression, replica.secure, replica.priority); - all_replicas.emplace_back(replica_pool); - if (replica.is_local && !treat_local_as_remote) - shard_local_addresses.push_back(replica); - } - - ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>( - all_replicas, settings.load_balancing, - settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap); - - slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size()); - shards_info.push_back({ + "server", replica.compression, replica.secure, replica.priority); + all_replicas.emplace_back(replica_pool); + if (replica.is_local && !treat_local_as_remote) + shard_local_addresses.push_back(replica); + } + + ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>( + all_replicas, settings.load_balancing, + settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap); + + slot_to_shard.insert(std::end(slot_to_shard), 
default_weight, shards_info.size()); + shards_info.push_back({ {}, // insert_path_for_internal_replication - current_shard_num, - default_weight, - std::move(shard_local_addresses), - std::move(shard_pool), - std::move(all_replicas), - false // has_internal_replication - }); - ++current_shard_num; - } - - initMisc(); -} - - + current_shard_num, + default_weight, + std::move(shard_local_addresses), + std::move(shard_pool), + std::move(all_replicas), + false // has_internal_replication + }); + ++current_shard_num; + } + + initMisc(); +} + + Poco::Timespan Cluster::saturate(Poco::Timespan v, Poco::Timespan limit) -{ - if (limit.totalMicroseconds() == 0) - return v; - else - return (v > limit) ? limit : v; -} - - -void Cluster::initMisc() -{ - for (const auto & shard_info : shards_info) - { - if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) - throw Exception("Found shard without any specified connection", - ErrorCodes::SHARD_HAS_NO_CONNECTIONS); - } - - for (const auto & shard_info : shards_info) - { - if (shard_info.isLocal()) - ++local_shard_count; - else - ++remote_shard_count; - } - - for (auto & shard_info : shards_info) - { - if (!shard_info.isLocal()) - { - any_remote_shard_info = &shard_info; - break; - } - } -} - -std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const -{ - return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)}; -} - -std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const -{ - return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, {index}) }; -} - -std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector<size_t> & indices) const -{ - return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) }; -} - -Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings) -{ - if (from.addresses_with_failover.empty()) - throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); - +{ + if (limit.totalMicroseconds() == 0) + return v; + else + return (v > limit) ? 
limit : v; +} + + +void Cluster::initMisc() +{ + for (const auto & shard_info : shards_info) + { + if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) + throw Exception("Found shard without any specified connection", + ErrorCodes::SHARD_HAS_NO_CONNECTIONS); + } + + for (const auto & shard_info : shards_info) + { + if (shard_info.isLocal()) + ++local_shard_count; + else + ++remote_shard_count; + } + + for (auto & shard_info : shards_info) + { + if (!shard_info.isLocal()) + { + any_remote_shard_info = &shard_info; + break; + } + } +} + +std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const +{ + return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)}; +} + +std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const +{ + return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, {index}) }; +} + +std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector<size_t> & indices) const +{ + return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) }; +} + +Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings) +{ + if (from.addresses_with_failover.empty()) + throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); + UInt32 shard_num = 0; - std::set<std::pair<String, int>> unique_hosts; + std::set<std::pair<String, int>> unique_hosts; for (size_t shard_index : collections::range(0, from.shards_info.size())) - { - const auto & replicas = from.addresses_with_failover[shard_index]; - for (const auto & address : replicas) - { - if (!unique_hosts.emplace(address.host_name, address.port).second) - continue; /// Duplicate host, skip. - - ShardInfo info; + { + const auto & replicas = from.addresses_with_failover[shard_index]; + for (const auto & address : replicas) + { + if (!unique_hosts.emplace(address.host_name, address.port).second) + continue; /// Duplicate host, skip. 
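// A minimal standalone sketch (not part of the diff): the deduplication idiom
// above. std::set::emplace returns {iterator, inserted}; when .second is false
// the (host, port) pair was already seen and the replica is skipped, which is
// how getClusterWithReplicasAsShards flattens replicas into unique shards.
#include <set>
#include <string>
#include <utility>
#include <vector>

using HostPort = std::pair<std::string, int>;

std::vector<HostPort> uniqueHosts(const std::vector<HostPort> & replicas)
{
    std::set<HostPort> seen;
    std::vector<HostPort> result;
    for (const auto & replica : replicas)
        if (seen.emplace(replica).second)  // true only on first occurrence
            result.push_back(replica);
    return result;
}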
+ + ShardInfo info; info.shard_num = ++shard_num; - if (address.is_local) - info.local_addresses.push_back(address); - + if (address.is_local) + info.local_addresses.push_back(address); + auto pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, - address.host_name, - address.port, - address.default_database, - address.user, - address.password, + settings.distributed_connections_pool_size, + address.host_name, + address.port, + address.default_database, + address.user, + address.password, address.cluster, address.cluster_secret, - "server", - address.compression, - address.secure, - address.priority); - - info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing); - info.per_replica_pools = {std::move(pool)}; - - addresses_with_failover.emplace_back(Addresses{address}); - shards_info.emplace_back(std::move(info)); - } - } - - initMisc(); -} - - -Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices) -{ - for (size_t index : indices) - { - shards_info.emplace_back(from.shards_info.at(index)); - - if (!from.addresses_with_failover.empty()) - addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); - } - - initMisc(); -} - + "server", + address.compression, + address.secure, + address.priority); + + info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing); + info.per_replica_pools = {std::move(pool)}; + + addresses_with_failover.emplace_back(Addresses{address}); + shards_info.emplace_back(std::move(info)); + } + } + + initMisc(); +} + + +Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices) +{ + for (size_t index : indices) + { + shards_info.emplace_back(from.shards_info.at(index)); + + if (!from.addresses_with_failover.empty()) + addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); + } + + initMisc(); +} + const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const -{ - if (!has_internal_replication) - throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); - +{ + if (!has_internal_replication) + throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); + const auto & paths = insert_path_for_internal_replication; if (!use_compact_format) { @@ -697,12 +697,12 @@ const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool pr } return path; } - else + else { return paths.compact; } -} - +} + bool Cluster::maybeCrossReplication() const { /// Cluster can be used for cross-replication if some replicas have different default database names, @@ -718,6 +718,6 @@ bool Cluster::maybeCrossReplication() const return true; return false; -} +} } diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h index e2312932b4..a77eb3983d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h @@ -1,10 +1,10 @@ -#pragma once - -#include <Client/ConnectionPool.h> -#include <Client/ConnectionPoolWithFailover.h> +#pragma once + +#include <Client/ConnectionPool.h> +#include <Client/ConnectionPoolWithFailover.h> + +#include <Poco/Net/SocketAddress.h> -#include <Poco/Net/SocketAddress.h> - #include <map> 
namespace Poco @@ -15,33 +15,33 @@ namespace Poco } } -namespace DB -{ +namespace DB +{ struct Settings; -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/// Cluster contains connection pools to each node -/// With the local nodes, the connection is not established, but the request is executed directly. -/// Therefore we store only the number of local nodes -/// In the config, the cluster includes nodes <node> or <shard> -class Cluster -{ -public: +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// Cluster contains connection pools to each node +/// With the local nodes, the connection is not established, but the request is executed directly. +/// Therefore we store only the number of local nodes +/// In the config, the cluster includes nodes <node> or <shard> +class Cluster +{ +public: Cluster(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix_, const String & cluster_name); - - /// Construct a cluster by the names of shards and replicas. - /// Local are treated as well as remote ones if treat_local_as_remote is true. + + /// Construct a cluster by the names of shards and replicas. + /// Local are treated as well as remote ones if treat_local_as_remote is true. /// Local are also treated as remote if treat_local_port_as_remote is set and the local address includes a port - /// 'clickhouse_port' - port that this server instance listen for queries. - /// This parameter is needed only to check that some address is local (points to ourself). + /// 'clickhouse_port' - port that this server instance listen for queries. + /// This parameter is needed only to check that some address is local (points to ourself). /// /// Used for remote() function. Cluster( @@ -54,111 +54,111 @@ public: bool treat_local_port_as_remote, bool secure = false, Int64 priority = 1); - - Cluster(const Cluster &)= delete; - Cluster & operator=(const Cluster &) = delete; - - /// is used to set a limit on the size of the timeout + + Cluster(const Cluster &)= delete; + Cluster & operator=(const Cluster &) = delete; + + /// is used to set a limit on the size of the timeout static Poco::Timespan saturate(Poco::Timespan v, Poco::Timespan limit); - -public: + +public: using SlotToShard = std::vector<UInt64>; - struct Address - { - /** In configuration file, - * addresses are located either in <node> elements: - * <node> - * <host>example01-01-1</host> - * <port>9000</port> - * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed --> - * </node> - * ... - * or in <shard> and inside in <replica> elements: - * <shard> - * <replica> - * <host>example01-01-1</host> - * <port>9000</port> - * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed --> - * </replica> - * </shard> - */ - - String host_name; - UInt16 port; - String user; - String password; + struct Address + { + /** In configuration file, + * addresses are located either in <node> elements: + * <node> + * <host>example01-01-1</host> + * <port>9000</port> + * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed --> + * </node> + * ... + * or in <shard> and inside in <replica> elements: + * <shard> + * <replica> + * <host>example01-01-1</host> + * <port>9000</port> + * <!-- <user>, <password>, <default_database>, <compression>, <priority>. 
<secure> if needed --> + * </replica> + * </shard> + */ + + String host_name; + UInt16 port; + String user; + String password; /// For inter-server authorization String cluster; String cluster_secret; - UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1. - UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas. - - /// This database is selected when no database is specified for Distributed table - String default_database; - /// The locality is determined at the initialization, and is not changed even if DNS is changed - bool is_local = false; - bool user_specified = false; - - Protocol::Compression compression = Protocol::Compression::Enable; - Protocol::Secure secure = Protocol::Secure::Disable; - - Int64 priority = 1; - - Address() = default; - - Address( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, + UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1. + UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas. + + /// This database is selected when no database is specified for Distributed table + String default_database; + /// The locality is determined at the initialization, and is not changed even if DNS is changed + bool is_local = false; + bool user_specified = false; + + Protocol::Compression compression = Protocol::Compression::Enable; + Protocol::Secure secure = Protocol::Secure::Disable; + + Int64 priority = 1; + + Address() = default; + + Address( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, const String & cluster_, const String & cluster_secret_, - UInt32 shard_index_ = 0, - UInt32 replica_index_ = 0); - - Address( - const String & host_port_, - const String & user_, - const String & password_, - UInt16 clickhouse_port, + UInt32 shard_index_ = 0, + UInt32 replica_index_ = 0); + + Address( + const String & host_port_, + const String & user_, + const String & password_, + UInt16 clickhouse_port, bool treat_local_port_as_remote, - bool secure_ = false, + bool secure_ = false, Int64 priority_ = 1, UInt32 shard_index_ = 0, UInt32 replica_index_ = 0); - - /// Returns 'escaped_host_name:port' - String toString() const; - - /// Returns 'host_name:port' - String readableString() const; - - static String toString(const String & host_name, UInt16 port); - - static std::pair<String, UInt16> fromString(const String & host_port_string); - - /// Returns escaped shard{shard_index}_replica{replica_index} or escaped - /// user:password@resolved_host_address:resolved_host_port#default_database - /// depending on use_compact_format flag - String toFullString(bool use_compact_format) const; - - /// Returns address with only shard index and replica index or full address without shard index and replica index - static Address fromFullString(const String & address_full_string); - - /// Returns resolved address if it does resolve. 
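// A minimal standalone sketch (assumed helper, not part of the diff): why
// Address::fromString splits on the *last* ':'. An IPv6 literal such as
// "[::1]:9000" contains several colons, and only find_last_of separates host
// from port correctly; std::stoul stands in for DB::parse<UInt16> here.
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>

std::pair<std::string, uint16_t> splitHostPort(const std::string & host_port)
{
    auto pos = host_port.find_last_of(':');
    if (pos == std::string::npos)
        throw std::invalid_argument("Incorrect <host>:<port> format " + host_port);
    return {host_port.substr(0, pos),
            static_cast<uint16_t>(std::stoul(host_port.substr(pos + 1)))};
}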
- std::optional<Poco::Net::SocketAddress> getResolvedAddress() const; - - auto tuple() const { return std::tie(host_name, port, secure, user, password, default_database); } - bool operator==(const Address & other) const { return tuple() == other.tuple(); } - - private: - bool isLocal(UInt16 clickhouse_port) const; - }; - - using Addresses = std::vector<Address>; - using AddressesWithFailover = std::vector<Addresses>; - + + /// Returns 'escaped_host_name:port' + String toString() const; + + /// Returns 'host_name:port' + String readableString() const; + + static String toString(const String & host_name, UInt16 port); + + static std::pair<String, UInt16> fromString(const String & host_port_string); + + /// Returns escaped shard{shard_index}_replica{replica_index} or escaped + /// user:password@resolved_host_address:resolved_host_port#default_database + /// depending on use_compact_format flag + String toFullString(bool use_compact_format) const; + + /// Returns address with only shard index and replica index or full address without shard index and replica index + static Address fromFullString(const String & address_full_string); + + /// Returns resolved address if it does resolve. + std::optional<Poco::Net::SocketAddress> getResolvedAddress() const; + + auto tuple() const { return std::tie(host_name, port, secure, user, password, default_database); } + bool operator==(const Address & other) const { return tuple() == other.tuple(); } + + private: + bool isLocal(UInt16 clickhouse_port) const; + }; + + using Addresses = std::vector<Address>; + using AddressesWithFailover = std::vector<Addresses>; + /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication /// /// Contains different path for permutations of: @@ -178,129 +178,129 @@ public: std::string compact; }; - struct ShardInfo - { - public: - bool isLocal() const { return !local_addresses.empty(); } - bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); } - size_t getLocalNodeCount() const { return local_addresses.size(); } - bool hasInternalReplication() const { return has_internal_replication; } - /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication + struct ShardInfo + { + public: + bool isLocal() const { return !local_addresses.empty(); } + bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); } + size_t getLocalNodeCount() const { return local_addresses.size(); } + bool hasInternalReplication() const { return has_internal_replication; } + /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; - - public: + + public: ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication; - /// Number of the shard, the indexation begins with 1 - UInt32 shard_num = 0; - UInt32 weight = 1; - Addresses local_addresses; - /// nullptr if there are no remote addresses - ConnectionPoolWithFailoverPtr pool; - /// Connection pool for each replica, contains nullptr for local replicas - ConnectionPoolPtrs per_replica_pools; - bool has_internal_replication = false; - }; - - using ShardsInfo = std::vector<ShardInfo>; - - String getHashOfAddresses() const { return hash_of_addresses; } - const ShardsInfo & getShardsInfo() const { return shards_info; } - const AddressesWithFailover & getShardsAddresses() const { return 
addresses_with_failover; } - - const ShardInfo & getAnyShardInfo() const - { - if (shards_info.empty()) - throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); - return shards_info.front(); - } - - /// The number of remote shards. - size_t getRemoteShardCount() const { return remote_shard_count; } - - /// The number of clickhouse nodes located locally - /// we access the local nodes directly. - size_t getLocalShardCount() const { return local_shard_count; } - - /// The number of all shards. - size_t getShardCount() const { return shards_info.size(); } - + /// Number of the shard, the indexation begins with 1 + UInt32 shard_num = 0; + UInt32 weight = 1; + Addresses local_addresses; + /// nullptr if there are no remote addresses + ConnectionPoolWithFailoverPtr pool; + /// Connection pool for each replica, contains nullptr for local replicas + ConnectionPoolPtrs per_replica_pools; + bool has_internal_replication = false; + }; + + using ShardsInfo = std::vector<ShardInfo>; + + String getHashOfAddresses() const { return hash_of_addresses; } + const ShardsInfo & getShardsInfo() const { return shards_info; } + const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; } + + const ShardInfo & getAnyShardInfo() const + { + if (shards_info.empty()) + throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); + return shards_info.front(); + } + + /// The number of remote shards. + size_t getRemoteShardCount() const { return remote_shard_count; } + + /// The number of clickhouse nodes located locally + /// we access the local nodes directly. + size_t getLocalShardCount() const { return local_shard_count; } + + /// The number of all shards. + size_t getShardCount() const { return shards_info.size(); } + const String & getSecret() const { return secret; } - /// Get a subcluster consisting of one shard - index by count (from 0) of the shard of this cluster. - std::unique_ptr<Cluster> getClusterWithSingleShard(size_t index) const; - - /// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster. - std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const; - - /// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards. - std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const; - + /// Get a subcluster consisting of one shard - index by count (from 0) of the shard of this cluster. + std::unique_ptr<Cluster> getClusterWithSingleShard(size_t index) const; + + /// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster. + std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const; + + /// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards. + std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const; + /// Returns false if cluster configuration doesn't allow to use it for cross-replication. /// NOTE: true does not mean, that it's actually a cross-replication cluster. bool maybeCrossReplication() const; -private: - SlotToShard slot_to_shard; - -public: - const SlotToShard & getSlotToShard() const { return slot_to_shard; } - -private: - void initMisc(); - - /// For getClusterWithMultipleShards implementation. 
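// A minimal standalone sketch (generic names, not part of the diff): the
// private tag-type constructor pattern used just below (SubclusterTag,
// ReplicasAsShardsTag). Empty structs disambiguate overloaded constructors
// while keeping them out of the public API; public factories expose them.
#include <cstddef>
#include <memory>
#include <vector>

class Container
{
public:
    explicit Container(std::vector<int> data) : data_(std::move(data)) {}

    std::unique_ptr<Container> subset(const std::vector<size_t> & indices) const
    {
        // make_unique cannot reach a private constructor, hence plain new.
        return std::unique_ptr<Container>(new Container(SubsetTag{}, *this, indices));
    }

private:
    struct SubsetTag {};  // selects the "copy a subset" constructor
    Container(SubsetTag, const Container & from, const std::vector<size_t> & indices)
    {
        for (size_t i : indices)
            data_.push_back(from.data_.at(i));
    }

    std::vector<int> data_;
};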
- struct SubclusterTag {}; - Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices); - - /// For getClusterWithReplicasAsShards implementation - struct ReplicasAsShardsTag {}; - Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings); - +private: + SlotToShard slot_to_shard; + +public: + const SlotToShard & getSlotToShard() const { return slot_to_shard; } + +private: + void initMisc(); + + /// For getClusterWithMultipleShards implementation. + struct SubclusterTag {}; + Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices); + + /// For getClusterWithReplicasAsShards implementation + struct ReplicasAsShardsTag {}; + Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings); + /// Inter-server secret String secret; - String hash_of_addresses; - /// Description of the cluster shards. - ShardsInfo shards_info; - /// Any remote shard. - ShardInfo * any_remote_shard_info = nullptr; - - /// Non-empty is either addresses or addresses_with_failover. - /// The size and order of the elements in the corresponding array corresponds to shards_info. - - /// An array of shards. For each shard, an array of replica addresses (servers that are considered identical). - AddressesWithFailover addresses_with_failover; - - size_t remote_shard_count = 0; - size_t local_shard_count = 0; + String hash_of_addresses; + /// Description of the cluster shards. + ShardsInfo shards_info; + /// Any remote shard. + ShardInfo * any_remote_shard_info = nullptr; + + /// Non-empty is either addresses or addresses_with_failover. + /// The size and order of the elements in the corresponding array corresponds to shards_info. + + /// An array of shards. For each shard, an array of replica addresses (servers that are considered identical). 
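// Editorial sketch: walking the two parallel structures described above.
// Per the comments, addresses_with_failover has one entry per shard, in the
// same size and order as shards_info, each entry listing that shard's
// replicas; process() is a hypothetical consumer.
//
//   const Cluster::ShardsInfo & shards = cluster.getShardsInfo();
//   const Cluster::AddressesWithFailover & failover = cluster.getShardsAddresses();
//   for (size_t shard = 0; shard < shards.size(); ++shard)
//       for (const Cluster::Address & replica : failover[shard])
//           process(replica.readableString());                           // 'host_name:port'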
+ AddressesWithFailover addresses_with_failover; + + size_t remote_shard_count = 0; + size_t local_shard_count = 0; String name; -}; - -using ClusterPtr = std::shared_ptr<Cluster>; - - -class Clusters -{ -public: +}; + +using ClusterPtr = std::shared_ptr<Cluster>; + + +class Clusters +{ +public: Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix = "remote_servers"); - - Clusters(const Clusters &) = delete; - Clusters & operator=(const Clusters &) = delete; - - ClusterPtr getCluster(const std::string & cluster_name) const; - void setCluster(const String & cluster_name, const ClusterPtr & cluster); - + + Clusters(const Clusters &) = delete; + Clusters & operator=(const Clusters &) = delete; + + ClusterPtr getCluster(const std::string & cluster_name) const; + void setCluster(const String & cluster_name, const ClusterPtr & cluster); + void updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config = nullptr); - -public: - using Impl = std::map<String, ClusterPtr>; - - Impl getContainer() const; - -protected: - Impl impl; - mutable std::mutex mutex; -}; - -} + +public: + using Impl = std::map<String, ClusterPtr>; + + Impl getContainer() const; + +protected: + Impl impl; + mutable std::mutex mutex; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h index 49cd7bafc3..2d0041bd96 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h @@ -1,37 +1,37 @@ -#pragma once - +#pragma once + #include <Columns/FilterDescription.h> -#include <DataStreams/IBlockStream_fwd.h> -#include <Interpreters/AggregateDescription.h> +#include <DataStreams/IBlockStream_fwd.h> +#include <Interpreters/AggregateDescription.h> #include <Interpreters/DatabaseCatalog.h> #include <Interpreters/SubqueryForSet.h> #include <Interpreters/TreeRewriter.h> #include <Interpreters/WindowDescription.h> #include <Interpreters/join_common.h> -#include <Parsers/IAST_fwd.h> -#include <Storages/IStorage_fwd.h> -#include <Storages/SelectQueryInfo.h> - -namespace DB -{ - -class Block; +#include <Parsers/IAST_fwd.h> +#include <Storages/IStorage_fwd.h> +#include <Storages/SelectQueryInfo.h> + +namespace DB +{ + +class Block; struct Settings; - -struct ExpressionActionsChain; -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; -using ManyExpressionActions = std::vector<ExpressionActionsPtr>; - -struct ASTTableJoin; -class IJoin; -using JoinPtr = std::shared_ptr<IJoin>; - -class ASTFunction; -class ASTExpressionList; -class ASTSelectQuery; -struct ASTTablesInSelectQueryElement; - + +struct ExpressionActionsChain; +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; +using ManyExpressionActions = std::vector<ExpressionActionsPtr>; + +struct ASTTableJoin; +class IJoin; +using JoinPtr = std::shared_ptr<IJoin>; + +class ASTFunction; +class ASTExpressionList; +class ASTSelectQuery; +struct ASTTablesInSelectQueryElement; + struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; @@ -41,95 +41,95 @@ using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>; class ActionsDAG; using 
ActionsDAGPtr = std::shared_ptr<ActionsDAG>; -/// Create columns in block or return false if not possible -bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); - -/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately. -struct ExpressionAnalyzerData -{ +/// Create columns in block or return false if not possible +bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); + +/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately. +struct ExpressionAnalyzerData +{ ~ExpressionAnalyzerData(); - SubqueriesForSets subqueries_for_sets; - PreparedSets prepared_sets; - + SubqueriesForSets subqueries_for_sets; + PreparedSets prepared_sets; + std::unique_ptr<QueryPlan> joined_plan; /// Columns after ARRAY JOIN. If there is no ARRAY JOIN, it's source_columns. NamesAndTypesList columns_after_array_join; /// Columns after Columns after ARRAY JOIN and JOIN. If there is no JOIN, it's columns_after_array_join. NamesAndTypesList columns_after_join; - /// Columns after ARRAY JOIN, JOIN, and/or aggregation. - NamesAndTypesList aggregated_columns; + /// Columns after ARRAY JOIN, JOIN, and/or aggregation. + NamesAndTypesList aggregated_columns; /// Columns after window functions. NamesAndTypesList columns_after_window; - - bool has_aggregation = false; - NamesAndTypesList aggregation_keys; + + bool has_aggregation = false; + NamesAndTypesList aggregation_keys; bool has_const_aggregation_keys = false; - AggregateDescriptions aggregate_descriptions; - + AggregateDescriptions aggregate_descriptions; + WindowDescriptions window_descriptions; NamesAndTypesList window_columns; - bool has_global_subqueries = false; - - /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. - TemporaryTablesMapping external_tables; -}; - - -/** Transforms an expression from a syntax tree into a sequence of actions to execute it. - * - * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. - */ + bool has_global_subqueries = false; + + /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. + TemporaryTablesMapping external_tables; +}; + + +/** Transforms an expression from a syntax tree into a sequence of actions to execute it. + * + * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. + */ class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable, protected WithContext -{ -private: - /// Extracts settings to enlight which are used (and avoid copy of others). - struct ExtractedSettings - { - const bool use_index_for_in_with_subqueries; - const SizeLimits size_limits_for_set; +{ +private: + /// Extracts settings to enlight which are used (and avoid copy of others). + struct ExtractedSettings + { + const bool use_index_for_in_with_subqueries; + const SizeLimits size_limits_for_set; const UInt64 distributed_group_by_no_merge; - + ExtractedSettings(const Settings & settings_); - }; - -public: - /// Ctor for non-select queries. Generally its usage is: - /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions(); + }; + +public: + /// Ctor for non-select queries. 
Generally its usage is: + /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions(); ExpressionAnalyzer(const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_) : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false, {}, {}) { } - + ~ExpressionAnalyzer(); - void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types); - - /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. - /// If add_aliases, only the calculated values in the desired order and add aliases. - /// If also project_result, than only aliases remain in the output block. - /// Otherwise, only temporary columns will be deleted from the block. + void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types); + + /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. + /// If add_aliases, only the calculated values in the desired order and add aliases. + /// If also project_result, than only aliases remain in the output block. + /// Otherwise, only temporary columns will be deleted from the block. ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true); ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no); - - /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. - /// Does not execute subqueries. + + /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. + /// Does not execute subqueries. ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {}); - - /** Sets that require a subquery to be create. - * Only the sets needed to perform actions returned from already executed `append*` or `getActions`. - * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions` - * and create all the returned sets before performing the actions. - */ + + /** Sets that require a subquery to be create. + * Only the sets needed to perform actions returned from already executed `append*` or `getActions`. + * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions` + * and create all the returned sets before performing the actions. + */ SubqueriesForSets & getSubqueriesForSets() { return subqueries_for_sets; } - + PreparedSets & getPreparedSets() { return prepared_sets; } - /// Get intermediates for tests - const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } - + /// Get intermediates for tests + const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } + /// A list of windows for window functions. 
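// Editorial sketch of the non-select pattern the constructor comment above
// describes. Assumes expr_ast, source_columns and context already exist;
// TreeRewriter is where this header's TreeRewriterResultPtr comes from,
// though the exact analyze() overload used here is an assumption.
//
//   auto syntax_result = TreeRewriter(context).analyze(expr_ast, source_columns);  // assumed overload
//   ExpressionActionsPtr actions =
//       ExpressionAnalyzer(expr_ast, syntax_result, context).getActions(/*add_aliases=*/false);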
const WindowDescriptions & windowDescriptions() const { return window_descriptions; } @@ -148,76 +148,76 @@ public: */ SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name); -protected: - ExpressionAnalyzer( - const ASTPtr & query_, +protected: + ExpressionAnalyzer( + const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_, - size_t subquery_depth_, + size_t subquery_depth_, bool do_global_, SubqueriesForSets subqueries_for_sets_, PreparedSets prepared_sets_); - - ASTPtr query; - const ExtractedSettings settings; - size_t subquery_depth; - + + ASTPtr query; + const ExtractedSettings settings; + size_t subquery_depth; + TreeRewriterResultPtr syntax; - - const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. - const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } - const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; } - const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; } - /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. - void initGlobalSubqueriesAndExternalTables(bool do_global); - + + const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. + const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } + const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; } + const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; } + /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. + void initGlobalSubqueriesAndExternalTables(bool do_global); + ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - + void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); - - /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in - * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the - * prepared sets would not be applicable for MergeTree index optimization. - */ + + /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in + * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the + * prepared sets would not be applicable for MergeTree index optimization. + */ void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); - + void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); - /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, - * Create a set of columns aggregated_columns resulting after the aggregation, if any, - * or after all the actions that are normally performed before aggregation. - * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. - */ - void analyzeAggregation(); + /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, + * Create a set of columns aggregated_columns resulting after the aggregation, if any, + * or after all the actions that are normally performed before aggregation. + * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. 
+ */ + void analyzeAggregation(); bool makeAggregateDescriptions(ActionsDAGPtr & actions); - - const ASTSelectQuery * getSelectQuery() const; - + + const ASTSelectQuery * getSelectQuery() const; + bool isRemoteStorage() const { return syntax->is_remote_storage; } -}; - -class SelectQueryExpressionAnalyzer; - -/// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery -struct ExpressionAnalysisResult -{ +}; + +class SelectQueryExpressionAnalyzer; + +/// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery +struct ExpressionAnalysisResult +{ std::string dump() const; - /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. - bool first_stage = false; - /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. - bool second_stage = false; - - bool need_aggregate = false; - bool has_order_by = false; + /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. + bool first_stage = false; + /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. + bool second_stage = false; + + bool need_aggregate = false; + bool has_order_by = false; bool has_window = false; - + String where_column_name; - bool remove_where_filter = false; - bool optimize_read_in_order = false; - bool optimize_aggregation_in_order = false; + bool remove_where_filter = false; + bool optimize_read_in_order = false; + bool optimize_aggregation_in_order = false; bool join_has_delayed_stream = false; - + ActionsDAGPtr before_array_join; ArrayJoinActionPtr array_join; ActionsDAGPtr before_join; @@ -230,63 +230,63 @@ struct ExpressionAnalysisResult ActionsDAGPtr before_order_by; ActionsDAGPtr before_limit_by; ActionsDAGPtr final_projection; - + /// Columns from the SELECT list, before renaming them to aliases. Used to /// perform SELECT DISTINCT. - Names selected_columns; - + Names selected_columns; + /// Columns to read from storage if any. Names required_columns; - /// Columns will be removed after prewhere actions execution. + /// Columns will be removed after prewhere actions execution. NameSet columns_to_remove_after_prewhere; - + PrewhereInfoPtr prewhere_info; FilterDAGInfoPtr filter_info; - ConstantFilterDescription prewhere_constant_filter_description; - ConstantFilterDescription where_constant_filter_description; - /// Actions by every element of ORDER BY - ManyExpressionActions order_by_elements_actions; - ManyExpressionActions group_by_elements_actions; - - ExpressionAnalysisResult() = default; - - ExpressionAnalysisResult( - SelectQueryExpressionAnalyzer & query_analyzer, + ConstantFilterDescription prewhere_constant_filter_description; + ConstantFilterDescription where_constant_filter_description; + /// Actions by every element of ORDER BY + ManyExpressionActions order_by_elements_actions; + ManyExpressionActions group_by_elements_actions; + + ExpressionAnalysisResult() = default; + + ExpressionAnalysisResult( + SelectQueryExpressionAnalyzer & query_analyzer, const StorageMetadataPtr & metadata_snapshot, - bool first_stage, - bool second_stage, - bool only_types, + bool first_stage, + bool second_stage, + bool only_types, const FilterDAGInfoPtr & filter_info, - const Block & source_header); - - /// Filter for row-level security. 
- bool hasFilter() const { return filter_info.get(); } - - bool hasJoin() const { return join.get(); } - bool hasPrewhere() const { return prewhere_info.get(); } - bool hasWhere() const { return before_where.get(); } - bool hasHaving() const { return before_having.get(); } - bool hasLimitBy() const { return before_limit_by.get(); } - - void removeExtraColumns() const; - void checkActions() const; + const Block & source_header); + + /// Filter for row-level security. + bool hasFilter() const { return filter_info.get(); } + + bool hasJoin() const { return join.get(); } + bool hasPrewhere() const { return prewhere_info.get(); } + bool hasWhere() const { return before_where.get(); } + bool hasHaving() const { return before_having.get(); } + bool hasLimitBy() const { return before_limit_by.get(); } + + void removeExtraColumns() const; + void checkActions() const; void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query); -}; - -/// SelectQuery specific ExpressionAnalyzer part. -class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer -{ -public: - friend struct ExpressionAnalysisResult; - - SelectQueryExpressionAnalyzer( - const ASTPtr & query_, +}; + +/// SelectQuery specific ExpressionAnalyzer part. +class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer +{ +public: + friend struct ExpressionAnalysisResult; + + SelectQueryExpressionAnalyzer( + const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_, const StorageMetadataPtr & metadata_snapshot_, - const NameSet & required_result_columns_ = {}, - bool do_global_ = false, + const NameSet & required_result_columns_ = {}, + bool do_global_ = false, const SelectQueryOptions & options_ = {}, SubqueriesForSets subqueries_for_sets_ = {}, PreparedSets prepared_sets_ = {}) @@ -301,79 +301,79 @@ public: , metadata_snapshot(metadata_snapshot_) , required_result_columns(required_result_columns_) , query_options(options_) - { - } - - /// Does the expression have aggregate functions or a GROUP BY or HAVING section. - bool hasAggregation() const { return has_aggregation; } + { + } + + /// Does the expression have aggregate functions or a GROUP BY or HAVING section. + bool hasAggregation() const { return has_aggregation; } bool hasWindow() const { return !syntax->window_function_asts.empty(); } - bool hasGlobalSubqueries() { return has_global_subqueries; } - bool hasTableJoin() const { return syntax->ast_join; } - - const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; } + bool hasGlobalSubqueries() { return has_global_subqueries; } + bool hasTableJoin() const { return syntax->ast_join; } + + const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; } bool hasConstAggregationKeys() const { return has_const_aggregation_keys; } - const AggregateDescriptions & aggregates() const { return aggregate_descriptions; } - - const PreparedSets & getPreparedSets() const { return prepared_sets; } + const AggregateDescriptions & aggregates() const { return aggregate_descriptions; } + + const PreparedSets & getPreparedSets() const { return prepared_sets; } std::unique_ptr<QueryPlan> getJoinedPlan(); - - /// Tables that will need to be sent to remote servers for distributed query processing. - const TemporaryTablesMapping & getExternalTables() const { return external_tables; } - + + /// Tables that will need to be sent to remote servers for distributed query processing. 
+ const TemporaryTablesMapping & getExternalTables() const { return external_tables; } + ActionsDAGPtr simpleSelectActions(); - - /// These appends are public only for tests - void appendSelect(ExpressionActionsChain & chain, bool only_types); - /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. + + /// These appends are public only for tests + void appendSelect(ExpressionActionsChain & chain, bool only_types); + /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; - + /// Create Set-s that we make from IN section to use index on them. void makeSetsForIndex(const ASTPtr & node); -private: +private: StorageMetadataPtr metadata_snapshot; - /// If non-empty, ignore all expressions not from this list. - NameSet required_result_columns; - SelectQueryOptions query_options; - + /// If non-empty, ignore all expressions not from this list. + NameSet required_result_columns; + SelectQueryOptions query_options; + JoinPtr makeTableJoin( const ASTTablesInSelectQueryElement & join_element, const ColumnsWithTypeAndName & left_sample_columns); - - const ASTSelectQuery * getAggregatingQuery() const; - - /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. - * - * Example usage: - * ExpressionActionsChain chain; - * analyzer.appendWhere(chain); - * chain.addStep(); - * analyzer.appendSelect(chain); - * analyzer.appendOrderBy(chain); - * chain.finalize(); - * - * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way - * shouldn't be executed, they are only needed to get a list of columns with their types. - */ - - /// Before aggregation: + + const ASTSelectQuery * getAggregatingQuery() const; + + /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. + * + * Example usage: + * ExpressionActionsChain chain; + * analyzer.appendWhere(chain); + * chain.addStep(); + * analyzer.appendSelect(chain); + * analyzer.appendOrderBy(chain); + * chain.finalize(); + * + * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way + * shouldn't be executed, they are only needed to get a list of columns with their types. + */ + + /// Before aggregation: ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); - bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); + bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); JoinPtr appendJoin(ExpressionActionsChain & chain); - /// remove_filter is set in ExpressionActionsChain::finalize(); - /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). + /// remove_filter is set in ExpressionActionsChain::finalize(); + /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). 
ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); - bool appendWhere(ExpressionActionsChain & chain, bool only_types); - bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); - void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); + bool appendWhere(ExpressionActionsChain & chain, bool only_types); + bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); + void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); void appendWindowFunctionsArguments(ExpressionActionsChain & chain, bool only_types); - - /// After aggregation: - bool appendHaving(ExpressionActionsChain & chain, bool only_types); - /// appendSelect + + /// After aggregation: + bool appendHaving(ExpressionActionsChain & chain, bool only_types); + /// appendSelect ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); - bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); - /// appendProjectResult -}; - -} + bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); + /// appendProjectResult +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h index bbc8b67d4f..2215402e1d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h @@ -1,51 +1,51 @@ -#pragma once - -#include <memory> -#include <vector> - -#include <Core/Names.h> -#include <Columns/IColumn.h> - -namespace DB -{ - -class Block; -struct ExtraBlock; +#pragma once + +#include <memory> +#include <vector> + +#include <Core/Names.h> +#include <Columns/IColumn.h> + +namespace DB +{ + +class Block; +struct ExtraBlock; using ExtraBlockPtr = std::shared_ptr<ExtraBlock>; - + class TableJoin; class NotJoinedBlocks; -class IJoin -{ -public: - virtual ~IJoin() = default; - +class IJoin +{ +public: + virtual ~IJoin() = default; + virtual const TableJoin & getTableJoin() const = 0; - /// Add block of data from right hand of JOIN. - /// @returns false, if some limit was exceeded and you should not insert more data. - virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; - - /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). - /// Could be called from different threads in parallel. - virtual void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) = 0; - + /// Add block of data from right hand of JOIN. + /// @returns false, if some limit was exceeded and you should not insert more data. + virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; + + /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). + /// Could be called from different threads in parallel. 
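// Editorial sketch of the IJoin contract spelled out by the comments above:
// feed right-hand blocks until addJoinedBlock() reports a limit, then join
// left-hand blocks, re-feeding any remainder. join, right_blocks and
// left_block are assumed inputs.
//
//   for (Block & rb : right_blocks)
//       if (!join->addJoinedBlock(rb))            // false: some limit was exceeded
//           break;
//   std::shared_ptr<ExtraBlock> not_processed;
//   join->joinBlock(left_block, not_processed);   // may set not_processed for a second pass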
+ virtual void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) = 0; + /// Set/Get totals for right table - virtual void setTotals(const Block & block) = 0; + virtual void setTotals(const Block & block) = 0; virtual const Block & getTotals() const = 0; - - virtual size_t getTotalRowCount() const = 0; - virtual size_t getTotalByteCount() const = 0; + + virtual size_t getTotalRowCount() const = 0; + virtual size_t getTotalByteCount() const = 0; virtual bool alwaysReturnsEmptySet() const = 0; - + /// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock. /// Different query plan is used for such joins. virtual bool isFilled() const { return false; } virtual std::shared_ptr<NotJoinedBlocks> getNonJoinedBlocks(const Block &, UInt64) const = 0; -}; - -using JoinPtr = std::shared_ptr<IJoin>; - -} +}; + +using JoinPtr = std::shared_ptr<IJoin>; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp index 16b59e6622..a61b0d6dd9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp @@ -1,69 +1,69 @@ -#include "InternalTextLogsQueue.h" -#include <DataTypes/DataTypeDateTime.h> -#include <DataTypes/DataTypeString.h> -#include <DataTypes/DataTypeEnum.h> -#include <DataTypes/DataTypesNumber.h> -#include <common/logger_useful.h> - -#include <Poco/Message.h> - - -namespace DB -{ - -InternalTextLogsQueue::InternalTextLogsQueue() - : ConcurrentBoundedQueue<MutableColumns>(std::numeric_limits<int>::max()), - max_priority(Poco::Message::Priority::PRIO_INFORMATION) {} - - -Block InternalTextLogsQueue::getSampleBlock() -{ - return Block { - {std::make_shared<DataTypeDateTime>(), "event_time"}, - {std::make_shared<DataTypeUInt32>(), "event_time_microseconds"}, - {std::make_shared<DataTypeString>(), "host_name"}, - {std::make_shared<DataTypeString>(), "query_id"}, - {std::make_shared<DataTypeUInt64>(), "thread_id"}, - {std::make_shared<DataTypeInt8>(), "priority"}, - {std::make_shared<DataTypeString>(), "source"}, - {std::make_shared<DataTypeString>(), "text"} - }; -} - -MutableColumns InternalTextLogsQueue::getSampleColumns() -{ - static Block sample_block = getSampleBlock(); - return sample_block.cloneEmptyColumns(); -} - -void InternalTextLogsQueue::pushBlock(Block && log_block) -{ - static Block sample_block = getSampleBlock(); - - if (blocksHaveEqualStructure(sample_block, log_block)) - emplace(log_block.mutateColumns()); - else - LOG_WARNING(&Poco::Logger::get("InternalTextLogsQueue"), "Log block have different structure"); -} - -const char * InternalTextLogsQueue::getPriorityName(int priority) -{ - /// See Poco::Message::Priority - - static constexpr const char * const PRIORITIES[] = - { - "Unknown", - "Fatal", - "Critical", - "Error", - "Warning", - "Notice", - "Information", - "Debug", - "Trace" - }; - - return (priority >= 1 && priority <= 8) ? 
PRIORITIES[priority] : PRIORITIES[0]; -} - -} +#include "InternalTextLogsQueue.h" +#include <DataTypes/DataTypeDateTime.h> +#include <DataTypes/DataTypeString.h> +#include <DataTypes/DataTypeEnum.h> +#include <DataTypes/DataTypesNumber.h> +#include <common/logger_useful.h> + +#include <Poco/Message.h> + + +namespace DB +{ + +InternalTextLogsQueue::InternalTextLogsQueue() + : ConcurrentBoundedQueue<MutableColumns>(std::numeric_limits<int>::max()), + max_priority(Poco::Message::Priority::PRIO_INFORMATION) {} + + +Block InternalTextLogsQueue::getSampleBlock() +{ + return Block { + {std::make_shared<DataTypeDateTime>(), "event_time"}, + {std::make_shared<DataTypeUInt32>(), "event_time_microseconds"}, + {std::make_shared<DataTypeString>(), "host_name"}, + {std::make_shared<DataTypeString>(), "query_id"}, + {std::make_shared<DataTypeUInt64>(), "thread_id"}, + {std::make_shared<DataTypeInt8>(), "priority"}, + {std::make_shared<DataTypeString>(), "source"}, + {std::make_shared<DataTypeString>(), "text"} + }; +} + +MutableColumns InternalTextLogsQueue::getSampleColumns() +{ + static Block sample_block = getSampleBlock(); + return sample_block.cloneEmptyColumns(); +} + +void InternalTextLogsQueue::pushBlock(Block && log_block) +{ + static Block sample_block = getSampleBlock(); + + if (blocksHaveEqualStructure(sample_block, log_block)) + emplace(log_block.mutateColumns()); + else + LOG_WARNING(&Poco::Logger::get("InternalTextLogsQueue"), "Log block have different structure"); +} + +const char * InternalTextLogsQueue::getPriorityName(int priority) +{ + /// See Poco::Message::Priority + + static constexpr const char * const PRIORITIES[] = + { + "Unknown", + "Fatal", + "Critical", + "Error", + "Warning", + "Notice", + "Information", + "Debug", + "Trace" + }; + + return (priority >= 1 && priority <= 8) ? 
PRIORITIES[priority] : PRIORITIES[0]; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h index af3f0c624f..28841598d3 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h @@ -1,31 +1,31 @@ -#pragma once -#include <Common/ConcurrentBoundedQueue.h> -#include <Core/Block.h> - - -namespace DB -{ - -class InternalTextLogsQueue : public ConcurrentBoundedQueue<MutableColumns> -{ -public: - /// You should not push logs in the queue if their priority greater max_priority - int max_priority; - - InternalTextLogsQueue(); - - static Block getSampleBlock(); - static MutableColumns getSampleColumns(); - - /// Is used to pass block from remote server to the client - void pushBlock(Block && log_block); - - /// Converts priority from Poco::Message::Priority to a string - static const char * getPriorityName(int priority); -}; - -using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>; - -} - - +#pragma once +#include <Common/ConcurrentBoundedQueue.h> +#include <Core/Block.h> + + +namespace DB +{ + +class InternalTextLogsQueue : public ConcurrentBoundedQueue<MutableColumns> +{ +public: + /// You should not push logs in the queue if their priority greater max_priority + int max_priority; + + InternalTextLogsQueue(); + + static Block getSampleBlock(); + static MutableColumns getSampleColumns(); + + /// Is used to pass block from remote server to the client + void pushBlock(Block && log_block); + + /// Converts priority from Poco::Message::Priority to a string + static const char * getPriorityName(int priority); +}; + +using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>; + +} + + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h index e6043473d4..f486752e19 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h @@ -1,71 +1,71 @@ -#pragma once - -#include <Parsers/IAST.h> -#include <DataTypes/IDataType.h> -#include <memory> -#include <unordered_map> -#include <DataTypes/DataTypeLowCardinality.h> - +#pragma once -namespace DB -{ - -struct PreparedSetKey -{ - /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired - /// data types of set elements (two different Sets can be required for two tuples with the same contents - /// if left hand sides of the IN operators have different types). - static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_) - { +#include <Parsers/IAST.h> +#include <DataTypes/IDataType.h> +#include <memory> +#include <unordered_map> +#include <DataTypes/DataTypeLowCardinality.h> + + +namespace DB +{ + +struct PreparedSetKey +{ + /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired + /// data types of set elements (two different Sets can be required for two tuples with the same contents + /// if left hand sides of the IN operators have different types). 
+ static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_) + { /// Remove LowCardinality types from type list because Set doesn't support LowCardinality keys now, - /// just converts LowCardinality to ordinary types. - for (auto & type : types_) - type = recursiveRemoveLowCardinality(type); - - PreparedSetKey key; - key.ast_hash = ast.getTreeHash(); - key.types = std::move(types_); - return key; - } - - /// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting - /// set is fully determined by the subquery. - static PreparedSetKey forSubquery(const IAST & ast) - { - PreparedSetKey key; - key.ast_hash = ast.getTreeHash(); - return key; - } - - IAST::Hash ast_hash; - DataTypes types; /// Empty for subqueries. - - bool operator==(const PreparedSetKey & other) const - { - if (ast_hash != other.ast_hash) - return false; - - if (types.size() != other.types.size()) - return false; - - for (size_t i = 0; i < types.size(); ++i) - { - if (!types[i]->equals(*other.types[i])) - return false; - } - - return true; - } - - struct Hash - { - UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; } - }; -}; - -class Set; -using SetPtr = std::shared_ptr<Set>; - -using PreparedSets = std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash>; - -} + /// just converts LowCardinality to ordinary types. + for (auto & type : types_) + type = recursiveRemoveLowCardinality(type); + + PreparedSetKey key; + key.ast_hash = ast.getTreeHash(); + key.types = std::move(types_); + return key; + } + + /// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting + /// set is fully determined by the subquery. + static PreparedSetKey forSubquery(const IAST & ast) + { + PreparedSetKey key; + key.ast_hash = ast.getTreeHash(); + return key; + } + + IAST::Hash ast_hash; + DataTypes types; /// Empty for subqueries. 
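// Editorial sketch: how these keys index the PreparedSets map declared at
// the bottom of this header. in_rhs_ast, in_lhs_types, subquery_ast and set
// are assumed inputs. Two IN expressions with identical tuple literals but
// different left-hand types get distinct entries, as explained above.
//
//   PreparedSets sets;
//   sets[PreparedSetKey::forLiteral(*in_rhs_ast, in_lhs_types)] = set;
//   sets[PreparedSetKey::forSubquery(*subquery_ast)] = set;   // types stay empty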
+ + bool operator==(const PreparedSetKey & other) const + { + if (ast_hash != other.ast_hash) + return false; + + if (types.size() != other.types.size()) + return false; + + for (size_t i = 0; i < types.size(); ++i) + { + if (!types[i]->equals(*other.types[i])) + return false; + } + + return true; + } + + struct Hash + { + UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; } + }; +}; + +class Set; +using SetPtr = std::shared_ptr<Set>; + +using PreparedSets = std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash>; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp index fe9b6806d8..2e8f986ca6 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp @@ -1,43 +1,43 @@ -#include "ProfileEventsExt.h" -#include <Common/typeid_cast.h> -#include <Columns/ColumnsNumber.h> -#include <Columns/ColumnString.h> -#include <Columns/ColumnArray.h> +#include "ProfileEventsExt.h" +#include <Common/typeid_cast.h> +#include <Columns/ColumnsNumber.h> +#include <Columns/ColumnString.h> +#include <Columns/ColumnArray.h> #include <Columns/ColumnMap.h> -#include <DataTypes/DataTypesNumber.h> -#include <DataTypes/DataTypeString.h> -#include <DataTypes/DataTypeArray.h> - -namespace ProfileEvents -{ - -/// Put implementation here to avoid extra linking dependencies for clickhouse_common_io +#include <DataTypes/DataTypesNumber.h> +#include <DataTypes/DataTypeString.h> +#include <DataTypes/DataTypeArray.h> + +namespace ProfileEvents +{ + +/// Put implementation here to avoid extra linking dependencies for clickhouse_common_io void dumpToMapColumn(const Counters & counters, DB::IColumn * column, bool nonzero_only) -{ +{ auto * column_map = column ? 
&typeid_cast<DB::ColumnMap &>(*column) : nullptr; if (!column_map) return; - + auto & offsets = column_map->getNestedColumn().getOffsets(); auto & tuple_column = column_map->getNestedData(); auto & key_column = tuple_column.getColumn(0); auto & value_column = tuple_column.getColumn(1); - size_t size = 0; - for (Event event = 0; event < Counters::num_counters; ++event) - { - UInt64 value = counters[event].load(std::memory_order_relaxed); - - if (nonzero_only && 0 == value) - continue; - + size_t size = 0; + for (Event event = 0; event < Counters::num_counters; ++event) + { + UInt64 value = counters[event].load(std::memory_order_relaxed); + + if (nonzero_only && 0 == value) + continue; + const char * desc = ProfileEvents::getName(event); key_column.insertData(desc, strlen(desc)); value_column.insert(value); size++; - } - + } + offsets.push_back(offsets.back() + size); -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h index 56f5c81bc4..7d513f0cd0 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h @@ -1,12 +1,12 @@ -#pragma once -#include <Common/ProfileEvents.h> -#include <Columns/IColumn.h> - - -namespace ProfileEvents -{ - +#pragma once +#include <Common/ProfileEvents.h> +#include <Columns/IColumn.h> + + +namespace ProfileEvents +{ + /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters & counters, DB::IColumn * column, bool nonzero_only = true); - -} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp index fc3226743f..2cbb963444 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp @@ -2,43 +2,43 @@ #include <Columns/ColumnArray.h> #include <Columns/ColumnFixedString.h> #include <Columns/ColumnString.h> -#include <Columns/ColumnsNumber.h> +#include <Columns/ColumnsNumber.h> #include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeDateTime64.h> #include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeNullable.h> -#include <DataTypes/DataTypeDateTime.h> +#include <DataTypes/DataTypeDateTime.h> #include <DataTypes/DataTypeEnum.h> #include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeMap.h> #include <DataTypes/DataTypeLowCardinality.h> -#include <DataTypes/DataTypeString.h> +#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypesNumber.h> #include <Interpreters/ProfileEventsExt.h> -#include <Interpreters/QueryLog.h> -#include <Poco/Net/IPAddress.h> +#include <Interpreters/QueryLog.h> +#include <Poco/Net/IPAddress.h> #include <Common/ClickHouseRevision.h> #include <Common/IPv6ToBinary.h> #include <Common/ProfileEvents.h> #include <Common/typeid_cast.h> - - -namespace DB -{ - + + +namespace DB +{ + NamesAndTypesList QueryLogElement::getNamesAndTypes() -{ - auto query_status_datatype = std::make_shared<DataTypeEnum8>( - DataTypeEnum8::Values - { - {"QueryStart", static_cast<Int8>(QUERY_START)}, - {"QueryFinish", static_cast<Int8>(QUERY_FINISH)}, - {"ExceptionBeforeStart", static_cast<Int8>(EXCEPTION_BEFORE_START)}, - {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)} - }); - - return - { +{ 
+ auto query_status_datatype = std::make_shared<DataTypeEnum8>( + DataTypeEnum8::Values + { + {"QueryStart", static_cast<Int8>(QUERY_START)}, + {"QueryFinish", static_cast<Int8>(QUERY_FINISH)}, + {"ExceptionBeforeStart", static_cast<Int8>(EXCEPTION_BEFORE_START)}, + {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)} + }); + + return + { {"type", std::move(query_status_datatype)}, {"event_date", std::make_shared<DataTypeDate>()}, {"event_time", std::make_shared<DataTypeDateTime>()}, @@ -46,7 +46,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"query_start_time", std::make_shared<DataTypeDateTime>()}, {"query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, {"query_duration_ms", std::make_shared<DataTypeUInt64>()}, - + {"read_rows", std::make_shared<DataTypeUInt64>()}, {"read_bytes", std::make_shared<DataTypeUInt64>()}, {"written_rows", std::make_shared<DataTypeUInt64>()}, @@ -54,7 +54,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"result_rows", std::make_shared<DataTypeUInt64>()}, {"result_bytes", std::make_shared<DataTypeUInt64>()}, {"memory_usage", std::make_shared<DataTypeUInt64>()}, - + {"current_database", std::make_shared<DataTypeString>()}, {"query", std::make_shared<DataTypeString>()}, {"formatted_query", std::make_shared<DataTypeString>()}, @@ -73,7 +73,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"exception_code", std::make_shared<DataTypeInt32>()}, {"exception", std::make_shared<DataTypeString>()}, {"stack_trace", std::make_shared<DataTypeString>()}, - + {"is_initial_query", std::make_shared<DataTypeUInt8>()}, {"user", std::make_shared<DataTypeString>()}, {"query_id", std::make_shared<DataTypeString>()}, @@ -98,9 +98,9 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"http_referer", std::make_shared<DataTypeString>()}, {"forwarded_for", std::make_shared<DataTypeString>()}, {"quota_key", std::make_shared<DataTypeString>()}, - + {"revision", std::make_shared<DataTypeUInt32>()}, - + {"log_comment", std::make_shared<DataTypeString>()}, {"thread_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, @@ -116,10 +116,10 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"used_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, {"used_storages", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, {"used_table_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())} - }; + }; + +} -} - NamesAndAliases QueryLogElement::getNamesAndAliases() { return @@ -130,30 +130,30 @@ NamesAndAliases QueryLogElement::getNamesAndAliases() {"Settings.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapValues(Settings)"} }; } - -void QueryLogElement::appendToBlock(MutableColumns & columns) const -{ - size_t i = 0; - - columns[i++]->insert(type); + +void QueryLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t i = 0; + + columns[i++]->insert(type); columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); - columns[i++]->insert(event_time); + columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); - columns[i++]->insert(query_start_time); + columns[i++]->insert(query_start_time); columns[i++]->insert(query_start_time_microseconds); - columns[i++]->insert(query_duration_ms); - - columns[i++]->insert(read_rows); - columns[i++]->insert(read_bytes); - columns[i++]->insert(written_rows); - columns[i++]->insert(written_bytes); - 
columns[i++]->insert(result_rows); - columns[i++]->insert(result_bytes); - - columns[i++]->insert(memory_usage); - + columns[i++]->insert(query_duration_ms); + + columns[i++]->insert(read_rows); + columns[i++]->insert(read_bytes); + columns[i++]->insert(written_rows); + columns[i++]->insert(written_bytes); + columns[i++]->insert(result_rows); + columns[i++]->insert(result_bytes); + + columns[i++]->insert(memory_usage); + columns[i++]->insertData(current_database.data(), current_database.size()); - columns[i++]->insertData(query.data(), query.size()); + columns[i++]->insertData(query.data(), query.size()); columns[i++]->insertData(formatted_query.data(), formatted_query.size()); columns[i++]->insert(normalized_query_hash); columns[i++]->insertData(query_kind.data(), query_kind.size()); @@ -184,43 +184,43 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const fill_column(query_views, column_views); } - columns[i++]->insert(exception_code); - columns[i++]->insertData(exception.data(), exception.size()); - columns[i++]->insertData(stack_trace.data(), stack_trace.size()); - - appendClientInfo(client_info, columns, i); - + columns[i++]->insert(exception_code); + columns[i++]->insertData(exception.data(), exception.size()); + columns[i++]->insertData(stack_trace.data(), stack_trace.size()); + + appendClientInfo(client_info, columns, i); + columns[i++]->insert(ClickHouseRevision::getVersionRevision()); - + columns[i++]->insertData(log_comment.data(), log_comment.size()); - { - Array threads_array; - threads_array.reserve(thread_ids.size()); - for (const UInt64 thread_id : thread_ids) - threads_array.emplace_back(thread_id); - columns[i++]->insert(threads_array); - } - - if (profile_counters) - { + { + Array threads_array; + threads_array.reserve(thread_ids.size()); + for (const UInt64 thread_id : thread_ids) + threads_array.emplace_back(thread_id); + columns[i++]->insert(threads_array); + } + + if (profile_counters) + { auto * column = columns[i++].get(); ProfileEvents::dumpToMapColumn(*profile_counters, column, true); - } - else - { - columns[i++]->insertDefault(); - } - - if (query_settings) - { + } + else + { + columns[i++]->insertDefault(); + } + + if (query_settings) + { auto * column = columns[i++].get(); query_settings->dumpToMapColumn(column, true); - } - else - { - columns[i++]->insertDefault(); - } + } + else + { + columns[i++]->insertDefault(); + } { auto & column_aggregate_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]); @@ -255,39 +255,39 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const fill_column(used_storages, column_storage_factory_objects); fill_column(used_table_functions, column_table_function_factory_objects); } -} - -void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i) -{ - columns[i++]->insert(client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY); - - columns[i++]->insert(client_info.current_user); - columns[i++]->insert(client_info.current_query_id); - columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16); - columns[i++]->insert(client_info.current_address.port()); - - columns[i++]->insert(client_info.initial_user); - columns[i++]->insert(client_info.initial_query_id); - columns[i++]->insertData(IPv6ToBinary(client_info.initial_address.host()).data(), 16); - columns[i++]->insert(client_info.initial_address.port()); +} + +void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, 
size_t & i) +{ + columns[i++]->insert(client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY); + + columns[i++]->insert(client_info.current_user); + columns[i++]->insert(client_info.current_query_id); + columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16); + columns[i++]->insert(client_info.current_address.port()); + + columns[i++]->insert(client_info.initial_user); + columns[i++]->insert(client_info.initial_query_id); + columns[i++]->insertData(IPv6ToBinary(client_info.initial_address.host()).data(), 16); + columns[i++]->insert(client_info.initial_address.port()); columns[i++]->insert(client_info.initial_query_start_time); columns[i++]->insert(client_info.initial_query_start_time_microseconds); - - columns[i++]->insert(UInt64(client_info.interface)); - - columns[i++]->insert(client_info.os_user); - columns[i++]->insert(client_info.client_hostname); - columns[i++]->insert(client_info.client_name); + + columns[i++]->insert(UInt64(client_info.interface)); + + columns[i++]->insert(client_info.os_user); + columns[i++]->insert(client_info.client_hostname); + columns[i++]->insert(client_info.client_name); columns[i++]->insert(client_info.client_tcp_protocol_version); - columns[i++]->insert(client_info.client_version_major); - columns[i++]->insert(client_info.client_version_minor); - columns[i++]->insert(client_info.client_version_patch); - - columns[i++]->insert(UInt64(client_info.http_method)); - columns[i++]->insert(client_info.http_user_agent); + columns[i++]->insert(client_info.client_version_major); + columns[i++]->insert(client_info.client_version_minor); + columns[i++]->insert(client_info.client_version_patch); + + columns[i++]->insert(UInt64(client_info.http_method)); + columns[i++]->insert(client_info.http_user_agent); columns[i++]->insert(client_info.http_referer); columns[i++]->insert(client_info.forwarded_for); - - columns[i++]->insert(client_info.quota_key); -} -} + + columns[i++]->insert(client_info.quota_key); +} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h index 93c8388f9d..2713febe1b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h @@ -1,59 +1,59 @@ -#pragma once - +#pragma once + #include <Core/NamesAndAliases.h> -#include <Interpreters/SystemLog.h> -#include <Interpreters/ClientInfo.h> - -namespace ProfileEvents -{ - class Counters; -} - - -namespace DB -{ - - -/** Allows to log information about queries execution: - * - info about start of query execution; - * - performance metrics (are set at the end of query execution); - * - info about errors of query execution. - */ - -/// A struct which will be inserted as row into query_log table -struct QueryLogElement -{ - using Type = QueryLogElementType; - - Type type = QUERY_START; - - /// Depending on the type of query and type of stage, not all the fields may be filled. - - time_t event_time{}; +#include <Interpreters/SystemLog.h> +#include <Interpreters/ClientInfo.h> + +namespace ProfileEvents +{ + class Counters; +} + + +namespace DB +{ + + +/** Allows to log information about queries execution: + * - info about start of query execution; + * - performance metrics (are set at the end of query execution); + * - info about errors of query execution. 
+ */ + +/// A struct which will be inserted as row into query_log table +struct QueryLogElement +{ + using Type = QueryLogElementType; + + Type type = QUERY_START; + + /// Depending on the type of query and type of stage, not all the fields may be filled. + + time_t event_time{}; Decimal64 event_time_microseconds{}; - time_t query_start_time{}; + time_t query_start_time{}; Decimal64 query_start_time_microseconds{}; - UInt64 query_duration_ms{}; - - /// The data fetched from DB to execute the query - UInt64 read_rows{}; - UInt64 read_bytes{}; - - /// The data written to DB - UInt64 written_rows{}; - UInt64 written_bytes{}; - - /// The data sent to the client - UInt64 result_rows{}; - UInt64 result_bytes{}; - - UInt64 memory_usage{}; - + UInt64 query_duration_ms{}; + + /// The data fetched from DB to execute the query + UInt64 read_rows{}; + UInt64 read_bytes{}; + + /// The data written to DB + UInt64 written_rows{}; + UInt64 written_bytes{}; + + /// The data sent to the client + UInt64 result_rows{}; + UInt64 result_bytes{}; + + UInt64 memory_usage{}; + String current_database; - String query; + String query; String formatted_query; UInt64 normalized_query_hash{}; - + String query_kind; std::set<String> query_databases; std::set<String> query_tables; @@ -71,32 +71,32 @@ struct QueryLogElement std::unordered_set<String> used_storages; std::unordered_set<String> used_table_functions; - Int32 exception_code{}; // because ErrorCodes are int - String exception; - String stack_trace; - - ClientInfo client_info; - + Int32 exception_code{}; // because ErrorCodes are int + String exception; + String stack_trace; + + ClientInfo client_info; + String log_comment; - std::vector<UInt64> thread_ids; - std::shared_ptr<ProfileEvents::Counters> profile_counters; - std::shared_ptr<Settings> query_settings; - - static std::string name() { return "QueryLog"; } - + std::vector<UInt64> thread_ids; + std::shared_ptr<ProfileEvents::Counters> profile_counters; + std::shared_ptr<Settings> query_settings; + + static std::string name() { return "QueryLog"; } + static NamesAndTypesList getNamesAndTypes(); static NamesAndAliases getNamesAndAliases(); - void appendToBlock(MutableColumns & columns) const; - - static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i); -}; - - -/// Instead of typedef - to allow forward declaration. -class QueryLog : public SystemLog<QueryLogElement> -{ - using SystemLog<QueryLogElement>::SystemLog; -}; - -} + void appendToBlock(MutableColumns & columns) const; + + static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i); +}; + + +/// Instead of typedef - to allow forward declaration. 
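// Editorial note: appendToBlock() and appendClientInfo() above share a cursor
// pattern - i walks `columns` in exactly the order of getNamesAndTypes(), and
// appendClientInfo() takes i by reference so the caller's cursor keeps
// advancing. A minimal sketch with an assumed element `elem`:
//
//   size_t i = 0;
//   columns[i++]->insert(elem.type);                                   // one column consumed
//   QueryLogElement::appendClientInfo(elem.client_info, columns, i);   // i advances past all client-info columns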
+class QueryLog : public SystemLog<QueryLogElement> +{ + using SystemLog<QueryLogElement>::SystemLog; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp index 0c9a6ab316..7ca3c10045 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp @@ -1,27 +1,27 @@ -#include "QueryThreadLog.h" +#include "QueryThreadLog.h" #include <array> #include <Columns/ColumnFixedString.h> #include <Columns/ColumnString.h> -#include <Columns/ColumnsNumber.h> +#include <Columns/ColumnsNumber.h> #include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeMap.h> -#include <DataTypes/DataTypeDateTime.h> +#include <DataTypes/DataTypeDateTime.h> #include <DataTypes/DataTypeDateTime64.h> #include <DataTypes/DataTypeFactory.h> -#include <DataTypes/DataTypeString.h> +#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypesNumber.h> #include <Interpreters/ProfileEventsExt.h> -#include <Interpreters/QueryLog.h> +#include <Interpreters/QueryLog.h> #include <Poco/Net/IPAddress.h> -#include <Common/ClickHouseRevision.h> - - -namespace DB -{ +#include <Common/ClickHouseRevision.h> + + +namespace DB +{ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes() -{ +{ return { {"event_date", std::make_shared<DataTypeDate>()}, {"event_time", std::make_shared<DataTypeDateTime>()}, @@ -29,21 +29,21 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes() {"query_start_time", std::make_shared<DataTypeDateTime>()}, {"query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, {"query_duration_ms", std::make_shared<DataTypeUInt64>()}, - + {"read_rows", std::make_shared<DataTypeUInt64>()}, {"read_bytes", std::make_shared<DataTypeUInt64>()}, {"written_rows", std::make_shared<DataTypeUInt64>()}, {"written_bytes", std::make_shared<DataTypeUInt64>()}, {"memory_usage", std::make_shared<DataTypeInt64>()}, {"peak_memory_usage", std::make_shared<DataTypeInt64>()}, - + {"thread_name", std::make_shared<DataTypeString>()}, {"thread_id", std::make_shared<DataTypeUInt64>()}, {"master_thread_id", std::make_shared<DataTypeUInt64>()}, {"current_database", std::make_shared<DataTypeString>()}, {"query", std::make_shared<DataTypeString>()}, {"normalized_query_hash", std::make_shared<DataTypeUInt64>()}, - + {"is_initial_query", std::make_shared<DataTypeUInt8>()}, {"user", std::make_shared<DataTypeString>()}, {"query_id", std::make_shared<DataTypeString>()}, @@ -68,13 +68,13 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes() {"http_referer", std::make_shared<DataTypeString>()}, {"forwarded_for", std::make_shared<DataTypeString>()}, {"quota_key", std::make_shared<DataTypeString>()}, - + {"revision", std::make_shared<DataTypeUInt32>()}, - + {"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())}, - }; -} - + }; +} + NamesAndAliases QueryThreadLogElement::getNamesAndAliases() { return @@ -84,46 +84,46 @@ NamesAndAliases QueryThreadLogElement::getNamesAndAliases() }; } -void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const -{ - size_t i = 0; - +void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t i = 0; + columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); - 
columns[i++]->insert(event_time); + columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); - columns[i++]->insert(query_start_time); + columns[i++]->insert(query_start_time); columns[i++]->insert(query_start_time_microseconds); - columns[i++]->insert(query_duration_ms); - - columns[i++]->insert(read_rows); - columns[i++]->insert(read_bytes); - columns[i++]->insert(written_rows); - columns[i++]->insert(written_bytes); - - columns[i++]->insert(memory_usage); - columns[i++]->insert(peak_memory_usage); - - columns[i++]->insertData(thread_name.data(), thread_name.size()); - columns[i++]->insert(thread_id); - columns[i++]->insert(master_thread_id); - + columns[i++]->insert(query_duration_ms); + + columns[i++]->insert(read_rows); + columns[i++]->insert(read_bytes); + columns[i++]->insert(written_rows); + columns[i++]->insert(written_bytes); + + columns[i++]->insert(memory_usage); + columns[i++]->insert(peak_memory_usage); + + columns[i++]->insertData(thread_name.data(), thread_name.size()); + columns[i++]->insert(thread_id); + columns[i++]->insert(master_thread_id); + columns[i++]->insertData(current_database.data(), current_database.size()); - columns[i++]->insertData(query.data(), query.size()); + columns[i++]->insertData(query.data(), query.size()); columns[i++]->insert(normalized_query_hash); - - QueryLogElement::appendClientInfo(client_info, columns, i); - + + QueryLogElement::appendClientInfo(client_info, columns, i); + columns[i++]->insert(ClickHouseRevision::getVersionRevision()); - - if (profile_counters) - { + + if (profile_counters) + { auto * column = columns[i++].get(); ProfileEvents::dumpToMapColumn(*profile_counters, column, true); - } - else - { - columns[i++]->insertDefault(); - } -} - -} + } + else + { + columns[i++]->insertDefault(); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h index a2585d7814..57e93edbaf 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h @@ -1,66 +1,66 @@ -#pragma once - -#include <Interpreters/SystemLog.h> -#include <Interpreters/ClientInfo.h> - - -namespace ProfileEvents -{ - class Counters; -} - - -namespace DB -{ - -struct QueryThreadLogElement -{ - time_t event_time{}; +#pragma once + +#include <Interpreters/SystemLog.h> +#include <Interpreters/ClientInfo.h> + + +namespace ProfileEvents +{ + class Counters; +} + + +namespace DB +{ + +struct QueryThreadLogElement +{ + time_t event_time{}; Decimal64 event_time_microseconds{}; - /// When query was attached to current thread - time_t query_start_time{}; + /// When query was attached to current thread + time_t query_start_time{}; /// same as above but adds microsecond precision Decimal64 query_start_time_microseconds{}; - /// Real time spent by the thread to execute the query - UInt64 query_duration_ms{}; - - /// The data fetched from DB in current thread to execute the query - UInt64 read_rows{}; - UInt64 read_bytes{}; - - /// The data written to DB - UInt64 written_rows{}; - UInt64 written_bytes{}; - - Int64 memory_usage{}; - Int64 peak_memory_usage{}; - - String thread_name; - UInt64 thread_id{}; - UInt64 master_thread_id{}; - + /// Real time spent by the thread to execute the query + UInt64 query_duration_ms{}; + + /// The data fetched from DB in current thread to execute the query + UInt64 read_rows{}; + UInt64 
read_bytes{}; + + /// The data written to DB + UInt64 written_rows{}; + UInt64 written_bytes{}; + + Int64 memory_usage{}; + Int64 peak_memory_usage{}; + + String thread_name; + UInt64 thread_id{}; + UInt64 master_thread_id{}; + String current_database; - String query; + String query; UInt64 normalized_query_hash{}; - ClientInfo client_info; - - std::shared_ptr<ProfileEvents::Counters> profile_counters; - - static std::string name() { return "QueryThreadLog"; } - + ClientInfo client_info; + + std::shared_ptr<ProfileEvents::Counters> profile_counters; + + static std::string name() { return "QueryThreadLog"; } + static NamesAndTypesList getNamesAndTypes(); static NamesAndAliases getNamesAndAliases(); - void appendToBlock(MutableColumns & columns) const; -}; - - -class QueryThreadLog : public SystemLog<QueryThreadLogElement> -{ - using SystemLog<QueryThreadLogElement>::SystemLog; -}; - - -} - - + void appendToBlock(MutableColumns & columns) const; +}; + + +class QueryThreadLog : public SystemLog<QueryThreadLogElement> +{ + using SystemLog<QueryThreadLogElement>::SystemLog; +}; + + +} + + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h index a5173c872d..709ecdc239 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h @@ -1,38 +1,38 @@ -#pragma once - -#include <Core/QueryProcessingStage.h> +#pragma once + +#include <Core/QueryProcessingStage.h> #include <optional> - -namespace DB -{ - -/** - * to_stage - * - the stage to which the query is to be executed. By default, up to the end. - * You can stop at the intermediate aggregation state, which is combined from different servers for distributed query processing. - * - * subquery_depth - * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; - * for INSERT SELECT, a value 1 is passed instead of 0. - * - * only_analyze - * - the object was created only for query analysis. - * - * is_subquery - * - there may be subquery-specific behavior, e.g. there is no need to pass duplicated columns in results because of indirect results. + +namespace DB +{ + +/** + * to_stage + * - the stage to which the query is to be executed. By default, up to the end. + * You can stop at the intermediate aggregation state, which is combined from different servers for distributed query processing. + * + * subquery_depth + * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; + * for INSERT SELECT, a value 1 is passed instead of 0. + * + * only_analyze + * - the object was created only for query analysis. + * + * is_subquery + * - there may be subquery-specific behavior, e.g. there is no need to pass duplicated columns in results because of indirect results. * * is_internal * - the object was created only for internal queries.
- */ -struct SelectQueryOptions -{ - QueryProcessingStage::Enum to_stage; - size_t subquery_depth; - bool only_analyze = false; - bool modify_inplace = false; - bool remove_duplicates = false; - bool ignore_quota = false; - bool ignore_limits = false; + */ +struct SelectQueryOptions +{ + QueryProcessingStage::Enum to_stage; + size_t subquery_depth; + bool only_analyze = false; + bool modify_inplace = false; + bool remove_duplicates = false; + bool ignore_quota = false; + bool ignore_limits = false; /// This flag is needed to analyze query ignoring table projections. /// It is needed because we build another one InterpreterSelectQuery while analyzing projections. /// It helps to avoid infinite recursion. @@ -45,7 +45,7 @@ struct SelectQueryOptions bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select bool with_all_cols = false; /// asterisk include materialized and aliased columns - + /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. They are needed because local /// instance might have multiple shards and scalars can only hold one value. @@ -58,49 +58,49 @@ struct SelectQueryOptions bool is_subquery_ = false) : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_) {} - - SelectQueryOptions copy() const { return *this; } - - SelectQueryOptions subquery() const - { - SelectQueryOptions out = *this; - out.to_stage = QueryProcessingStage::Complete; - ++out.subquery_depth; + + SelectQueryOptions copy() const { return *this; } + + SelectQueryOptions subquery() const + { + SelectQueryOptions out = *this; + out.to_stage = QueryProcessingStage::Complete; + ++out.subquery_depth; out.is_subquery = true; - return out; - } - - SelectQueryOptions & analyze(bool dry_run = true) - { - only_analyze = dry_run; - return *this; - } - - SelectQueryOptions & modify(bool value = true) - { - modify_inplace = value; - return *this; - } - - SelectQueryOptions & noModify() { return modify(false); } - - SelectQueryOptions & removeDuplicates(bool value = true) - { - remove_duplicates = value; - return *this; - } - - SelectQueryOptions & noSubquery() - { - subquery_depth = 0; - return *this; - } - - SelectQueryOptions & ignoreLimits(bool value = true) - { - ignore_limits = value; - return *this; - } + return out; + } + + SelectQueryOptions & analyze(bool dry_run = true) + { + only_analyze = dry_run; + return *this; + } + + SelectQueryOptions & modify(bool value = true) + { + modify_inplace = value; + return *this; + } + + SelectQueryOptions & noModify() { return modify(false); } + + SelectQueryOptions & removeDuplicates(bool value = true) + { + remove_duplicates = value; + return *this; + } + + SelectQueryOptions & noSubquery() + { + subquery_depth = 0; + return *this; + } + + SelectQueryOptions & ignoreLimits(bool value = true) + { + ignore_limits = value; + return *this; + } SelectQueryOptions & ignoreProjections(bool value = true) { @@ -138,6 +138,6 @@ struct SelectQueryOptions shard_count = shard_count_; return *this; } -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp index 0b3d63100a..bfb969cab2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp @@ -1,84 +1,84 @@ -#include 
<Interpreters/StorageID.h> -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Parsers/ASTIdentifier.h> -#include <Common/quoteString.h> -#include <IO/WriteHelpers.h> +#include <Interpreters/StorageID.h> +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Parsers/ASTIdentifier.h> +#include <Common/quoteString.h> +#include <IO/WriteHelpers.h> #include <IO/ReadHelpers.h> -#include <Interpreters/DatabaseAndTableWithAlias.h> +#include <Interpreters/DatabaseAndTableWithAlias.h> #include <Poco/Util/AbstractConfiguration.h> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNKNOWN_DATABASE; -} - -StorageID::StorageID(const ASTQueryWithTableAndOutput & query) -{ - database_name = query.database; - table_name = query.table; - uuid = query.uuid; - assertNotEmpty(); -} - + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_DATABASE; +} + +StorageID::StorageID(const ASTQueryWithTableAndOutput & query) +{ + database_name = query.database; + table_name = query.table; + uuid = query.uuid; + assertNotEmpty(); +} + StorageID::StorageID(const ASTTableIdentifier & table_identifier_node) -{ - DatabaseAndTableWithAlias database_table(table_identifier_node); - database_name = database_table.database; - table_name = database_table.table; - uuid = database_table.uuid; - assertNotEmpty(); -} - -StorageID::StorageID(const ASTPtr & node) -{ +{ + DatabaseAndTableWithAlias database_table(table_identifier_node); + database_name = database_table.database; + table_name = database_table.table; + uuid = database_table.uuid; + assertNotEmpty(); +} + +StorageID::StorageID(const ASTPtr & node) +{ if (const auto * identifier = node->as<ASTTableIdentifier>()) - *this = StorageID(*identifier); - else if (const auto * simple_query = dynamic_cast<const ASTQueryWithTableAndOutput *>(node.get())) - *this = StorageID(*simple_query); - else - throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR); -} - -String StorageID::getTableName() const -{ - assertNotEmpty(); - return table_name; -} - -String StorageID::getDatabaseName() const -{ - assertNotEmpty(); - if (database_name.empty()) - throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE); - return database_name; -} - -String StorageID::getNameForLogs() const -{ - assertNotEmpty(); - return (database_name.empty() ? "" : backQuoteIfNeed(database_name) + ".") + backQuoteIfNeed(table_name) + *this = StorageID(*identifier); + else if (const auto * simple_query = dynamic_cast<const ASTQueryWithTableAndOutput *>(node.get())) + *this = StorageID(*simple_query); + else + throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR); +} + +String StorageID::getTableName() const +{ + assertNotEmpty(); + return table_name; +} + +String StorageID::getDatabaseName() const +{ + assertNotEmpty(); + if (database_name.empty()) + throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE); + return database_name; +} + +String StorageID::getNameForLogs() const +{ + assertNotEmpty(); + return (database_name.empty() ? "" : backQuoteIfNeed(database_name) + ".") + backQuoteIfNeed(table_name) + (hasUUID() ? 
" (" + toString(uuid) + ")" : ""); -} - -bool StorageID::operator<(const StorageID & rhs) const -{ - assertNotEmpty(); - /// It's needed for ViewDependencies - if (!hasUUID() && !rhs.hasUUID()) - /// If both IDs don't have UUID, compare them like pair of strings - return std::tie(database_name, table_name) < std::tie(rhs.database_name, rhs.table_name); - else if (hasUUID() && rhs.hasUUID()) - /// If both IDs have UUID, compare UUIDs and ignore database and table name - return uuid < rhs.uuid; - else - /// All IDs without UUID are less, then all IDs with UUID - return !hasUUID(); -} - +} + +bool StorageID::operator<(const StorageID & rhs) const +{ + assertNotEmpty(); + /// It's needed for ViewDependencies + if (!hasUUID() && !rhs.hasUUID()) + /// If both IDs don't have UUID, compare them like pair of strings + return std::tie(database_name, table_name) < std::tie(rhs.database_name, rhs.table_name); + else if (hasUUID() && rhs.hasUUID()) + /// If both IDs have UUID, compare UUIDs and ignore database and table name + return uuid < rhs.uuid; + else + /// All IDs without UUID are less, then all IDs with UUID + return !hasUUID(); +} + bool StorageID::operator==(const StorageID & rhs) const { assertNotEmpty(); @@ -88,15 +88,15 @@ bool StorageID::operator==(const StorageID & rhs) const return std::tie(database_name, table_name) == std::tie(rhs.database_name, rhs.table_name); } -String StorageID::getFullTableName() const -{ - return backQuoteIfNeed(getDatabaseName()) + "." + backQuoteIfNeed(table_name); -} - +String StorageID::getFullTableName() const +{ + return backQuoteIfNeed(getDatabaseName()) + "." + backQuoteIfNeed(table_name); +} + String StorageID::getFullNameNotQuoted() const { return getDatabaseName() + "." + table_name; -} +} StorageID StorageID::fromDictionaryConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h index 57ac1d94fb..974f5bd3e5 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h @@ -1,37 +1,37 @@ -#pragma once - -#include <Core/Block.h> -#include <Storages/IStorage_fwd.h> - - -namespace DB -{ - +#pragma once + +#include <Core/Block.h> +#include <Storages/IStorage_fwd.h> + + +namespace DB +{ + class QueryPlan; - + class Set; using SetPtr = std::shared_ptr<Set>; -/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. -struct SubqueryForSet -{ +/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. +struct SubqueryForSet +{ SubqueryForSet(); ~SubqueryForSet(); SubqueryForSet(SubqueryForSet &&); SubqueryForSet & operator= (SubqueryForSet &&); - /// The source is obtained using the InterpreterSelectQuery subquery. + /// The source is obtained using the InterpreterSelectQuery subquery. std::unique_ptr<QueryPlan> source; - - /// If set, build it from result. - SetPtr set; - - /// If set, put the result into the table. - /// This is a temporary table for transferring to remote servers for distributed query processing. - StoragePtr table; -}; - -/// ID of subquery -> what to do with it. -using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>; - -} + + /// If set, build it from result. + SetPtr set; + + /// If set, put the result into the table. 
+ /// This is a temporary table for transferring to remote servers for distributed query processing. + StoragePtr table; +}; + +/// ID of subquery -> what to do with it. +using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp index cbeb8a9407..64851f1cb2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp @@ -1,115 +1,115 @@ -#include <Interpreters/TablesStatus.h> -#include <IO/ReadBuffer.h> -#include <IO/WriteBuffer.h> -#include <IO/ReadHelpers.h> -#include <IO/WriteHelpers.h> - -namespace DB -{ -namespace ErrorCodes -{ - extern const int TOO_LARGE_ARRAY_SIZE; - extern const int LOGICAL_ERROR; -} - -void TableStatus::write(WriteBuffer & out) const -{ - writeBinary(is_replicated, out); - if (is_replicated) - { - writeVarUInt(absolute_delay, out); - } -} - -void TableStatus::read(ReadBuffer & in) -{ - absolute_delay = 0; - readBinary(is_replicated, in); - if (is_replicated) - { - readVarUInt(absolute_delay, in); - } -} - -void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const -{ - if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "Logical error: method TablesStatusRequest::write is called for unsupported server revision", - ErrorCodes::LOGICAL_ERROR); - - writeVarUInt(tables.size(), out); - for (const auto & table_name : tables) - { - writeBinary(table_name.database, out); - writeBinary(table_name.table, out); - } -} - -void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision) -{ - if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusRequest::read is called for unsupported client revision", - ErrorCodes::LOGICAL_ERROR); - - size_t size = 0; - readVarUInt(size, in); - - if (size > DEFAULT_MAX_STRING_SIZE) - throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); - - for (size_t i = 0; i < size; ++i) - { - QualifiedTableName table_name; - readBinary(table_name.database, in); - readBinary(table_name.table, in); - tables.emplace(std::move(table_name)); - } -} - -void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revision) const -{ - if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusResponse::write is called for unsupported client revision", - ErrorCodes::LOGICAL_ERROR); - - writeVarUInt(table_states_by_id.size(), out); - for (const auto & kv: table_states_by_id) - { - const QualifiedTableName & table_name = kv.first; - writeBinary(table_name.database, out); - writeBinary(table_name.table, out); - - const TableStatus & status = kv.second; - status.write(out); - } -} - -void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision) -{ - if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusResponse::read is called for unsupported server revision", - ErrorCodes::LOGICAL_ERROR); - - size_t size = 0; - readVarUInt(size, in); - - if (size > DEFAULT_MAX_STRING_SIZE) - throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); - - for (size_t i = 0; i < size; ++i) - { - QualifiedTableName table_name; - 
readBinary(table_name.database, in); - readBinary(table_name.table, in); - - TableStatus status; - status.read(in); - table_states_by_id.emplace(std::move(table_name), std::move(status)); - } -} - -} +#include <Interpreters/TablesStatus.h> +#include <IO/ReadBuffer.h> +#include <IO/WriteBuffer.h> +#include <IO/ReadHelpers.h> +#include <IO/WriteHelpers.h> + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TOO_LARGE_ARRAY_SIZE; + extern const int LOGICAL_ERROR; +} + +void TableStatus::write(WriteBuffer & out) const +{ + writeBinary(is_replicated, out); + if (is_replicated) + { + writeVarUInt(absolute_delay, out); + } +} + +void TableStatus::read(ReadBuffer & in) +{ + absolute_delay = 0; + readBinary(is_replicated, in); + if (is_replicated) + { + readVarUInt(absolute_delay, in); + } +} + +void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const +{ + if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) + throw Exception( + "Logical error: method TablesStatusRequest::write is called for unsupported server revision", + ErrorCodes::LOGICAL_ERROR); + + writeVarUInt(tables.size(), out); + for (const auto & table_name : tables) + { + writeBinary(table_name.database, out); + writeBinary(table_name.table, out); + } +} + +void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision) +{ + if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) + throw Exception( + "method TablesStatusRequest::read is called for unsupported client revision", + ErrorCodes::LOGICAL_ERROR); + + size_t size = 0; + readVarUInt(size, in); + + if (size > DEFAULT_MAX_STRING_SIZE) + throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + + for (size_t i = 0; i < size; ++i) + { + QualifiedTableName table_name; + readBinary(table_name.database, in); + readBinary(table_name.table, in); + tables.emplace(std::move(table_name)); + } +} + +void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revision) const +{ + if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) + throw Exception( + "method TablesStatusResponse::write is called for unsupported client revision", + ErrorCodes::LOGICAL_ERROR); + + writeVarUInt(table_states_by_id.size(), out); + for (const auto & kv: table_states_by_id) + { + const QualifiedTableName & table_name = kv.first; + writeBinary(table_name.database, out); + writeBinary(table_name.table, out); + + const TableStatus & status = kv.second; + status.write(out); + } +} + +void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision) +{ + if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) + throw Exception( + "method TablesStatusResponse::read is called for unsupported server revision", + ErrorCodes::LOGICAL_ERROR); + + size_t size = 0; + readVarUInt(size, in); + + if (size > DEFAULT_MAX_STRING_SIZE) + throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + + for (size_t i = 0; i < size; ++i) + { + QualifiedTableName table_name; + readBinary(table_name.database, in); + readBinary(table_name.table, in); + + TableStatus status; + status.read(in); + table_states_by_id.emplace(std::move(table_name), std::move(status)); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h index 4e1c0d185a..85290f69c8 100644 --- 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h @@ -1,52 +1,52 @@ -#pragma once - -#include <unordered_set> -#include <unordered_map> - +#pragma once + +#include <unordered_set> +#include <unordered_map> + #include <common/types.h> -#include <Core/QualifiedTableName.h> - -namespace DB -{ - -namespace ErrorCodes -{ -} - -class ReadBuffer; -class WriteBuffer; - - -/// The following are request-response messages for TablesStatus request of the client-server protocol. -/// Client can ask about a set of tables and the server will respond with the following information for each table: -/// - Is the table Replicated? -/// - If yes, replication delay for that table. -/// -/// For nonexistent tables there will be no TableStatus entry in the response. - -struct TableStatus -{ - bool is_replicated = false; - UInt32 absolute_delay = 0; - - void write(WriteBuffer & out) const; - void read(ReadBuffer & in); -}; - -struct TablesStatusRequest -{ - std::unordered_set<QualifiedTableName> tables; - - void write(WriteBuffer & out, UInt64 server_protocol_revision) const; - void read(ReadBuffer & in, UInt64 client_protocol_revision); -}; - -struct TablesStatusResponse -{ - std::unordered_map<QualifiedTableName, TableStatus> table_states_by_id; - - void write(WriteBuffer & out, UInt64 client_protocol_revision) const; - void read(ReadBuffer & in, UInt64 server_protocol_revision); -}; - -} +#include <Core/QualifiedTableName.h> + +namespace DB +{ + +namespace ErrorCodes +{ +} + +class ReadBuffer; +class WriteBuffer; + + +/// The following are request-response messages for TablesStatus request of the client-server protocol. +/// Client can ask about a set of tables and the server will respond with the following information for each table: +/// - Is the table Replicated? +/// - If yes, replication delay for that table. +/// +/// For nonexistent tables there will be no TableStatus entry in the response.
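// A hedged round-trip sketch of these messages. The revision constant is the
// one checked in the .cpp above; the buffer classes and the table name are
// assumptions for illustration, not part of this header:
//
//     TablesStatusRequest request;
//     request.tables.insert(QualifiedTableName{"default", "hits"});
//
//     WriteBufferFromOwnString out;
//     request.write(out, DBMS_MIN_REVISION_WITH_TABLES_STATUS);  // throws for older revisions
//
//     ReadBufferFromString in(out.str());
//     TablesStatusRequest parsed;
//     parsed.read(in, DBMS_MIN_REVISION_WITH_TABLES_STATUS);     // parsed.tables now equals request.tables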
+ +struct TableStatus +{ + bool is_replicated = false; + UInt32 absolute_delay = 0; + + void write(WriteBuffer & out) const; + void read(ReadBuffer & in); +}; + +struct TablesStatusRequest +{ + std::unordered_set<QualifiedTableName> tables; + + void write(WriteBuffer & out, UInt64 server_protocol_revision) const; + void read(ReadBuffer & in, UInt64 client_protocol_revision); +}; + +struct TablesStatusResponse +{ + std::unordered_map<QualifiedTableName, TableStatus> table_states_by_id; + + void write(WriteBuffer & out, UInt64 client_protocol_revision) const; + void read(ReadBuffer & in, UInt64 server_protocol_revision); +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp index c42b8572f4..52f9c6b6fb 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp @@ -1,28 +1,28 @@ -#include <Common/ThreadStatus.h> - +#include <Common/ThreadStatus.h> + #include <DataStreams/PushingToViewsBlockOutputStream.h> -#include <Interpreters/Context.h> +#include <Interpreters/Context.h> #include <Interpreters/OpenTelemetrySpanLog.h> #include <Interpreters/ProcessList.h> -#include <Interpreters/QueryThreadLog.h> +#include <Interpreters/QueryThreadLog.h> #include <Interpreters/QueryViewsLog.h> #include <Parsers/formatAST.h> -#include <Common/CurrentThread.h> -#include <Common/Exception.h> +#include <Common/CurrentThread.h> +#include <Common/Exception.h> #include <Common/ProfileEvents.h> -#include <Common/QueryProfiler.h> +#include <Common/QueryProfiler.h> #include <Common/SensitiveDataMasker.h> -#include <Common/ThreadProfileEvents.h> -#include <Common/TraceCollector.h> -#include <common/errnoToString.h> - -#if defined(OS_LINUX) -# include <Common/hasLinuxCapability.h> - -# include <sys/time.h> -# include <sys/resource.h> -#endif - +#include <Common/ThreadProfileEvents.h> +#include <Common/TraceCollector.h> +#include <common/errnoToString.h> + +#if defined(OS_LINUX) +# include <Common/hasLinuxCapability.h> + +# include <sys/time.h> +# include <sys/resource.h> +#endif + namespace ProfileEvents { extern const Event SelectedRows; @@ -30,26 +30,26 @@ extern const Event SelectedBytes; extern const Event InsertedRows; extern const Event InsertedBytes; } - - -/// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io -/// TODO It doesn't make sense. - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int CANNOT_SET_THREAD_PRIORITY; -} - + + +/// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io +/// TODO It doesn't make sense. 
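// A minimal sketch of the lifecycle these methods implement, based on the
// QueryScope helper defined at the end of this file (function and variable
// names here are illustrative):
//
//     void runOnThisThread(ContextMutablePtr query_context)
//     {
//         CurrentThread::QueryScope scope(query_context);  // initializeQuery() + attachQueryContext()
//         // ... execute the query: counters, profilers and the thread group are attached ...
//     }  // ~QueryScope() detaches the thread and finalizes performance counters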
+ +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int CANNOT_SET_THREAD_PRIORITY; +} + void ThreadStatus::applyQuerySettings() { auto query_context_ptr = query_context.lock(); assert(query_context_ptr); const Settings & settings = query_context_ptr->getSettingsRef(); - + query_id = query_context_ptr->getCurrentQueryId(); initQueryProfiler(); @@ -74,21 +74,21 @@ void ThreadStatus::applyQuerySettings() void ThreadStatus::attachQueryContext(ContextPtr query_context_) -{ +{ query_context = query_context_; if (global_context.expired()) global_context = query_context_->getGlobalContext(); - - if (thread_group) - { - std::lock_guard lock(thread_group->mutex); - - thread_group->query_context = query_context; + + if (thread_group) + { + std::lock_guard lock(thread_group->mutex); + + thread_group->query_context = query_context; if (thread_group->global_context.expired()) - thread_group->global_context = global_context; - } - + thread_group->global_context = global_context; + } + // Generate new span for thread manually here, because we can't depend // on OpenTelemetrySpanHolder due to link order issues. // FIXME why and how is this different from setupState()? @@ -99,43 +99,43 @@ void ThreadStatus::attachQueryContext(ContextPtr query_context_) } applyQuerySettings(); -} - -void CurrentThread::defaultThreadDeleter() -{ - if (unlikely(!current_thread)) - return; - current_thread->detachQuery(true, true); -} - -void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) -{ - assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__); - - /// Attach or init current thread to thread group and copy useful information from it - thread_group = thread_group_; - - performance_counters.setParent(&thread_group->performance_counters); - memory_tracker.setParent(&thread_group->memory_tracker); - - { - std::lock_guard lock(thread_group->mutex); - - /// NOTE: thread may be attached multiple times if it is reused from a thread pool. - thread_group->thread_ids.emplace_back(thread_id); - - logs_queue_ptr = thread_group->logs_queue_ptr; +} + +void CurrentThread::defaultThreadDeleter() +{ + if (unlikely(!current_thread)) + return; + current_thread->detachQuery(true, true); +} + +void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) +{ + assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__); + + /// Attach or init current thread to thread group and copy useful information from it + thread_group = thread_group_; + + performance_counters.setParent(&thread_group->performance_counters); + memory_tracker.setParent(&thread_group->memory_tracker); + + { + std::lock_guard lock(thread_group->mutex); + + /// NOTE: thread may be attached multiple times if it is reused from a thread pool. + thread_group->thread_ids.emplace_back(thread_id); + + logs_queue_ptr = thread_group->logs_queue_ptr; fatal_error_callback = thread_group->fatal_error_callback; - query_context = thread_group->query_context; - + query_context = thread_group->query_context; + if (global_context.expired()) - global_context = thread_group->global_context; - } - + global_context = thread_group->global_context; + } + if (auto query_context_ptr = query_context.lock()) { applyQuerySettings(); - + // Generate new span for thread manually here, because we can't depend // on OpenTelemetrySpanHolder due to link order issues. 
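// (per the comment above, the thread starts from a copy of the query-wide
// trace context, so every per-thread span shares the query's trace_id)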
thread_trace_context = query_context_ptr->query_trace_context; @@ -149,35 +149,35 @@ void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) thread_trace_context.trace_id = 0; } - initPerformanceCounters(); - - thread_state = ThreadState::AttachedToQuery; -} - -void ThreadStatus::initializeQuery() -{ - setupState(std::make_shared<ThreadGroupStatus>()); - - /// No need to lock on mutex here - thread_group->memory_tracker.setDescription("(for query)"); - thread_group->master_thread_id = thread_id; -} - -void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool check_detached) -{ - if (thread_state == ThreadState::AttachedToQuery) - { - if (check_detached) - throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR); - return; - } - - if (!thread_group_) - throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR); - - setupState(thread_group_); -} - + initPerformanceCounters(); + + thread_state = ThreadState::AttachedToQuery; +} + +void ThreadStatus::initializeQuery() +{ + setupState(std::make_shared<ThreadGroupStatus>()); + + /// No need to lock on mutex here + thread_group->memory_tracker.setDescription("(for query)"); + thread_group->master_thread_id = thread_id; +} + +void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool check_detached) +{ + if (thread_state == ThreadState::AttachedToQuery) + { + if (check_detached) + throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR); + return; + } + + if (!thread_group_) + throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR); + + setupState(thread_group_); +} + inline UInt64 time_in_nanoseconds(std::chrono::time_point<std::chrono::system_clock> timepoint) { return std::chrono::duration_cast<std::chrono::nanoseconds>(timepoint.time_since_epoch()).count(); @@ -194,16 +194,16 @@ inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count(); } -void ThreadStatus::initPerformanceCounters() -{ - performance_counters_finalized = false; - - /// Clear stats from previous query if a new query is started - /// TODO: make separate query_thread_performance_counters and thread_performance_counters - performance_counters.resetCounters(); - memory_tracker.resetCounters(); - memory_tracker.setDescription("(for thread)"); - +void ThreadStatus::initPerformanceCounters() +{ + performance_counters_finalized = false; + + /// Clear stats from previous query if a new query is started + /// TODO: make separate query_thread_performance_counters and thread_performance_counters + performance_counters.resetCounters(); + memory_tracker.resetCounters(); + memory_tracker.setDescription("(for thread)"); + // query_start_time_{microseconds, nanoseconds} are all constructed from the same time point // to ensure that they are all equal up to the precision of a second. 
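// (all three representations below are derived from that single `now` sample:
// seconds, microseconds and nanoseconds via the time_in_* helpers above)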
const auto now = std::chrono::system_clock::now(); @@ -211,78 +211,78 @@ void ThreadStatus::initPerformanceCounters() query_start_time_nanoseconds = time_in_nanoseconds(now); query_start_time = time_in_seconds(now); query_start_time_microseconds = time_in_microseconds(now); - ++queries_started; - + ++queries_started; + // query_start_time_nanoseconds cannot be used here since RUsageCounters expect CLOCK_MONOTONIC *last_rusage = RUsageCounters::current(); - + if (auto query_context_ptr = query_context.lock()) - { + { const Settings & settings = query_context_ptr->getSettingsRef(); - if (settings.metrics_perf_events_enabled) - { - try - { - current_thread_counters.initializeProfileEvents( - settings.metrics_perf_events_list); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - if (!taskstats) - { - try - { - taskstats = TasksStatsCounters::create(thread_id); - } - catch (...) - { - tryLogCurrentException(log); - } - } - if (taskstats) - taskstats->reset(); -} - -void ThreadStatus::finalizePerformanceCounters() -{ - if (performance_counters_finalized) - return; - - performance_counters_finalized = true; - updatePerformanceCounters(); - - // We want to close perf file descriptors if the perf events were enabled for - // one query. What this code does in practice is less clear -- e.g., if I run - // 'select 1 settings metrics_perf_events_enabled = 1', I still get - // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. - bool close_perf_descriptors = true; + if (settings.metrics_perf_events_enabled) + { + try + { + current_thread_counters.initializeProfileEvents( + settings.metrics_perf_events_list); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + } + + if (!taskstats) + { + try + { + taskstats = TasksStatsCounters::create(thread_id); + } + catch (...) + { + tryLogCurrentException(log); + } + } + if (taskstats) + taskstats->reset(); +} + +void ThreadStatus::finalizePerformanceCounters() +{ + if (performance_counters_finalized) + return; + + performance_counters_finalized = true; + updatePerformanceCounters(); + + // We want to close perf file descriptors if the perf events were enabled for + // one query. What this code does in practice is less clear -- e.g., if I run + // 'select 1 settings metrics_perf_events_enabled = 1', I still get + // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. + bool close_perf_descriptors = true; if (auto query_context_ptr = query_context.lock()) close_perf_descriptors = !query_context_ptr->getSettingsRef().metrics_perf_events_enabled; - - try - { - current_thread_counters.finalizeProfileEvents(performance_counters); - if (close_perf_descriptors) - current_thread_counters.closeEventDescriptors(); - } - catch (...) - { - tryLogCurrentException(log); - } - - try - { + + try + { + current_thread_counters.finalizeProfileEvents(performance_counters); + if (close_perf_descriptors) + current_thread_counters.closeEventDescriptors(); + } + catch (...) 
+ { + tryLogCurrentException(log); + } + + try + { auto global_context_ptr = global_context.lock(); auto query_context_ptr = query_context.lock(); if (global_context_ptr && query_context_ptr) - { + { const auto & settings = query_context_ptr->getSettingsRef(); - if (settings.log_queries && settings.log_query_threads) + if (settings.log_queries && settings.log_query_threads) { const auto now = std::chrono::system_clock::now(); Int64 query_duration_ms = (time_in_microseconds(now) - query_start_time_microseconds) / 1000; @@ -292,14 +292,14 @@ void ThreadStatus::finalizePerformanceCounters() logToQueryThreadLog(*thread_log, query_context_ptr->getCurrentDatabase(), now); } } - } - } - catch (...) - { - tryLogCurrentException(log); - } -} - + } + } + catch (...) + { + tryLogCurrentException(log); + } +} + void ThreadStatus::resetPerformanceCountersLastUsage() { *last_rusage = RUsageCounters::current(); @@ -307,55 +307,55 @@ void ThreadStatus::resetPerformanceCountersLastUsage() taskstats->reset(); } -void ThreadStatus::initQueryProfiler() -{ +void ThreadStatus::initQueryProfiler() +{ if (!query_profiled_enabled) return; - /// query profilers are useless without trace collector + /// query profilers are useless without trace collector auto global_context_ptr = global_context.lock(); if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) - return; - + return; + auto query_context_ptr = query_context.lock(); assert(query_context_ptr); const auto & settings = query_context_ptr->getSettingsRef(); - - try - { - if (settings.query_profiler_real_time_period_ns > 0) - query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id, - /* period */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns)); - - if (settings.query_profiler_cpu_time_period_ns > 0) + + try + { + if (settings.query_profiler_real_time_period_ns > 0) + query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id, + /* period */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns)); + + if (settings.query_profiler_cpu_time_period_ns > 0) query_profiler_cpu = std::make_unique<QueryProfilerCPU>(thread_id, - /* period */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns)); - } - catch (...) - { - /// QueryProfiler is optional. - tryLogCurrentException("ThreadStatus", "Cannot initialize QueryProfiler"); - } -} - -void ThreadStatus::finalizeQueryProfiler() -{ - query_profiler_real.reset(); - query_profiler_cpu.reset(); -} - -void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) -{ + /* period */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns)); + } + catch (...) + { + /// QueryProfiler is optional. + tryLogCurrentException("ThreadStatus", "Cannot initialize QueryProfiler"); + } +} + +void ThreadStatus::finalizeQueryProfiler() +{ + query_profiler_real.reset(); + query_profiler_cpu.reset(); +} + +void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) +{ MemoryTracker::LockExceptionInThread lock(VariableContext::Global); - if (exit_if_already_detached && thread_state == ThreadState::DetachedFromQuery) - { - thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery; - return; - } - - assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__); - + if (exit_if_already_detached && thread_state == ThreadState::DetachedFromQuery) + { + thread_state = thread_exits ? 
ThreadState::Died : ThreadState::DetachedFromQuery; + return; + } + + assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__); + std::shared_ptr<OpenTelemetrySpanLog> opentelemetry_span_log; auto query_context_ptr = query_context.lock(); if (thread_trace_context.trace_id != UUID() && query_context_ptr) @@ -393,41 +393,41 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) opentelemetry_span_log->add(span); } - finalizeQueryProfiler(); - finalizePerformanceCounters(); - - /// Detach from thread group - performance_counters.setParent(&ProfileEvents::global_counters); - memory_tracker.reset(); - + finalizeQueryProfiler(); + finalizePerformanceCounters(); + + /// Detach from thread group + performance_counters.setParent(&ProfileEvents::global_counters); + memory_tracker.reset(); + /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below (will reset to its parent). memory_tracker.setParent(thread_group->memory_tracker.getParent()); - - query_id.clear(); + + query_id.clear(); query_context.reset(); thread_trace_context.trace_id = 0; thread_trace_context.span_id = 0; - thread_group.reset(); - - thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery; - -#if defined(__linux__) - if (os_thread_priority) - { - LOG_TRACE(log, "Resetting nice"); - - if (0 != setpriority(PRIO_PROCESS, thread_id, 0)) + thread_group.reset(); + + thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery; + +#if defined(__linux__) + if (os_thread_priority) + { + LOG_TRACE(log, "Resetting nice"); + + if (0 != setpriority(PRIO_PROCESS, thread_id, 0)) LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString(ErrorCodes::CANNOT_SET_THREAD_PRIORITY, errno)); - - os_thread_priority = 0; - } -#endif -} - + + os_thread_priority = 0; + } +#endif +} + void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point<std::chrono::system_clock> now) -{ - QueryThreadLogElement elem; - +{ + QueryThreadLogElement elem; + // construct current_time and current_time_microseconds using the same time point // so that the two times will always be equal up to a precision of a second. 
auto current_time = time_in_seconds(now); @@ -435,49 +435,49 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String elem.event_time = current_time; elem.event_time_microseconds = current_time_microseconds; - elem.query_start_time = query_start_time; + elem.query_start_time = query_start_time; elem.query_start_time_microseconds = query_start_time_microseconds; elem.query_duration_ms = (time_in_nanoseconds(now) - query_start_time_nanoseconds) / 1000000U; - - elem.read_rows = progress_in.read_rows.load(std::memory_order_relaxed); - elem.read_bytes = progress_in.read_bytes.load(std::memory_order_relaxed); - - /// TODO: Use written_rows and written_bytes when run time progress is implemented - elem.written_rows = progress_out.read_rows.load(std::memory_order_relaxed); - elem.written_bytes = progress_out.read_bytes.load(std::memory_order_relaxed); - elem.memory_usage = memory_tracker.get(); - elem.peak_memory_usage = memory_tracker.getPeak(); - - elem.thread_name = getThreadName(); - elem.thread_id = thread_id; - + + elem.read_rows = progress_in.read_rows.load(std::memory_order_relaxed); + elem.read_bytes = progress_in.read_bytes.load(std::memory_order_relaxed); + + /// TODO: Use written_rows and written_bytes when run time progress is implemented + elem.written_rows = progress_out.read_rows.load(std::memory_order_relaxed); + elem.written_bytes = progress_out.read_bytes.load(std::memory_order_relaxed); + elem.memory_usage = memory_tracker.get(); + elem.peak_memory_usage = memory_tracker.getPeak(); + + elem.thread_name = getThreadName(); + elem.thread_id = thread_id; + elem.current_database = current_database; - if (thread_group) - { - { - std::lock_guard lock(thread_group->mutex); - - elem.master_thread_id = thread_group->master_thread_id; - elem.query = thread_group->query; + if (thread_group) + { + { + std::lock_guard lock(thread_group->mutex); + + elem.master_thread_id = thread_group->master_thread_id; + elem.query = thread_group->query; elem.normalized_query_hash = thread_group->normalized_query_hash; - } - } - + } + } + auto query_context_ptr = query_context.lock(); if (query_context_ptr) - { + { elem.client_info = query_context_ptr->getClientInfo(); - + if (query_context_ptr->getSettingsRef().log_profile_events != 0) - { - /// NOTE: Here we are in the same thread, so we can make memcpy() - elem.profile_counters = std::make_shared<ProfileEvents::Counters>(performance_counters.getPartiallyAtomicSnapshot()); - } - } - - thread_log.add(elem); -} - + { + /// NOTE: Here we are in the same thread, so we can make memcpy() + elem.profile_counters = std::make_shared<ProfileEvents::Counters>(performance_counters.getPartiallyAtomicSnapshot()); + } + } + + thread_log.add(elem); +} + static String getCleanQueryAst(const ASTPtr q, ContextPtr context) { String res = serializeAST(*q, true); @@ -536,90 +536,90 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo) views_log->add(element); } -void CurrentThread::initializeQuery() -{ - if (unlikely(!current_thread)) - return; - current_thread->initializeQuery(); - current_thread->deleter = CurrentThread::defaultThreadDeleter; -} - -void CurrentThread::attachTo(const ThreadGroupStatusPtr & thread_group) -{ - if (unlikely(!current_thread)) - return; - current_thread->attachQuery(thread_group, true); - current_thread->deleter = CurrentThread::defaultThreadDeleter; -} - -void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group) -{ - if (unlikely(!current_thread)) - return; - 
current_thread->attachQuery(thread_group, false); - current_thread->deleter = CurrentThread::defaultThreadDeleter; -} - +void CurrentThread::initializeQuery() +{ + if (unlikely(!current_thread)) + return; + current_thread->initializeQuery(); + current_thread->deleter = CurrentThread::defaultThreadDeleter; +} + +void CurrentThread::attachTo(const ThreadGroupStatusPtr & thread_group) +{ + if (unlikely(!current_thread)) + return; + current_thread->attachQuery(thread_group, true); + current_thread->deleter = CurrentThread::defaultThreadDeleter; +} + +void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group) +{ + if (unlikely(!current_thread)) + return; + current_thread->attachQuery(thread_group, false); + current_thread->deleter = CurrentThread::defaultThreadDeleter; +} + void CurrentThread::attachQueryContext(ContextPtr query_context) -{ - if (unlikely(!current_thread)) - return; - current_thread->attachQueryContext(query_context); -} - -void CurrentThread::finalizePerformanceCounters() -{ - if (unlikely(!current_thread)) - return; - current_thread->finalizePerformanceCounters(); -} - -void CurrentThread::detachQuery() -{ - if (unlikely(!current_thread)) - return; - current_thread->detachQuery(false); -} - -void CurrentThread::detachQueryIfNotDetached() -{ - if (unlikely(!current_thread)) - return; - current_thread->detachQuery(true); -} - - +{ + if (unlikely(!current_thread)) + return; + current_thread->attachQueryContext(query_context); +} + +void CurrentThread::finalizePerformanceCounters() +{ + if (unlikely(!current_thread)) + return; + current_thread->finalizePerformanceCounters(); +} + +void CurrentThread::detachQuery() +{ + if (unlikely(!current_thread)) + return; + current_thread->detachQuery(false); +} + +void CurrentThread::detachQueryIfNotDetached() +{ + if (unlikely(!current_thread)) + return; + current_thread->detachQuery(true); +} + + CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context) -{ - CurrentThread::initializeQuery(); - CurrentThread::attachQueryContext(query_context); +{ + CurrentThread::initializeQuery(); + CurrentThread::attachQueryContext(query_context); if (!query_context->hasQueryContext()) query_context->makeQueryContext(); -} - -void CurrentThread::QueryScope::logPeakMemoryUsage() -{ - auto group = CurrentThread::getGroup(); - if (!group) - return; - - log_peak_memory_usage_in_destructor = false; - group->memory_tracker.logPeakMemoryUsage(); -} - -CurrentThread::QueryScope::~QueryScope() -{ - try - { - if (log_peak_memory_usage_in_destructor) - logPeakMemoryUsage(); - - CurrentThread::detachQueryIfNotDetached(); - } - catch (...) - { - tryLogCurrentException("CurrentThread", __PRETTY_FUNCTION__); - } -} - -} +} + +void CurrentThread::QueryScope::logPeakMemoryUsage() +{ + auto group = CurrentThread::getGroup(); + if (!group) + return; + + log_peak_memory_usage_in_destructor = false; + group->memory_tracker.logPeakMemoryUsage(); +} + +CurrentThread::QueryScope::~QueryScope() +{ + try + { + if (log_peak_memory_usage_in_destructor) + logPeakMemoryUsage(); + + CurrentThread::detachQueryIfNotDetached(); + } + catch (...) 
+ { + tryLogCurrentException("CurrentThread", __PRETTY_FUNCTION__); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h index f3ad109c16..0dca00c285 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h @@ -1,63 +1,63 @@ -#pragma once - -#include <Core/Block.h> -#include <Core/NamesAndTypes.h> -#include <Interpreters/Aliases.h> +#pragma once + +#include <Core/Block.h> +#include <Core/NamesAndTypes.h> +#include <Interpreters/Aliases.h> #include <Interpreters/Context_fwd.h> #include <Interpreters/DatabaseAndTableWithAlias.h> -#include <Interpreters/SelectQueryOptions.h> -#include <Storages/IStorage_fwd.h> - -namespace DB -{ - -class ASTFunction; -struct ASTTablesInSelectQueryElement; -class TableJoin; -struct Settings; -struct SelectQueryOptions; -using Scalars = std::map<String, Block>; +#include <Interpreters/SelectQueryOptions.h> +#include <Storages/IStorage_fwd.h> + +namespace DB +{ + +class ASTFunction; +struct ASTTablesInSelectQueryElement; +class TableJoin; +struct Settings; +struct SelectQueryOptions; +using Scalars = std::map<String, Block>; struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; - + struct TreeRewriterResult -{ - ConstStoragePtr storage; +{ + ConstStoragePtr storage; StorageMetadataPtr metadata_snapshot; - std::shared_ptr<TableJoin> analyzed_join; - const ASTTablesInSelectQueryElement * ast_join = nullptr; - - NamesAndTypesList source_columns; - NameSet source_columns_set; /// Set of names of source_columns. - /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns. - NamesAndTypesList required_source_columns; + std::shared_ptr<TableJoin> analyzed_join; + const ASTTablesInSelectQueryElement * ast_join = nullptr; + + NamesAndTypesList source_columns; + NameSet source_columns_set; /// Set of names of source_columns. + /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns. + NamesAndTypesList required_source_columns; /// Same as above but also record alias columns which are expanded. This is for RBAC access check. Names required_source_columns_before_expanding_alias_columns; - + /// Set of alias columns that are expanded to their alias expressions. We still need the original columns to check access permission. NameSet expanded_aliases; - Aliases aliases; - std::vector<const ASTFunction *> aggregates; - + Aliases aliases; + std::vector<const ASTFunction *> aggregates; + std::vector<const ASTFunction *> window_function_asts; - /// Which column is needed to be ARRAY-JOIN'ed to get the specified. - /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v". - NameToNameMap array_join_result_to_source; - - /// For the ARRAY JOIN section, mapping from the alias to the full column name. - /// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here. - /// Note: not used further. - NameToNameMap array_join_alias_to_name; - - /// The backward mapping for array_join_alias_to_name. - /// Note: not used further. - NameToNameMap array_join_name_to_alias; - - /// Predicate optimizer overrides the sub queries - bool rewrite_subqueries = false; - + /// Which column is needed to be ARRAY-JOIN'ed to get the specified. 
+ /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v". + NameToNameMap array_join_result_to_source; + + /// For the ARRAY JOIN section, mapping from the alias to the full column name. + /// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here. + /// Note: not used further. + NameToNameMap array_join_alias_to_name; + + /// The backward mapping for array_join_alias_to_name. + /// Note: not used further. + NameToNameMap array_join_name_to_alias; + + /// Predicate optimizer overrides the sub queries + bool rewrite_subqueries = false; + /// Whether the query contains explicit columns like "SELECT column1 + column2 FROM table1". /// Queries like "SELECT count() FROM table1", "SELECT 1" don't contain explicit columns. bool has_explicit_columns = false; @@ -65,46 +65,46 @@ struct TreeRewriterResult /// Whether it's possible to use the trivial count optimization, /// i.e. use a fast call of IStorage::totalRows() (or IStorage::totalRowsByPartitionPredicate()) /// instead of actual retrieving columns and counting rows. - bool optimize_trivial_count = false; - + bool optimize_trivial_count = false; + /// Cache isRemote() call for storage, because it may be too heavy. bool is_remote_storage = false; - /// Results of scalar sub queries - Scalars scalars; - + /// Results of scalar sub queries + Scalars scalars; + TreeRewriterResult( const NamesAndTypesList & source_columns_, ConstStoragePtr storage_ = {}, const StorageMetadataPtr & metadata_snapshot_ = {}, bool add_special = true); - - void collectSourceColumns(bool add_special); - void collectUsedColumns(const ASTPtr & query, bool is_select); - Names requiredSourceColumns() const { return required_source_columns.getNames(); } + + void collectSourceColumns(bool add_special); + void collectUsedColumns(const ASTPtr & query, bool is_select); + Names requiredSourceColumns() const { return required_source_columns.getNames(); } const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; } NameSet getArrayJoinSourceNameSet() const; Names getExpandedAliases() const { return {expanded_aliases.begin(), expanded_aliases.end()}; } - const Scalars & getScalars() const { return scalars; } -}; - + const Scalars & getScalars() const { return scalars; } +}; + using TreeRewriterResultPtr = std::shared_ptr<const TreeRewriterResult>; - + /// Tree Rewriter in terms of CMU slides @sa https://15721.courses.cs.cmu.edu/spring2020/slides/19-optimizer1.pdf /// /// Optimises AST tree and collect information for further expression analysis in ExpressionAnalyzer. -/// Result AST has the following invariants: -/// * all aliases are substituted -/// * qualified names are translated -/// * scalar subqueries are executed replaced with constants -/// * unneeded columns are removed from SELECT clause -/// * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...). +/// Result AST has the following invariants: +/// * all aliases are substituted +/// * qualified names are translated +/// * scalar subqueries are executed replaced with constants +/// * unneeded columns are removed from SELECT clause +/// * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...). 
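A caller-side sketch may help with the class declared next. This is illustrative only and mirrors the declarations above rather than any specific call site; `query`, `context`, `source_columns`, `storage`, `metadata_snapshot`, and `tables_with_columns` are assumed to be prepared by the caller.

    // Hypothetical driver: build a mutable result, let analyzeSelect fill it,
    // then read the collected column set from the immutable shared result.
    TreeRewriterResult result(source_columns, storage, metadata_snapshot);
    TreeRewriterResultPtr analyzed = TreeRewriter(context).analyzeSelect(
        query,                 // rewritten in place: aliases substituted, names qualified
        std::move(result),
        SelectQueryOptions{},  // default options
        tables_with_columns);  // resolved FROM/JOIN tables
    Names needed = analyzed->requiredSourceColumns();

The result is shared as a pointer-to-const (`TreeRewriterResultPtr`), so everything ExpressionAnalyzer consumes later is read-only from this point on.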
class TreeRewriter : WithContext -{ -public: +{ +public: explicit TreeRewriter(ContextPtr context_) : WithContext(context_) {} - - /// Analyze and rewrite not select query + + /// Analyze and rewrite not select query TreeRewriterResultPtr analyze( ASTPtr & query, const NamesAndTypesList & source_columns_, @@ -112,18 +112,18 @@ public: const StorageMetadataPtr & metadata_snapshot = {}, bool allow_aggregations = false, bool allow_self_aliases = true) const; - - /// Analyze and rewrite select query + + /// Analyze and rewrite select query TreeRewriterResultPtr analyzeSelect( - ASTPtr & query, + ASTPtr & query, TreeRewriterResult && result, - const SelectQueryOptions & select_options = {}, - const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {}, - const Names & required_result_columns = {}, - std::shared_ptr<TableJoin> table_join = {}) const; - -private: + const SelectQueryOptions & select_options = {}, + const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {}, + const Names & required_result_columns = {}, + std::shared_ptr<TableJoin> table_join = {}) const; + +private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases); -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h index 068f124d2f..ec2e1d3bd5 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h @@ -1,43 +1,43 @@ -#pragma once - +#pragma once + #include <Columns/ColumnsNumber.h> #include <Core/Block.h> -#include <Interpreters/IJoin.h> +#include <Interpreters/IJoin.h> #include <Interpreters/ActionsDAG.h> #include <Interpreters/ExpressionActions.h> - -namespace DB -{ - -struct ColumnWithTypeAndName; + +namespace DB +{ + +struct ColumnWithTypeAndName; class TableJoin; -class IColumn; -using ColumnRawPtrs = std::vector<const IColumn *>; +class IColumn; +using ColumnRawPtrs = std::vector<const IColumn *>; using UInt8ColumnDataPtr = const ColumnUInt8::Container *; - -namespace JoinCommon -{ + +namespace JoinCommon +{ bool canBecomeNullable(const DataTypePtr & type); DataTypePtr convertTypeToNullable(const DataTypePtr & type); void convertColumnToNullable(ColumnWithTypeAndName & column); -void convertColumnsToNullable(Block & block, size_t starting_pos = 0); -void removeColumnNullability(ColumnWithTypeAndName & column); +void convertColumnsToNullable(Block & block, size_t starting_pos = 0); +void removeColumnNullability(ColumnWithTypeAndName & column); void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_column); ColumnPtr emptyNotNullableClone(const ColumnPtr & column); ColumnPtr materializeColumn(const Block & block, const String & name); -Columns materializeColumns(const Block & block, const Names & names); -ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); -ColumnRawPtrs getRawPointers(const Columns & columns); -void removeLowCardinalityInplace(Block & block); +Columns materializeColumns(const Block & block, const Names & names); +ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); +ColumnRawPtrs getRawPointers(const Columns & columns); +void removeLowCardinalityInplace(Block & block); void removeLowCardinalityInplace(Block & block, const Names & names, bool 
change_type = true); void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys); - -ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right); - + +ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right); + /// Throw an exception if join condition column is not UIint8 void checkTypesOfMasks(const Block & block_left, const String & condition_name_left, const Block & block_right, const String & condition_name_right); - + /// Throw an exception if blocks have different types of key columns . Compare up to Nullability. void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right, const Names & key_names_right); @@ -46,9 +46,9 @@ void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const String & condition_name_left, const Block & block_right, const Names & key_names_right, const String & condition_name_right); -void createMissedColumns(Block & block); +void createMissedColumns(Block & block); void joinTotals(Block left_totals, Block right_totals, const TableJoin & table_join, Block & out_block); - + void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count); bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type); @@ -61,8 +61,8 @@ void splitAdditionalColumns(const Names & key_names, const Block & sample_block, void changeLowCardinalityInplace(ColumnWithTypeAndName & column); -} - +} + /// Creates result from right table data in RIGHT and FULL JOIN when keys are not present in left table. class NotJoinedBlocks final { @@ -118,4 +118,4 @@ private: void setRightIndex(size_t right_pos, size_t result_position); }; -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp index 8864103184..4b70f25f5c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp @@ -1,114 +1,114 @@ -#include <Parsers/ASTAlterQuery.h> +#include <Parsers/ASTAlterQuery.h> #include <IO/Operators.h> -#include <iomanip> -#include <Common/quoteString.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNEXPECTED_AST_STRUCTURE; -} - -ASTPtr ASTAlterCommand::clone() const -{ - auto res = std::make_shared<ASTAlterCommand>(*this); - res->children.clear(); - - if (col_decl) - { - res->col_decl = col_decl->clone(); - res->children.push_back(res->col_decl); - } - if (column) - { - res->column = column->clone(); - res->children.push_back(res->column); - } - if (order_by) - { - res->order_by = order_by->clone(); - res->children.push_back(res->order_by); - } - if (partition) - { - res->partition = partition->clone(); - res->children.push_back(res->partition); - } - if (predicate) - { - res->predicate = predicate->clone(); - res->children.push_back(res->predicate); - } - if (ttl) - { - res->ttl = ttl->clone(); - res->children.push_back(res->ttl); - } - if (settings_changes) - { - res->settings_changes = settings_changes->clone(); - res->children.push_back(res->settings_changes); - } +#include <iomanip> +#include <Common/quoteString.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNEXPECTED_AST_STRUCTURE; +} + +ASTPtr ASTAlterCommand::clone() const +{ + auto res = std::make_shared<ASTAlterCommand>(*this); + 
res->children.clear(); + + if (col_decl) + { + res->col_decl = col_decl->clone(); + res->children.push_back(res->col_decl); + } + if (column) + { + res->column = column->clone(); + res->children.push_back(res->column); + } + if (order_by) + { + res->order_by = order_by->clone(); + res->children.push_back(res->order_by); + } + if (partition) + { + res->partition = partition->clone(); + res->children.push_back(res->partition); + } + if (predicate) + { + res->predicate = predicate->clone(); + res->children.push_back(res->predicate); + } + if (ttl) + { + res->ttl = ttl->clone(); + res->children.push_back(res->ttl); + } + if (settings_changes) + { + res->settings_changes = settings_changes->clone(); + res->children.push_back(res->settings_changes); + } if (settings_resets) { res->settings_resets = settings_resets->clone(); res->children.push_back(res->settings_resets); } - if (values) - { - res->values = values->clone(); - res->children.push_back(res->values); - } - if (rename_to) - { - res->rename_to = rename_to->clone(); - res->children.push_back(res->rename_to); - } - - return res; -} - -void ASTAlterCommand::formatImpl( - const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - - if (type == ASTAlterCommand::ADD_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); - col_decl->formatImpl(settings, state, frame); - - if (first) - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); - else if (column) /// AFTER - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - } - } - else if (type == ASTAlterCommand::DROP_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - if (partition) - { + if (values) + { + res->values = values->clone(); + res->children.push_back(res->values); + } + if (rename_to) + { + res->rename_to = rename_to->clone(); + res->children.push_back(res->rename_to); + } + + return res; +} + +void ASTAlterCommand::formatImpl( + const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + if (type == ASTAlterCommand::ADD_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + col_decl->formatImpl(settings, state, frame); + + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (column) /// AFTER + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::DROP_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? 
"IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + if (partition) + { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - } - else if (type == ASTAlterCommand::MODIFY_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - col_decl->formatImpl(settings, state, frame); - + partition->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::MODIFY_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + col_decl->formatImpl(settings, state, frame); + if (!remove_property.empty()) - { + { settings.ostr << (settings.hilite ? hilite_keyword : "") << " REMOVE " << remove_property; - } + } else { if (first) @@ -119,7 +119,7 @@ void ASTAlterCommand::formatImpl( column->formatImpl(settings, state, frame); } } - } + } else if (type == ASTAlterCommand::MATERIALIZE_COLUMN) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str @@ -131,69 +131,69 @@ void ASTAlterCommand::formatImpl( partition->formatImpl(settings, state, frame); } } - else if (type == ASTAlterCommand::COMMENT_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - settings.ostr << " " << (settings.hilite ? hilite_none : ""); - comment->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::MODIFY_ORDER_BY) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); - order_by->formatImpl(settings, state, frame); - } + else if (type == ASTAlterCommand::COMMENT_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::MODIFY_ORDER_BY) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); + order_by->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::MODIFY_SAMPLE_BY) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SAMPLE BY " << (settings.hilite ? hilite_none : ""); sample_by->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::ADD_INDEX) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); - index_decl->formatImpl(settings, state, frame); - + else if (type == ASTAlterCommand::ADD_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + index_decl->formatImpl(settings, state, frame); + if (first) settings.ostr << (settings.hilite ? 
hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); else if (index) /// AFTER - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); - index->formatImpl(settings, state, frame); - } - } - else if (type == ASTAlterCommand::DROP_INDEX) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << (clear_index ? "CLEAR " : "DROP ") << "INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - index->formatImpl(settings, state, frame); - if (partition) - { + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::DROP_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << (clear_index ? "CLEAR " : "DROP ") << "INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + if (partition) + { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - } - else if (type == ASTAlterCommand::MATERIALIZE_INDEX) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << "MATERIALIZE INDEX " << (settings.hilite ? hilite_none : ""); - index->formatImpl(settings, state, frame); - if (partition) - { + partition->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::MATERIALIZE_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << "MATERIALIZE INDEX " << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + if (partition) + { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - } - else if (type == ASTAlterCommand::ADD_CONSTRAINT) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); - constraint_decl->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::DROP_CONSTRAINT) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << "DROP CONSTRAINT " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - constraint->formatImpl(settings, state, frame); - } + partition->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::ADD_CONSTRAINT) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + constraint_decl->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::DROP_CONSTRAINT) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << "DROP CONSTRAINT " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + constraint->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::ADD_PROJECTION) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD PROJECTION " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? 
hilite_none : ""); @@ -229,100 +229,100 @@ void ASTAlterCommand::formatImpl( partition->formatImpl(settings, state, frame); } } - else if (type == ASTAlterCommand::DROP_PARTITION) - { + else if (type == ASTAlterCommand::DROP_PARTITION) + { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? "DETACH" : "DROP") << (part ? " PART " : " PARTITION ") - << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::DROP_DETACHED_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP DETACHED" << (part ? " PART " : " PARTITION ") - << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::ATTACH_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH " - << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::MOVE_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MOVE " - << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - settings.ostr << " TO "; - switch (move_destination_type) - { - case DataDestinationType::DISK: - settings.ostr << "DISK "; - break; - case DataDestinationType::VOLUME: - settings.ostr << "VOLUME "; - break; - case DataDestinationType::TABLE: - settings.ostr << "TABLE "; - if (!to_database.empty()) - { - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(to_database) - << (settings.hilite ? hilite_none : "") << "."; - } - settings.ostr << (settings.hilite ? hilite_identifier : "") - << backQuoteIfNeed(to_table) - << (settings.hilite ? hilite_none : ""); - return; - default: - break; - } - if (move_destination_type != DataDestinationType::TABLE) - { - settings.ostr << quoteString(move_destination_name); - } - } - else if (type == ASTAlterCommand::REPLACE_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (replace ? "REPLACE" : "ATTACH") << " PARTITION " - << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); - if (!from_database.empty()) - { - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_database) - << (settings.hilite ? hilite_none : "") << "."; - } - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_table) << (settings.hilite ? hilite_none : ""); - } - else if (type == ASTAlterCommand::FETCH_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH " + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::DROP_DETACHED_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP DETACHED" << (part ? " PART " : " PARTITION ") + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::ATTACH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH " + << (part ? "PART " : "PARTITION ") << (settings.hilite ? 
hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::MOVE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MOVE " + << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + settings.ostr << " TO "; + switch (move_destination_type) + { + case DataDestinationType::DISK: + settings.ostr << "DISK "; + break; + case DataDestinationType::VOLUME: + settings.ostr << "VOLUME "; + break; + case DataDestinationType::TABLE: + settings.ostr << "TABLE "; + if (!to_database.empty()) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(to_database) + << (settings.hilite ? hilite_none : "") << "."; + } + settings.ostr << (settings.hilite ? hilite_identifier : "") + << backQuoteIfNeed(to_table) + << (settings.hilite ? hilite_none : ""); + return; + default: + break; + } + if (move_destination_type != DataDestinationType::TABLE) + { + settings.ostr << quoteString(move_destination_name); + } + } + else if (type == ASTAlterCommand::REPLACE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (replace ? "REPLACE" : "ATTACH") << " PARTITION " + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); + if (!from_database.empty()) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_database) + << (settings.hilite ? hilite_none : "") << "."; + } + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_table) << (settings.hilite ? hilite_none : ""); + } + else if (type == ASTAlterCommand::FETCH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH " << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - settings.ostr << (settings.hilite ? hilite_keyword : "") + partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") << DB::quote << from; - } - else if (type == ASTAlterCommand::FREEZE_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - - if (!with_name.empty()) - { - settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") + } + else if (type == ASTAlterCommand::FREEZE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + + if (!with_name.empty()) + { + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") << " " << DB::quote << with_name; - } - } - else if (type == ASTAlterCommand::FREEZE_ALL) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE"; - - if (!with_name.empty()) - { - settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") + } + } + else if (type == ASTAlterCommand::FREEZE_ALL) + { + settings.ostr << (settings.hilite ? 
hilite_keyword : "") << indent_str << "FREEZE"; + + if (!with_name.empty()) + { + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") << " " << DB::quote << with_name; - } - } + } + } else if (type == ASTAlterCommand::UNFREEZE_PARTITION) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE PARTITION " << (settings.hilite ? hilite_none : ""); @@ -344,8 +344,8 @@ void ASTAlterCommand::formatImpl( << " " << DB::quote << with_name; } } - else if (type == ASTAlterCommand::DELETE) - { + else if (type == ASTAlterCommand::DELETE) + { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE" << (settings.hilite ? hilite_none : ""); if (partition) @@ -355,73 +355,73 @@ void ASTAlterCommand::formatImpl( } settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); - predicate->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::UPDATE) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UPDATE " << (settings.hilite ? hilite_none : ""); - update_assignments->formatImpl(settings, state, frame); - + predicate->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::UPDATE) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UPDATE " << (settings.hilite ? hilite_none : ""); + update_assignments->formatImpl(settings, state, frame); + if (partition) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); partition->formatImpl(settings, state, frame); } - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); - predicate->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::MODIFY_TTL) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : ""); - ttl->formatImpl(settings, state, frame); - } + settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); + predicate->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::MODIFY_TTL) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : ""); + ttl->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::REMOVE_TTL) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REMOVE TTL" << (settings.hilite ? hilite_none : ""); } - else if (type == ASTAlterCommand::MATERIALIZE_TTL) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MATERIALIZE TTL" - << (settings.hilite ? hilite_none : ""); - if (partition) - { + else if (type == ASTAlterCommand::MATERIALIZE_TTL) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MATERIALIZE TTL" + << (settings.hilite ? hilite_none : ""); + if (partition) + { settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - } - else if (type == ASTAlterCommand::MODIFY_SETTING) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SETTING " << (settings.hilite ? 
hilite_none : ""); - settings_changes->formatImpl(settings, state, frame); - } + partition->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::MODIFY_SETTING) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SETTING " << (settings.hilite ? hilite_none : ""); + settings_changes->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::RESET_SETTING) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RESET SETTING " << (settings.hilite ? hilite_none : ""); settings_resets->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::MODIFY_QUERY) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY QUERY " << settings.nl_or_ws << (settings.hilite ? hilite_none : ""); - select->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::LIVE_VIEW_REFRESH) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REFRESH " << (settings.hilite ? hilite_none : ""); - } - else if (type == ASTAlterCommand::RENAME_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RENAME COLUMN " << (if_exists ? "IF EXISTS " : "") - << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - - settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO "; - rename_to->formatImpl(settings, state, frame); - } - else - throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); -} - + else if (type == ASTAlterCommand::MODIFY_QUERY) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY QUERY " << settings.nl_or_ws << (settings.hilite ? hilite_none : ""); + select->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::LIVE_VIEW_REFRESH) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REFRESH " << (settings.hilite ? hilite_none : ""); + } + else if (type == ASTAlterCommand::RENAME_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RENAME COLUMN " << (if_exists ? "IF EXISTS " : "") + << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO "; + rename_to->formatImpl(settings, state, frame); + } + else + throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); +} + bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const { if (command_list) @@ -438,7 +438,7 @@ bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) con } return false; } - + bool ASTAlterQuery::isSettingsAlter() const { return isOneCommandTypeOnly(ASTAlterCommand::MODIFY_SETTING); @@ -450,50 +450,50 @@ bool ASTAlterQuery::isFreezeAlter() const || isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_ALL); } -/** Get the text that identifies this element. 
*/ -String ASTAlterQuery::getID(char delim) const -{ - return "AlterQuery" + (delim + database) + delim + table; -} - -ASTPtr ASTAlterQuery::clone() const -{ - auto res = std::make_shared<ASTAlterQuery>(*this); - res->children.clear(); - - if (command_list) - res->set(res->command_list, command_list->clone()); - - return res; -} - -void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - frame.need_parens = false; - - std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); - - if (is_live_view) - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE VIEW " << (settings.hilite ? hilite_none : ""); - else - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : ""); - - if (!table.empty()) - { - if (!database.empty()) - { - settings.ostr << indent_str << backQuoteIfNeed(database); - settings.ostr << "."; - } - settings.ostr << indent_str << backQuoteIfNeed(table); - } - formatOnCluster(settings); - settings.ostr << settings.nl_or_ws; - - FormatStateStacked frame_nested = frame; - frame_nested.need_parens = false; - ++frame_nested.indent; - static_cast<IAST *>(command_list)->formatImpl(settings, state, frame_nested); -} - -} +/** Get the text that identifies this element. */ +String ASTAlterQuery::getID(char delim) const +{ + return "AlterQuery" + (delim + database) + delim + table; +} + +ASTPtr ASTAlterQuery::clone() const +{ + auto res = std::make_shared<ASTAlterQuery>(*this); + res->children.clear(); + + if (command_list) + res->set(res->command_list, command_list->clone()); + + return res; +} + +void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + frame.need_parens = false; + + std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); + + if (is_live_view) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE VIEW " << (settings.hilite ? hilite_none : ""); + else + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? 
hilite_none : ""); + + if (!table.empty()) + { + if (!database.empty()) + { + settings.ostr << indent_str << backQuoteIfNeed(database); + settings.ostr << "."; + } + settings.ostr << indent_str << backQuoteIfNeed(table); + } + formatOnCluster(settings); + settings.ostr << settings.nl_or_ws; + + FormatStateStacked frame_nested = frame; + frame_nested.need_parens = false; + ++frame_nested.indent; + static_cast<IAST *>(command_list)->formatImpl(settings, state, frame_nested); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h index ac6ee30fe0..a5410e797c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h @@ -1,118 +1,118 @@ -#pragma once - +#pragma once + #include <Parsers/ASTExpressionList.h> #include <Parsers/ASTQueryWithOnCluster.h> -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Parsers/ASTTTLElement.h> +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Parsers/ASTTTLElement.h> #include <Parsers/IAST.h> - - -namespace DB -{ - -/** ALTER query: - * ALTER TABLE [db.]name_type - * ADD COLUMN col_name type [AFTER col_after], - * DROP COLUMN col_drop [FROM PARTITION partition], - * MODIFY COLUMN col_name type, - * DROP PARTITION partition, - * COMMENT_COLUMN col_name 'comment', - * ALTER LIVE VIEW [db.]name_type - * REFRESH - */ - -class ASTAlterCommand : public IAST -{ -public: - enum Type - { - ADD_COLUMN, - DROP_COLUMN, - MODIFY_COLUMN, - COMMENT_COLUMN, - RENAME_COLUMN, + + +namespace DB +{ + +/** ALTER query: + * ALTER TABLE [db.]name_type + * ADD COLUMN col_name type [AFTER col_after], + * DROP COLUMN col_drop [FROM PARTITION partition], + * MODIFY COLUMN col_name type, + * DROP PARTITION partition, + * COMMENT_COLUMN col_name 'comment', + * ALTER LIVE VIEW [db.]name_type + * REFRESH + */ + +class ASTAlterCommand : public IAST +{ +public: + enum Type + { + ADD_COLUMN, + DROP_COLUMN, + MODIFY_COLUMN, + COMMENT_COLUMN, + RENAME_COLUMN, MATERIALIZE_COLUMN, - MODIFY_ORDER_BY, + MODIFY_ORDER_BY, MODIFY_SAMPLE_BY, - MODIFY_TTL, - MATERIALIZE_TTL, - MODIFY_SETTING, + MODIFY_TTL, + MATERIALIZE_TTL, + MODIFY_SETTING, RESET_SETTING, - MODIFY_QUERY, + MODIFY_QUERY, REMOVE_TTL, - - ADD_INDEX, - DROP_INDEX, - MATERIALIZE_INDEX, - - ADD_CONSTRAINT, - DROP_CONSTRAINT, - + + ADD_INDEX, + DROP_INDEX, + MATERIALIZE_INDEX, + + ADD_CONSTRAINT, + DROP_CONSTRAINT, + ADD_PROJECTION, DROP_PROJECTION, MATERIALIZE_PROJECTION, - DROP_PARTITION, - DROP_DETACHED_PARTITION, - ATTACH_PARTITION, - MOVE_PARTITION, - REPLACE_PARTITION, - FETCH_PARTITION, - FREEZE_PARTITION, - FREEZE_ALL, + DROP_PARTITION, + DROP_DETACHED_PARTITION, + ATTACH_PARTITION, + MOVE_PARTITION, + REPLACE_PARTITION, + FETCH_PARTITION, + FREEZE_PARTITION, + FREEZE_ALL, UNFREEZE_PARTITION, UNFREEZE_ALL, - - DELETE, - UPDATE, - - NO_TYPE, - - LIVE_VIEW_REFRESH, - }; - - Type type = NO_TYPE; - - /** The ADD COLUMN query stores the name and type of the column to add - * This field is not used in the DROP query - * In MODIFY query, the column name and the new type are stored here - */ - ASTPtr col_decl; - - /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER - * The DROP query stores the column name for deletion here - * Also used for RENAME COLUMN. 
- */ - ASTPtr column; - - /** For MODIFY ORDER BY - */ - ASTPtr order_by; - + + DELETE, + UPDATE, + + NO_TYPE, + + LIVE_VIEW_REFRESH, + }; + + Type type = NO_TYPE; + + /** The ADD COLUMN query stores the name and type of the column to add + * This field is not used in the DROP query + * In MODIFY query, the column name and the new type are stored here + */ + ASTPtr col_decl; + + /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER + * The DROP query stores the column name for deletion here + * Also used for RENAME COLUMN. + */ + ASTPtr column; + + /** For MODIFY ORDER BY + */ + ASTPtr order_by; + /** For MODIFY SAMPLE BY */ ASTPtr sample_by; - /** The ADD INDEX query stores the IndexDeclaration there. - */ - ASTPtr index_decl; - - /** The ADD INDEX query stores the name of the index following AFTER. - * The DROP INDEX query stores the name for deletion. - * The MATERIALIZE INDEX query stores the name of the index to materialize. - * The CLEAR INDEX query stores the name of the index to clear. - */ - ASTPtr index; - - /** The ADD CONSTRAINT query stores the ConstraintDeclaration there. - */ - ASTPtr constraint_decl; - - /** The DROP CONSTRAINT query stores the name for deletion. - */ - ASTPtr constraint; - + /** The ADD INDEX query stores the IndexDeclaration there. + */ + ASTPtr index_decl; + + /** The ADD INDEX query stores the name of the index following AFTER. + * The DROP INDEX query stores the name for deletion. + * The MATERIALIZE INDEX query stores the name of the index to materialize. + * The CLEAR INDEX query stores the name of the index to clear. + */ + ASTPtr index; + + /** The ADD CONSTRAINT query stores the ConstraintDeclaration there. + */ + ASTPtr constraint_decl; + + /** The DROP CONSTRAINT query stores the name for deletion. + */ + ASTPtr constraint; + /** The ADD PROJECTION query stores the ProjectionDeclaration there. */ ASTPtr projection_decl; @@ -125,114 +125,114 @@ public: ASTPtr projection; /** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries. - * The value or ID of the partition is stored here. - */ - ASTPtr partition; - - /// For DELETE/UPDATE WHERE: the predicate that filters the rows to delete/update. - ASTPtr predicate; - - /// A list of expressions of the form `column = expr` for the UPDATE command. - ASTPtr update_assignments; - - /// A column comment - ASTPtr comment; - - /// For MODIFY TTL query - ASTPtr ttl; - - /// FOR MODIFY_SETTING - ASTPtr settings_changes; - + * The value or ID of the partition is stored here. + */ + ASTPtr partition; + + /// For DELETE/UPDATE WHERE: the predicate that filters the rows to delete/update. + ASTPtr predicate; + + /// A list of expressions of the form `column = expr` for the UPDATE command. 
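A side note on the ownership idiom visible in ASTAlterCommand::clone() above: every optional child is both deep-cloned and re-registered in `children`, because generic AST visitors walk `children` rather than the named fields. A minimal sketch of that idiom, using one member from the clone body shown earlier:

    if (partition)
    {
        res->partition = partition->clone();       // deep copy of the subtree
        res->children.push_back(res->partition);   // keep generic visitors aware of it
    }

Omitting the push_back would leave that subtree unreachable from `children`, so tree-wide passes such as formatting would silently skip it.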
+ ASTPtr update_assignments; + + /// A column comment + ASTPtr comment; + + /// For MODIFY TTL query + ASTPtr ttl; + + /// FOR MODIFY_SETTING + ASTPtr settings_changes; + /// FOR RESET_SETTING ASTPtr settings_resets; - /// For MODIFY_QUERY - ASTPtr select; - - /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here - */ - ASTPtr values; - - bool detach = false; /// true for DETACH PARTITION - - bool part = false; /// true for ATTACH PART, DROP DETACHED PART and MOVE - - bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) - - bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata) - + /// For MODIFY_QUERY + ASTPtr select; + + /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here + */ + ASTPtr values; + + bool detach = false; /// true for DETACH PARTITION + + bool part = false; /// true for ATTACH PART, DROP DETACHED PART and MOVE + + bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) + + bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata) + bool clear_projection = false; /// for CLEAR PROJECTION (do not drop projection from metadata) - bool if_not_exists = false; /// option for ADD_COLUMN - - bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN - - bool first = false; /// option for ADD_COLUMN, MODIFY_COLUMN - - DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION - - String move_destination_name; /// option for MOVE PART/PARTITION - - /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. - */ - String from; - + bool if_not_exists = false; /// option for ADD_COLUMN + + bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + + bool first = false; /// option for ADD_COLUMN, MODIFY_COLUMN + + DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION + + String move_destination_name; /// option for MOVE PART/PARTITION + + /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. + */ + String from; + /** * For FREEZE PARTITION - place local backup to directory with specified name. * For UNFREEZE - delete local backup at directory with specified name. 
- */ - String with_name; - - /// REPLACE(ATTACH) PARTITION partition FROM db.table - String from_database; - String from_table; - /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table - bool replace = true; - /// MOVE PARTITION partition TO TABLE db.table - String to_database; - String to_table; - - /// Target column name - ASTPtr rename_to; - + */ + String with_name; + + /// REPLACE(ATTACH) PARTITION partition FROM db.table + String from_database; + String from_table; + /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table + bool replace = true; + /// MOVE PARTITION partition TO TABLE db.table + String to_database; + String to_table; + + /// Target column name + ASTPtr rename_to; + /// Which property user want to remove String remove_property; - String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast<int>(type))); } - - ASTPtr clone() const override; - -protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; -}; - -class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster -{ -public: - bool is_live_view{false}; /// true for ALTER LIVE VIEW - + String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast<int>(type))); } + + ASTPtr clone() const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster +{ +public: + bool is_live_view{false}; /// true for ALTER LIVE VIEW + ASTExpressionList * command_list = nullptr; - + bool isSettingsAlter() const; bool isFreezeAlter() const; - String getID(char) const override; - - ASTPtr clone() const override; - - ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override - { - return removeOnCluster<ASTAlterQuery>(clone(), new_database); - } - + String getID(char) const override; + + ASTPtr clone() const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override + { + return removeOnCluster<ASTAlterQuery>(clone(), new_database); + } + const char * getQueryKindString() const override { return "Alter"; } -protected: - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; bool isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const; -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h index 464697be4f..fdd1179ec9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h @@ -1,53 +1,53 @@ -#pragma once - -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Common/quoteString.h> - - -namespace DB -{ - -struct ASTCheckQuery : public ASTQueryWithTableAndOutput -{ - ASTPtr partition; - - /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; } - - ASTPtr clone() const override - { - auto res = std::make_shared<ASTCheckQuery>(*this); - res->children.clear(); - cloneOutputOptions(*res); - return res; - } - -protected: - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override - { - std::string nl_or_nothing = settings.one_line ? "" : "\n"; - - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - std::string nl_or_ws = settings.one_line ? " " : "\n"; - - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? hilite_none : ""); - - if (!table.empty()) - { - if (!database.empty()) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(database) << (settings.hilite ? hilite_none : ""); - settings.ostr << "."; - } - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(table) << (settings.hilite ? hilite_none : ""); - } - - if (partition) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - } -}; - -} +#pragma once + +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Common/quoteString.h> + + +namespace DB +{ + +struct ASTCheckQuery : public ASTQueryWithTableAndOutput +{ + ASTPtr partition; + + /** Get the text that identifies this element. */ + String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; } + + ASTPtr clone() const override + { + auto res = std::make_shared<ASTCheckQuery>(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; + } + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + std::string nl_or_nothing = settings.one_line ? "" : "\n"; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string nl_or_ws = settings.one_line ? " " : "\n"; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? hilite_none : ""); + + if (!table.empty()) + { + if (!database.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(database) << (settings.hilite ? hilite_none : ""); + settings.ostr << "."; + } + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(table) << (settings.hilite ? hilite_none : ""); + } + + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? 
hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp index c30a296053..4c14230e92 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp @@ -1,97 +1,97 @@ -#include <Parsers/ASTColumnDeclaration.h> -#include <Common/quoteString.h> +#include <Parsers/ASTColumnDeclaration.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ - -ASTPtr ASTColumnDeclaration::clone() const -{ - const auto res = std::make_shared<ASTColumnDeclaration>(*this); - res->children.clear(); - - if (type) - { - // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`), - // so we have to clone it properly as well. - res->type = type->clone(); - res->children.push_back(res->type); - } - - if (default_expression) - { - res->default_expression = default_expression->clone(); - res->children.push_back(res->default_expression); - } - - if (comment) - { - res->comment = comment->clone(); - res->children.push_back(res->comment); - } - - if (codec) - { - res->codec = codec->clone(); - res->children.push_back(res->codec); - } - - if (ttl) - { - res->ttl = ttl->clone(); - res->children.push_back(res->ttl); - } - - return res; -} - -void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - frame.need_parens = false; - - /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. - settings.ostr << backQuote(name); - - if (type) - { - settings.ostr << ' '; + + +namespace DB +{ + +ASTPtr ASTColumnDeclaration::clone() const +{ + const auto res = std::make_shared<ASTColumnDeclaration>(*this); + res->children.clear(); + + if (type) + { + // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`), + // so we have to clone it properly as well. + res->type = type->clone(); + res->children.push_back(res->type); + } + + if (default_expression) + { + res->default_expression = default_expression->clone(); + res->children.push_back(res->default_expression); + } + + if (comment) + { + res->comment = comment->clone(); + res->children.push_back(res->comment); + } + + if (codec) + { + res->codec = codec->clone(); + res->children.push_back(res->codec); + } + + if (ttl) + { + res->ttl = ttl->clone(); + res->children.push_back(res->ttl); + } + + return res; +} + +void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + frame.need_parens = false; + + /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. + settings.ostr << backQuote(name); + + if (type) + { + settings.ostr << ' '; FormatStateStacked type_frame = frame; type_frame.indent = 0; type->formatImpl(settings, state, type_frame); - } - - if (null_modifier) - { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") - << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : ""); - } - - if (default_expression) - { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? 
hilite_none : "") << ' '; - default_expression->formatImpl(settings, state, frame); - } - - if (comment) - { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; - comment->formatImpl(settings, state, frame); - } - - if (codec) - { - settings.ostr << ' '; - codec->formatImpl(settings, state, frame); - } - - if (ttl) - { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' '; - ttl->formatImpl(settings, state, frame); - } -} - -} + } + + if (null_modifier) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") + << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : ""); + } + + if (default_expression) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' '; + default_expression->formatImpl(settings, state, frame); + } + + if (comment) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; + comment->formatImpl(settings, state, frame); + } + + if (codec) + { + settings.ostr << ' '; + codec->formatImpl(settings, state, frame); + } + + if (ttl) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' '; + ttl->formatImpl(settings, state, frame); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp index 45b03bbc23..6ea6c81c3b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp @@ -1,77 +1,77 @@ -#include <Parsers/ASTDropQuery.h> -#include <Common/quoteString.h> +#include <Parsers/ASTDropQuery.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int SYNTAX_ERROR; -} - - -String ASTDropQuery::getID(char delim) const -{ - if (kind == ASTDropQuery::Kind::Drop) - return "DropQuery" + (delim + database) + delim + table; - else if (kind == ASTDropQuery::Kind::Detach) - return "DetachQuery" + (delim + database) + delim + table; - else if (kind == ASTDropQuery::Kind::Truncate) - return "TruncateQuery" + (delim + database) + delim + table; - else - throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); -} - -ASTPtr ASTDropQuery::clone() const -{ - auto res = std::make_shared<ASTDropQuery>(*this); - cloneOutputOptions(*res); - return res; -} - -void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - settings.ostr << (settings.hilite ? 
hilite_keyword : ""); - if (kind == ASTDropQuery::Kind::Drop) - settings.ostr << "DROP "; - else if (kind == ASTDropQuery::Kind::Detach) - settings.ostr << "DETACH "; - else if (kind == ASTDropQuery::Kind::Truncate) - settings.ostr << "TRUNCATE "; - else - throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); - - if (temporary) - settings.ostr << "TEMPORARY "; - - if (table.empty() && !database.empty()) - settings.ostr << "DATABASE "; - else if (is_dictionary) - settings.ostr << "DICTIONARY "; - else if (is_view) - settings.ostr << "VIEW "; - else - settings.ostr << "TABLE "; - - if (if_exists) - settings.ostr << "IF EXISTS "; - - settings.ostr << (settings.hilite ? hilite_none : ""); - - if (table.empty() && !database.empty()) - settings.ostr << backQuoteIfNeed(database); - else - settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - - formatOnCluster(settings); - + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + + +String ASTDropQuery::getID(char delim) const +{ + if (kind == ASTDropQuery::Kind::Drop) + return "DropQuery" + (delim + database) + delim + table; + else if (kind == ASTDropQuery::Kind::Detach) + return "DetachQuery" + (delim + database) + delim + table; + else if (kind == ASTDropQuery::Kind::Truncate) + return "TruncateQuery" + (delim + database) + delim + table; + else + throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); +} + +ASTPtr ASTDropQuery::clone() const +{ + auto res = std::make_shared<ASTDropQuery>(*this); + cloneOutputOptions(*res); + return res; +} + +void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : ""); + if (kind == ASTDropQuery::Kind::Drop) + settings.ostr << "DROP "; + else if (kind == ASTDropQuery::Kind::Detach) + settings.ostr << "DETACH "; + else if (kind == ASTDropQuery::Kind::Truncate) + settings.ostr << "TRUNCATE "; + else + throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); + + if (temporary) + settings.ostr << "TEMPORARY "; + + if (table.empty() && !database.empty()) + settings.ostr << "DATABASE "; + else if (is_dictionary) + settings.ostr << "DICTIONARY "; + else if (is_view) + settings.ostr << "VIEW "; + else + settings.ostr << "TABLE "; + + if (if_exists) + settings.ostr << "IF EXISTS "; + + settings.ostr << (settings.hilite ? hilite_none : ""); + + if (table.empty() && !database.empty()) + settings.ostr << backQuoteIfNeed(database); + else + settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + formatOnCluster(settings); + if (permanently) settings.ostr << " PERMANENTLY"; - if (no_delay) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " NO DELAY" << (settings.hilite ? hilite_none : ""); -} - -} + if (no_delay) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " NO DELAY" << (settings.hilite ? 
hilite_none : ""); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h index b062eaf3e8..6e5fd5854d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h @@ -1,54 +1,54 @@ -#pragma once - -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Parsers/ASTQueryWithOnCluster.h> - - -namespace DB -{ - -/** DROP query - */ -class ASTDropQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster -{ -public: - enum Kind - { - Drop, - Detach, - Truncate, - }; - - Kind kind; - bool if_exists{false}; - - /// Useful if we already have a DDL lock - bool no_ddl_lock{false}; - - /// We dropping dictionary, so print correct word - bool is_dictionary{false}; - - /// Same as above - bool is_view{false}; - - bool no_delay{false}; - +#pragma once + +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Parsers/ASTQueryWithOnCluster.h> + + +namespace DB +{ + +/** DROP query + */ +class ASTDropQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster +{ +public: + enum Kind + { + Drop, + Detach, + Truncate, + }; + + Kind kind; + bool if_exists{false}; + + /// Useful if we already have a DDL lock + bool no_ddl_lock{false}; + + /// We dropping dictionary, so print correct word + bool is_dictionary{false}; + + /// Same as above + bool is_view{false}; + + bool no_delay{false}; + // We detach the object permanently, so it will not be reattached back during server restart. bool permanently{false}; - /** Get the text that identifies this element. */ - String getID(char) const override; - ASTPtr clone() const override; - - ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override - { - return removeOnCluster<ASTDropQuery>(clone(), new_database); - } - + /** Get the text that identifies this element. */ + String getID(char) const override; + ASTPtr clone() const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override + { + return removeOnCluster<ASTDropQuery>(clone(), new_database); + } + const char * getQueryKindString() const override { return "Drop"; } -protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; - -} +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h index dffcad80a4..5c50a8cd82 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h @@ -1,87 +1,87 @@ -#pragma once - -#include <Parsers/ASTQueryWithOutput.h> - - -namespace DB -{ - - -/// AST, EXPLAIN or other query with meaning of explanation query instead of execution -class ASTExplainQuery : public ASTQueryWithOutput -{ -public: - enum ExplainKind - { - ParsedAST, /// 'EXPLAIN AST SELECT ...' - AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...' - QueryPlan, /// 'EXPLAIN SELECT ...' - QueryPipeline, /// 'EXPLAIN PIPELINE ...' 
+#pragma once + +#include <Parsers/ASTQueryWithOutput.h> + + +namespace DB +{ + + +/// AST, EXPLAIN or other query with meaning of explanation query instead of execution +class ASTExplainQuery : public ASTQueryWithOutput +{ +public: + enum ExplainKind + { + ParsedAST, /// 'EXPLAIN AST SELECT ...' + AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...' + QueryPlan, /// 'EXPLAIN SELECT ...' + QueryPipeline, /// 'EXPLAIN PIPELINE ...' QueryEstimates, /// 'EXPLAIN ESTIMATE ...' - }; - + }; + explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {} - + String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } - ExplainKind getKind() const { return kind; } - ASTPtr clone() const override - { - auto res = std::make_shared<ASTExplainQuery>(*this); - res->children.clear(); - res->children.push_back(children[0]->clone()); - cloneOutputOptions(*res); - return res; - } - - void setExplainedQuery(ASTPtr query_) - { - children.emplace_back(query_); - query = std::move(query_); - } - - void setSettings(ASTPtr settings_) - { - children.emplace_back(settings_); - ast_settings = std::move(settings_); - } - - const ASTPtr & getExplainedQuery() const { return query; } - const ASTPtr & getSettings() const { return ast_settings; } - -protected: - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override - { + ExplainKind getKind() const { return kind; } + ASTPtr clone() const override + { + auto res = std::make_shared<ASTExplainQuery>(*this); + res->children.clear(); + res->children.push_back(children[0]->clone()); + cloneOutputOptions(*res); + return res; + } + + void setExplainedQuery(ASTPtr query_) + { + children.emplace_back(query_); + query = std::move(query_); + } + + void setSettings(ASTPtr settings_) + { + children.emplace_back(settings_); + ast_settings = std::move(settings_); + } + + const ASTPtr & getExplainedQuery() const { return query; } + const ASTPtr & getSettings() const { return ast_settings; } + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? 
hilite_none : ""); - - if (ast_settings) - { - settings.ostr << ' '; - ast_settings->formatImpl(settings, state, frame); - } - - settings.ostr << settings.nl_or_ws; - query->formatImpl(settings, state, frame); - } - -private: - ExplainKind kind; - - ASTPtr query; - ASTPtr ast_settings; - + + if (ast_settings) + { + settings.ostr << ' '; + ast_settings->formatImpl(settings, state, frame); + } + + settings.ostr << settings.nl_or_ws; + query->formatImpl(settings, state, frame); + } + +private: + ExplainKind kind; + + ASTPtr query; + ASTPtr ast_settings; + static String toString(ExplainKind kind) - { - switch (kind) - { + { + switch (kind) + { case ParsedAST: return "EXPLAIN AST"; case AnalyzedSyntax: return "EXPLAIN SYNTAX"; - case QueryPlan: return "EXPLAIN"; - case QueryPipeline: return "EXPLAIN PIPELINE"; + case QueryPlan: return "EXPLAIN"; + case QueryPipeline: return "EXPLAIN PIPELINE"; case QueryEstimates: return "EXPLAIN ESTIMATE"; - } - - __builtin_unreachable(); - } -}; - -} + } + + __builtin_unreachable(); + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp index ca10f23e58..72bdd7d6b0 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp @@ -1,28 +1,28 @@ -#include <Parsers/ASTKillQueryQuery.h> +#include <Parsers/ASTKillQueryQuery.h> #include <IO/Operators.h> - -namespace DB -{ - -String ASTKillQueryQuery::getID(char delim) const -{ - return String("KillQueryQuery") + delim + (where_expression ? where_expression->getID() : "") + delim + String(sync ? "SYNC" : "ASYNC"); -} - -void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - settings.ostr << (settings.hilite ? hilite_keyword : "") << "KILL " - << (type == Type::Query ? "QUERY" : "MUTATION"); - - formatOnCluster(settings); - - if (where_expression) - { - settings.ostr << " WHERE " << (settings.hilite ? hilite_none : ""); - where_expression->formatImpl(settings, state, frame); - } - - settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << (test ? "TEST" : (sync ? "SYNC" : "ASYNC")) << (settings.hilite ? hilite_none : ""); -} - -} + +namespace DB +{ + +String ASTKillQueryQuery::getID(char delim) const +{ + return String("KillQueryQuery") + delim + (where_expression ? where_expression->getID() : "") + delim + String(sync ? "SYNC" : "ASYNC"); +} + +void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "KILL " + << (type == Type::Query ? "QUERY" : "MUTATION"); + + formatOnCluster(settings); + + if (where_expression) + { + settings.ostr << " WHERE " << (settings.hilite ? hilite_none : ""); + where_expression->formatImpl(settings, state, frame); + } + + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << (test ? "TEST" : (sync ? "SYNC" : "ASYNC")) << (settings.hilite ? 
hilite_none : ""); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h index fa0dbcda16..c1b3956962 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h @@ -1,45 +1,45 @@ #pragma once -#include <Parsers/IAST.h> -#include <Parsers/ASTQueryWithOutput.h> -#include <Parsers/ASTQueryWithOnCluster.h> - -namespace DB -{ - -class ASTKillQueryQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster -{ -public: - enum class Type - { - Query, /// KILL QUERY - Mutation, /// KILL MUTATION - }; - - Type type = Type::Query; - ASTPtr where_expression; // expression to filter processes from system.processes table - bool sync = false; // SYNC or ASYNC mode - bool test = false; // does it TEST mode? (doesn't cancel queries just checks and shows them) - - ASTPtr clone() const override - { - auto clone = std::make_shared<ASTKillQueryQuery>(*this); - if (where_expression) - { - clone->where_expression = where_expression->clone(); - clone->children = {clone->where_expression}; - } - - return clone; - } - - String getID(char) const override; - - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - - ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override - { - return removeOnCluster<ASTKillQueryQuery>(clone()); - } -}; - -} +#include <Parsers/IAST.h> +#include <Parsers/ASTQueryWithOutput.h> +#include <Parsers/ASTQueryWithOnCluster.h> + +namespace DB +{ + +class ASTKillQueryQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster +{ +public: + enum class Type + { + Query, /// KILL QUERY + Mutation, /// KILL MUTATION + }; + + Type type = Type::Query; + ASTPtr where_expression; // expression to filter processes from system.processes table + bool sync = false; // SYNC or ASYNC mode + bool test = false; // does it TEST mode? (doesn't cancel queries just checks and shows them) + + ASTPtr clone() const override + { + auto clone = std::make_shared<ASTKillQueryQuery>(*this); + if (where_expression) + { + clone->where_expression = where_expression->clone(); + clone->children = {clone->where_expression}; + } + + return clone; + } + + String getID(char) const override; + + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override + { + return removeOnCluster<ASTKillQueryQuery>(clone()); + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp index 1af215c1ef..6423e247ec 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp @@ -1,34 +1,34 @@ -#include <Parsers/ASTOptimizeQuery.h> -#include <Common/quoteString.h> +#include <Parsers/ASTOptimizeQuery.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - -namespace DB -{ - -void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") - << (!database.empty() ? 
backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - - formatOnCluster(settings); - - if (partition) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - - if (final) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FINAL" << (settings.hilite ? hilite_none : ""); - - if (deduplicate) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : ""); + +namespace DB +{ + +void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + formatOnCluster(settings); + + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + + if (final) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FINAL" << (settings.hilite ? hilite_none : ""); + + if (deduplicate) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : ""); if (deduplicate_by_columns) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " BY " << (settings.hilite ? hilite_none : ""); deduplicate_by_columns->formatImpl(settings, state, frame); } -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h index f00cd48002..f4981d156c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h @@ -1,59 +1,59 @@ -#pragma once - -#include <Parsers/IAST.h> -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Parsers/ASTQueryWithOnCluster.h> - -namespace DB -{ - - -/** OPTIMIZE query - */ -class ASTOptimizeQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster -{ -public: - /// The partition to optimize can be specified. - ASTPtr partition; - /// A flag can be specified - perform optimization "to the end" instead of one step. +#pragma once + +#include <Parsers/IAST.h> +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Parsers/ASTQueryWithOnCluster.h> + +namespace DB +{ + + +/** OPTIMIZE query + */ +class ASTOptimizeQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster +{ +public: + /// The partition to optimize can be specified. + ASTPtr partition; + /// A flag can be specified - perform optimization "to the end" instead of one step. bool final = false; - /// Do deduplicate (default: false) + /// Do deduplicate (default: false) bool deduplicate = false; /// Deduplicate by columns. ASTPtr deduplicate_by_columns; - - /** Get the text that identifies this element. */ - String getID(char delim) const override - { - return "OptimizeQuery" + (delim + database) + delim + table + (final ? "_final" : "") + (deduplicate ? 
"_deduplicate" : ""); - } - - ASTPtr clone() const override - { - auto res = std::make_shared<ASTOptimizeQuery>(*this); - res->children.clear(); - - if (partition) - { - res->partition = partition->clone(); - res->children.push_back(res->partition); - } - + + /** Get the text that identifies this element. */ + String getID(char delim) const override + { + return "OptimizeQuery" + (delim + database) + delim + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); + } + + ASTPtr clone() const override + { + auto res = std::make_shared<ASTOptimizeQuery>(*this); + res->children.clear(); + + if (partition) + { + res->partition = partition->clone(); + res->children.push_back(res->partition); + } + if (deduplicate_by_columns) { res->deduplicate_by_columns = deduplicate_by_columns->clone(); res->children.push_back(res->deduplicate_by_columns); } - return res; - } - - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - - ASTPtr getRewrittenASTWithoutOnCluster(const std::string &new_database) const override - { - return removeOnCluster<ASTOptimizeQuery>(clone(), new_database); - } -}; - -} + return res; + } + + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const std::string &new_database) const override + { + return removeOnCluster<ASTOptimizeQuery>(clone(), new_database); + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp index fa968c2ac8..06bfe4f521 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp @@ -1,45 +1,45 @@ -#include <Parsers/ASTPartition.h> -#include <IO/WriteHelpers.h> +#include <Parsers/ASTPartition.h> +#include <IO/WriteHelpers.h> #include <IO/Operators.h> - -namespace DB -{ - -String ASTPartition::getID(char delim) const -{ - if (value) - return "Partition"; - else - return "Partition_ID" + (delim + id); -} - -ASTPtr ASTPartition::clone() const -{ - auto res = std::make_shared<ASTPartition>(*this); - res->children.clear(); - - if (value) - { - res->value = value->clone(); - res->children.push_back(res->value); - } - - return res; -} - -void ASTPartition::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - if (value) - { - value->formatImpl(settings, state, frame); - } - else - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "ID " << (settings.hilite ? hilite_none : ""); - WriteBufferFromOwnString id_buf; - writeQuoted(id, id_buf); - settings.ostr << id_buf.str(); - } -} - -} + +namespace DB +{ + +String ASTPartition::getID(char delim) const +{ + if (value) + return "Partition"; + else + return "Partition_ID" + (delim + id); +} + +ASTPtr ASTPartition::clone() const +{ + auto res = std::make_shared<ASTPartition>(*this); + res->children.clear(); + + if (value) + { + res->value = value->clone(); + res->children.push_back(res->value); + } + + return res; +} + +void ASTPartition::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (value) + { + value->formatImpl(settings, state, frame); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "ID " << (settings.hilite ? 
hilite_none : ""); + WriteBufferFromOwnString id_buf; + writeQuoted(id, id_buf); + settings.ostr << id_buf.str(); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h index c0d71861a4..8a837a1045 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h @@ -1,27 +1,27 @@ -#pragma once - -#include <Parsers/IAST.h> -#include <common/StringRef.h> - - -namespace DB -{ - -/// Either a (possibly compound) expression representing a partition value or a partition ID. -class ASTPartition : public IAST -{ -public: - ASTPtr value; - String fields_str; /// The extent of comma-separated partition expression fields without parentheses. - size_t fields_count = 0; - - String id; - - String getID(char) const override; - ASTPtr clone() const override; - -protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; -}; - -} +#pragma once + +#include <Parsers/IAST.h> +#include <common/StringRef.h> + + +namespace DB +{ + +/// Either a (possibly compound) expression representing a partition value or a partition ID. +class ASTPartition : public IAST +{ +public: + ASTPtr value; + String fields_str; /// The extent of comma-separated partition expression fields without parentheses. + size_t fields_count = 0; + + String id; + + String getID(char) const override; + ASTPtr clone() const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp index d895c5b69f..fc5385e4a5 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp @@ -1,95 +1,95 @@ -#include <Parsers/ASTRolesOrUsersSet.h> -#include <Common/quoteString.h> +#include <Parsers/ASTRolesOrUsersSet.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ -namespace -{ + + +namespace DB +{ +namespace +{ void formatNameOrID(const String & str, bool is_id, const IAST::FormatSettings & settings) - { - if (is_id) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "(" - << quoteString(str) << ")"; - } - else - { - settings.ostr << backQuoteIfNeed(str); - } - } -} - -void ASTRolesOrUsersSet::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - if (empty()) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : ""); - return; - } - - bool need_comma = false; + { + if (is_id) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "(" + << quoteString(str) << ")"; + } + else + { + settings.ostr << backQuoteIfNeed(str); + } + } +} + +void ASTRolesOrUsersSet::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (empty()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? 
IAST::hilite_none : ""); + return; + } - if (all) - { - if (std::exchange(need_comma, true)) - settings.ostr << ", "; + bool need_comma = false; + + if (all) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_keyword_any ? "ANY" : "ALL") << (settings.hilite ? IAST::hilite_none : ""); - } - else - { + } + else + { for (const auto & name : names) - { - if (std::exchange(need_comma, true)) - settings.ostr << ", "; + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; formatNameOrID(name, id_mode, settings); - } - - if (current_user) - { - if (std::exchange(need_comma, true)) - settings.ostr << ", "; - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : ""); - } - } - - if (except_current_user || !except_names.empty()) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " EXCEPT " << (settings.hilite ? IAST::hilite_none : ""); - need_comma = false; - + } + + if (current_user) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : ""); + } + } + + if (except_current_user || !except_names.empty()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " EXCEPT " << (settings.hilite ? IAST::hilite_none : ""); + need_comma = false; + for (const auto & name : except_names) - { - if (std::exchange(need_comma, true)) - settings.ostr << ", "; + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; formatNameOrID(name, id_mode, settings); - } - - if (except_current_user) - { - if (std::exchange(need_comma, true)) - settings.ostr << ", "; - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : ""); - } - } -} - - + } + + if (except_current_user) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : ""); + } + } +} + + void ASTRolesOrUsersSet::replaceCurrentUserTag(const String & current_user_name) -{ - if (current_user) - { - names.push_back(current_user_name); - current_user = false; - } - - if (except_current_user) - { - except_names.push_back(current_user_name); - except_current_user = false; - } -} - -} +{ + if (current_user) + { + names.push_back(current_user_name); + current_user = false; + } + + if (except_current_user) + { + except_names.push_back(current_user_name); + except_current_user = false; + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h index 1efdab0cdb..15d42ee39a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h @@ -1,35 +1,35 @@ -#pragma once - -#include <Parsers/IAST.h> - - -namespace DB -{ +#pragma once + +#include <Parsers/IAST.h> + + +namespace DB +{ using Strings = std::vector<String>; -/// Represents a set of users/roles like +/// Represents a set of users/roles like /// {user_name | role_name | CURRENT_USER | ALL | NONE} [,...] 
/// [EXCEPT {user_name | role_name | CURRENT_USER | ALL | NONE} [,...]] -class ASTRolesOrUsersSet : public IAST -{ -public: +class ASTRolesOrUsersSet : public IAST +{ +public: bool all = false; - Strings names; - bool current_user = false; - Strings except_names; - bool except_current_user = false; - + Strings names; + bool current_user = false; + Strings except_names; + bool except_current_user = false; + bool allow_users = true; /// whether this set can contain names of users bool allow_roles = true; /// whether this set can contain names of roles bool id_mode = false; /// whether this set keep UUIDs instead of names bool use_keyword_any = false; /// whether the keyword ANY should be used instead of the keyword ALL - - bool empty() const { return names.empty() && !current_user && !all; } + + bool empty() const { return names.empty() && !current_user && !all; } void replaceCurrentUserTag(const String & current_user_name); - - String getID(char) const override { return "RolesOrUsersSet"; } - ASTPtr clone() const override { return std::make_shared<ASTRolesOrUsersSet>(*this); } - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; -} + + String getID(char) const override { return "RolesOrUsersSet"; } + ASTPtr clone() const override { return std::make_shared<ASTRolesOrUsersSet>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp index 66fc6c44ea..e59e103b77 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp @@ -1,44 +1,44 @@ -#include <Parsers/ASTSetRoleQuery.h> -#include <Parsers/ASTRolesOrUsersSet.h> -#include <Common/quoteString.h> +#include <Parsers/ASTSetRoleQuery.h> +#include <Parsers/ASTRolesOrUsersSet.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ -String ASTSetRoleQuery::getID(char) const -{ - return "SetRoleQuery"; -} - - -ASTPtr ASTSetRoleQuery::clone() const -{ - return std::make_shared<ASTSetRoleQuery>(*this); -} - - -void ASTSetRoleQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - settings.ostr << (settings.hilite ? hilite_keyword : ""); - switch (kind) - { - case Kind::SET_ROLE: settings.ostr << "SET ROLE"; break; - case Kind::SET_ROLE_DEFAULT: settings.ostr << "SET ROLE DEFAULT"; break; - case Kind::SET_DEFAULT_ROLE: settings.ostr << "SET DEFAULT ROLE"; break; - } - settings.ostr << (settings.hilite ? hilite_none : ""); - - if (kind == Kind::SET_ROLE_DEFAULT) - return; - - settings.ostr << " "; - roles->format(settings); - - if (kind == Kind::SET_ROLE) - return; - - settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : ""); - to_users->format(settings); -} -} + + +namespace DB +{ +String ASTSetRoleQuery::getID(char) const +{ + return "SetRoleQuery"; +} + + +ASTPtr ASTSetRoleQuery::clone() const +{ + return std::make_shared<ASTSetRoleQuery>(*this); +} + + +void ASTSetRoleQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? 
hilite_keyword : ""); + switch (kind) + { + case Kind::SET_ROLE: settings.ostr << "SET ROLE"; break; + case Kind::SET_ROLE_DEFAULT: settings.ostr << "SET ROLE DEFAULT"; break; + case Kind::SET_DEFAULT_ROLE: settings.ostr << "SET DEFAULT ROLE"; break; + } + settings.ostr << (settings.hilite ? hilite_none : ""); + + if (kind == Kind::SET_ROLE_DEFAULT) + return; + + settings.ostr << " "; + roles->format(settings); + + if (kind == Kind::SET_ROLE) + return; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : ""); + to_users->format(settings); +} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h index 725a2a1737..f0170ae6af 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h @@ -1,31 +1,31 @@ -#pragma once - -#include <Parsers/IAST.h> - - -namespace DB -{ -class ASTRolesOrUsersSet; - -/** SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]} - * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...] - */ -class ASTSetRoleQuery : public IAST -{ -public: - enum class Kind - { - SET_ROLE, - SET_ROLE_DEFAULT, - SET_DEFAULT_ROLE, - }; - Kind kind = Kind::SET_ROLE; - - std::shared_ptr<ASTRolesOrUsersSet> roles; - std::shared_ptr<ASTRolesOrUsersSet> to_users; - - String getID(char) const override; - ASTPtr clone() const override; - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; -} +#pragma once + +#include <Parsers/IAST.h> + + +namespace DB +{ +class ASTRolesOrUsersSet; + +/** SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]} + * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...] + */ +class ASTSetRoleQuery : public IAST +{ +public: + enum class Kind + { + SET_ROLE, + SET_ROLE_DEFAULT, + SET_DEFAULT_ROLE, + }; + Kind kind = Kind::SET_ROLE; + + std::shared_ptr<ASTRolesOrUsersSet> roles; + std::shared_ptr<ASTRolesOrUsersSet> to_users; + + String getID(char) const override; + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp index 0e6274d452..8f35c154a7 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp @@ -1,98 +1,98 @@ -#include <Parsers/ASTSettingsProfileElement.h> +#include <Parsers/ASTSettingsProfileElement.h> #include <Parsers/formatSettingName.h> #include <Common/FieldVisitorToString.h> -#include <Common/quoteString.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ -namespace -{ - void formatProfileNameOrID(const String & str, bool is_id, const IAST::FormatSettings & settings) - { - if (is_id) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? 
IAST::hilite_none : "") << "(" - << quoteString(str) << ")"; - } - else - { - settings.ostr << backQuoteIfNeed(str); - } - } -} - -void ASTSettingsProfileElement::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - if (!parent_profile.empty()) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_inherit_keyword ? "INHERIT" : "PROFILE") << " " - << (settings.hilite ? IAST::hilite_none : ""); - formatProfileNameOrID(parent_profile, id_mode, settings); - return; - } - + + +namespace DB +{ +namespace +{ + void formatProfileNameOrID(const String & str, bool is_id, const IAST::FormatSettings & settings) + { + if (is_id) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "(" + << quoteString(str) << ")"; + } + else + { + settings.ostr << backQuoteIfNeed(str); + } + } +} + +void ASTSettingsProfileElement::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (!parent_profile.empty()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_inherit_keyword ? "INHERIT" : "PROFILE") << " " + << (settings.hilite ? IAST::hilite_none : ""); + formatProfileNameOrID(parent_profile, id_mode, settings); + return; + } + formatSettingName(setting_name, settings.ostr); - - if (!value.isNull()) - { - settings.ostr << " = " << applyVisitor(FieldVisitorToString{}, value); - } - - if (!min_value.isNull()) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MIN " << (settings.hilite ? IAST::hilite_none : "") - << applyVisitor(FieldVisitorToString{}, min_value); - } - - if (!max_value.isNull()) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << (settings.hilite ? IAST::hilite_none : "") - << applyVisitor(FieldVisitorToString{}, max_value); - } - - if (readonly) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (*readonly ? " READONLY" : " WRITABLE") - << (settings.hilite ? IAST::hilite_none : ""); - } -} - - -bool ASTSettingsProfileElements::empty() const -{ - for (const auto & element : elements) - if (!element->empty()) - return false; - return true; -} - - -void ASTSettingsProfileElements::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - if (empty()) - { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : ""); - return; - } - - bool need_comma = false; - for (const auto & element : elements) - { - if (need_comma) - settings.ostr << ", "; - need_comma = true; - - element->format(settings); - } -} - - -void ASTSettingsProfileElements::setUseInheritKeyword(bool use_inherit_keyword_) -{ - for (auto & element : elements) - element->use_inherit_keyword = use_inherit_keyword_; -} - -} + + if (!value.isNull()) + { + settings.ostr << " = " << applyVisitor(FieldVisitorToString{}, value); + } + + if (!min_value.isNull()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MIN " << (settings.hilite ? IAST::hilite_none : "") + << applyVisitor(FieldVisitorToString{}, min_value); + } + + if (!max_value.isNull()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << (settings.hilite ? IAST::hilite_none : "") + << applyVisitor(FieldVisitorToString{}, max_value); + } + + if (readonly) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (*readonly ? 
" READONLY" : " WRITABLE") + << (settings.hilite ? IAST::hilite_none : ""); + } +} + + +bool ASTSettingsProfileElements::empty() const +{ + for (const auto & element : elements) + if (!element->empty()) + return false; + return true; +} + + +void ASTSettingsProfileElements::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (empty()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : ""); + return; + } + + bool need_comma = false; + for (const auto & element : elements) + { + if (need_comma) + settings.ostr << ", "; + need_comma = true; + + element->format(settings); + } +} + + +void ASTSettingsProfileElements::setUseInheritKeyword(bool use_inherit_keyword_) +{ + for (auto & element : elements) + element->use_inherit_keyword = use_inherit_keyword_; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h index 0a2bbc92b8..6a54bca321 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h @@ -1,48 +1,48 @@ -#pragma once - -#include <Parsers/IAST.h> -#include <Core/Field.h> - - -namespace DB -{ -/** Represents a settings profile's element like the following - * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name' - */ -class ASTSettingsProfileElement : public IAST -{ -public: - String parent_profile; - String setting_name; - Field value; - Field min_value; - Field max_value; - std::optional<bool> readonly; - bool id_mode = false; /// If true then `parent_profile` keeps UUID, not a name. - bool use_inherit_keyword = false; /// If true then this element is a part of ASTCreateSettingsProfileQuery. - - bool empty() const { return parent_profile.empty() && setting_name.empty(); } - - String getID(char) const override { return "SettingsProfileElement"; } - ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElement>(*this); } - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; - - -/** Represents settings profile's elements like the following - * {{variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'} [,...] - */ -class ASTSettingsProfileElements : public IAST -{ -public: - std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements; - - bool empty() const; - - String getID(char) const override { return "SettingsProfileElements"; } - ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElements>(*this); } - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; - - void setUseInheritKeyword(bool use_inherit_keyword_); -}; -} +#pragma once + +#include <Parsers/IAST.h> +#include <Core/Field.h> + + +namespace DB +{ +/** Represents a settings profile's element like the following + * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name' + */ +class ASTSettingsProfileElement : public IAST +{ +public: + String parent_profile; + String setting_name; + Field value; + Field min_value; + Field max_value; + std::optional<bool> readonly; + bool id_mode = false; /// If true then `parent_profile` keeps UUID, not a name. 
+ bool use_inherit_keyword = false; /// If true then this element is a part of ASTCreateSettingsProfileQuery. + + bool empty() const { return parent_profile.empty() && setting_name.empty(); } + + String getID(char) const override { return "SettingsProfileElement"; } + ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElement>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + + +/** Represents settings profile's elements like the following + * {{variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'} [,...] + */ +class ASTSettingsProfileElements : public IAST +{ +public: + std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements; + + bool empty() const; + + String getID(char) const override { return "SettingsProfileElements"; } + ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElements>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + + void setUseInheritKeyword(bool use_inherit_keyword_); +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h index 8b14660bce..dffd7ff240 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h @@ -1,17 +1,17 @@ -#pragma once - -#include <Parsers/ASTQueryWithOutput.h> - - -namespace DB -{ - -struct ASTShowAccessQueryNames -{ - static constexpr auto ID = "ShowAccessQuery"; - static constexpr auto Query = "SHOW ACCESS"; -}; - -using ASTShowAccessQuery = ASTQueryWithOutputImpl<ASTShowAccessQueryNames>; - -} +#pragma once + +#include <Parsers/ASTQueryWithOutput.h> + + +namespace DB +{ + +struct ASTShowAccessQueryNames +{ + static constexpr auto ID = "ShowAccessQuery"; + static constexpr auto Query = "SHOW ACCESS"; +}; + +using ASTShowAccessQuery = ASTQueryWithOutputImpl<ASTShowAccessQueryNames>; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp index d89c246e62..4011cfc522 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp @@ -1,37 +1,37 @@ -#include <Parsers/ASTShowGrantsQuery.h> -#include <Parsers/ASTRolesOrUsersSet.h> -#include <Common/quoteString.h> +#include <Parsers/ASTShowGrantsQuery.h> +#include <Parsers/ASTRolesOrUsersSet.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ -String ASTShowGrantsQuery::getID(char) const -{ - return "ShowGrantsQuery"; -} - - -ASTPtr ASTShowGrantsQuery::clone() const -{ - return std::make_shared<ASTShowGrantsQuery>(*this); -} - - -void ASTShowGrantsQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW GRANTS" - << (settings.hilite ? hilite_none : ""); - - if (for_roles->current_user && !for_roles->all && for_roles->names.empty() && for_roles->except_names.empty() - && !for_roles->except_current_user) - { - } - else - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " - << (settings.hilite ? 
hilite_none : ""); - for_roles->format(settings); - } -} -} + + +namespace DB +{ +String ASTShowGrantsQuery::getID(char) const +{ + return "ShowGrantsQuery"; +} + + +ASTPtr ASTShowGrantsQuery::clone() const +{ + return std::make_shared<ASTShowGrantsQuery>(*this); +} + + +void ASTShowGrantsQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW GRANTS" + << (settings.hilite ? hilite_none : ""); + + if (for_roles->current_user && !for_roles->all && for_roles->names.empty() && for_roles->except_names.empty() + && !for_roles->except_current_user) + { + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " + << (settings.hilite ? hilite_none : ""); + for_roles->format(settings); + } +} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h index b125efe784..04764fe350 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h @@ -1,21 +1,21 @@ -#pragma once - -#include <Parsers/ASTQueryWithOutput.h> - - -namespace DB -{ -class ASTRolesOrUsersSet; - -/** SHOW GRANTS [FOR user_name] - */ -class ASTShowGrantsQuery : public ASTQueryWithOutput -{ -public: - std::shared_ptr<ASTRolesOrUsersSet> for_roles; - - String getID(char) const override; - ASTPtr clone() const override; - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; -} +#pragma once + +#include <Parsers/ASTQueryWithOutput.h> + + +namespace DB +{ +class ASTRolesOrUsersSet; + +/** SHOW GRANTS [FOR user_name] + */ +class ASTShowGrantsQuery : public ASTQueryWithOutput +{ +public: + std::shared_ptr<ASTRolesOrUsersSet> for_roles; + + String getID(char) const override; + ASTPtr clone() const override; + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h index 4cbab20024..8cc4ed16f9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h @@ -1,17 +1,17 @@ -#pragma once - -#include <Parsers/ASTQueryWithOutput.h> - - -namespace DB -{ - -struct ASTShowPrivilegesIDAndQueryName -{ - static constexpr auto ID = "ShowPrivilegesQuery"; - static constexpr auto Query = "SHOW PRIVILEGES"; -}; - -using ASTShowPrivilegesQuery = ASTQueryWithOutputImpl<ASTShowPrivilegesIDAndQueryName>; - -} +#pragma once + +#include <Parsers/ASTQueryWithOutput.h> + + +namespace DB +{ + +struct ASTShowPrivilegesIDAndQueryName +{ + static constexpr auto ID = "ShowPrivilegesQuery"; + static constexpr auto Query = "SHOW PRIVILEGES"; +}; + +using ASTShowPrivilegesQuery = ASTQueryWithOutputImpl<ASTShowPrivilegesIDAndQueryName>; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h index 194d25c5c3..2bf67c1951 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h +++ 
b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h @@ -1,17 +1,17 @@ -#pragma once - -#include <Parsers/ASTQueryWithOutput.h> - - -namespace DB -{ - -struct ASTShowProcesslistIDAndQueryNames -{ - static constexpr auto ID = "ShowProcesslistQuery"; - static constexpr auto Query = "SHOW PROCESSLIST"; -}; - -using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslistIDAndQueryNames>; - -} +#pragma once + +#include <Parsers/ASTQueryWithOutput.h> + + +namespace DB +{ + +struct ASTShowProcesslistIDAndQueryNames +{ + static constexpr auto ID = "ShowProcesslistQuery"; + static constexpr auto Query = "SHOW PROCESSLIST"; +}; + +using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslistIDAndQueryNames>; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp index fc7ed6b374..61d68c4a27 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp @@ -1,19 +1,19 @@ -#include <iomanip> -#include <Parsers/ASTShowTablesQuery.h> -#include <Common/quoteString.h> +#include <iomanip> +#include <Parsers/ASTShowTablesQuery.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - -namespace DB -{ - -ASTPtr ASTShowTablesQuery::clone() const -{ - auto res = std::make_shared<ASTShowTablesQuery>(*this); - res->children.clear(); - cloneOutputOptions(*res); - return res; -} - + +namespace DB +{ + +ASTPtr ASTShowTablesQuery::clone() const +{ + auto res = std::make_shared<ASTShowTablesQuery>(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; +} + void ASTShowTablesQuery::formatLike(const FormatSettings & settings) const { if (!like.empty()) @@ -34,52 +34,52 @@ void ASTShowTablesQuery::formatLimit(const FormatSettings & settings, FormatStat } } -void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const -{ - if (databases) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); +void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (databases) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); formatLike(settings); formatLimit(settings, state, frame); - } - else if (clusters) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : ""); + } + else if (clusters) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : ""); formatLike(settings); formatLimit(settings, state, frame); - - } - else if (cluster) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? hilite_none : ""); - settings.ostr << " " << backQuoteIfNeed(cluster_str); - } + + } + else if (cluster) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << backQuoteIfNeed(cluster_str); + } else if (m_settings) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (changed ? "CHANGED " : "") << "SETTINGS" << (settings.hilite ? 
hilite_none : ""); formatLike(settings); } - else - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << - (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : ""); - - if (!from.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); - + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << + (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : ""); + + if (!from.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(from); + formatLike(settings); - + if (where_expression) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); - where_expression->formatImpl(settings, state, frame); - } - + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); + where_expression->formatImpl(settings, state, frame); + } + formatLimit(settings, state, frame); - } -} - -} + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h index f5cbfe77e7..57383dff66 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h @@ -1,46 +1,46 @@ -#pragma once - -#include <iomanip> -#include <Parsers/IAST.h> -#include <Parsers/ASTQueryWithOutput.h> - - -namespace DB -{ - - -/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS - */ -class ASTShowTablesQuery : public ASTQueryWithOutput -{ -public: - bool databases{false}; - bool clusters{false}; - bool cluster{false}; - bool dictionaries{false}; +#pragma once + +#include <iomanip> +#include <Parsers/IAST.h> +#include <Parsers/ASTQueryWithOutput.h> + + +namespace DB +{ + + +/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS + */ +class ASTShowTablesQuery : public ASTQueryWithOutput +{ +public: + bool databases{false}; + bool clusters{false}; + bool cluster{false}; + bool dictionaries{false}; bool m_settings{false}; bool changed{false}; - bool temporary{false}; - - String cluster_str; - String from; - String like; - - bool not_like{false}; - bool case_insensitive_like{false}; - - ASTPtr where_expression; - ASTPtr limit_length; - - /** Get the text that identifies this element. */ - String getID(char) const override { return "ShowTables"; } - - ASTPtr clone() const override; - -protected: + bool temporary{false}; + + String cluster_str; + String from; + String like; + + bool not_like{false}; + bool case_insensitive_like{false}; + + ASTPtr where_expression; + ASTPtr limit_length; + + /** Get the text that identifies this element. 
*/ + String getID(char) const override { return "ShowTables"; } + + ASTPtr clone() const override; + +protected: void formatLike(const FormatSettings & settings) const; void formatLimit(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const; - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; - -} + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp index 3f140f4da4..5d01e124b0 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp @@ -1,149 +1,149 @@ -#include <Parsers/IAST.h> -#include <Parsers/ASTSystemQuery.h> -#include <Common/quoteString.h> +#include <Parsers/IAST.h> +#include <Parsers/ASTSystemQuery.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ - - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - -const char * ASTSystemQuery::typeToString(Type type) -{ - switch (type) - { - case Type::SHUTDOWN: - return "SHUTDOWN"; - case Type::KILL: - return "KILL"; + + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +const char * ASTSystemQuery::typeToString(Type type) +{ + switch (type) + { + case Type::SHUTDOWN: + return "SHUTDOWN"; + case Type::KILL: + return "KILL"; case Type::SUSPEND: return "SUSPEND"; - case Type::DROP_DNS_CACHE: - return "DROP DNS CACHE"; - case Type::DROP_MARK_CACHE: - return "DROP MARK CACHE"; - case Type::DROP_UNCOMPRESSED_CACHE: - return "DROP UNCOMPRESSED CACHE"; + case Type::DROP_DNS_CACHE: + return "DROP DNS CACHE"; + case Type::DROP_MARK_CACHE: + return "DROP MARK CACHE"; + case Type::DROP_UNCOMPRESSED_CACHE: + return "DROP UNCOMPRESSED CACHE"; case Type::DROP_MMAP_CACHE: return "DROP MMAP CACHE"; -#if USE_EMBEDDED_COMPILER - case Type::DROP_COMPILED_EXPRESSION_CACHE: - return "DROP COMPILED EXPRESSION CACHE"; -#endif - case Type::STOP_LISTEN_QUERIES: - return "STOP LISTEN QUERIES"; - case Type::START_LISTEN_QUERIES: - return "START LISTEN QUERIES"; - case Type::RESTART_REPLICAS: - return "RESTART REPLICAS"; - case Type::RESTART_REPLICA: - return "RESTART REPLICA"; +#if USE_EMBEDDED_COMPILER + case Type::DROP_COMPILED_EXPRESSION_CACHE: + return "DROP COMPILED EXPRESSION CACHE"; +#endif + case Type::STOP_LISTEN_QUERIES: + return "STOP LISTEN QUERIES"; + case Type::START_LISTEN_QUERIES: + return "START LISTEN QUERIES"; + case Type::RESTART_REPLICAS: + return "RESTART REPLICAS"; + case Type::RESTART_REPLICA: + return "RESTART REPLICA"; case Type::RESTORE_REPLICA: return "RESTORE REPLICA"; - case Type::DROP_REPLICA: - return "DROP REPLICA"; - case Type::SYNC_REPLICA: - return "SYNC REPLICA"; - case Type::FLUSH_DISTRIBUTED: - return "FLUSH DISTRIBUTED"; - case Type::RELOAD_DICTIONARY: - return "RELOAD DICTIONARY"; - case Type::RELOAD_DICTIONARIES: - return "RELOAD DICTIONARIES"; + case Type::DROP_REPLICA: + return "DROP REPLICA"; + case Type::SYNC_REPLICA: + return "SYNC REPLICA"; + case Type::FLUSH_DISTRIBUTED: + return "FLUSH DISTRIBUTED"; + case Type::RELOAD_DICTIONARY: + return "RELOAD DICTIONARY"; + case Type::RELOAD_DICTIONARIES: + return "RELOAD DICTIONARIES"; case Type::RELOAD_MODEL: return "RELOAD MODEL"; case Type::RELOAD_MODELS: return 
"RELOAD MODELS"; - case Type::RELOAD_EMBEDDED_DICTIONARIES: - return "RELOAD EMBEDDED DICTIONARIES"; - case Type::RELOAD_CONFIG: - return "RELOAD CONFIG"; + case Type::RELOAD_EMBEDDED_DICTIONARIES: + return "RELOAD EMBEDDED DICTIONARIES"; + case Type::RELOAD_CONFIG: + return "RELOAD CONFIG"; case Type::RELOAD_SYMBOLS: return "RELOAD SYMBOLS"; - case Type::STOP_MERGES: - return "STOP MERGES"; - case Type::START_MERGES: - return "START MERGES"; - case Type::STOP_TTL_MERGES: - return "STOP TTL MERGES"; - case Type::START_TTL_MERGES: - return "START TTL MERGES"; - case Type::STOP_MOVES: - return "STOP MOVES"; - case Type::START_MOVES: - return "START MOVES"; - case Type::STOP_FETCHES: - return "STOP FETCHES"; - case Type::START_FETCHES: - return "START FETCHES"; - case Type::STOP_REPLICATED_SENDS: - return "STOP REPLICATED SENDS"; - case Type::START_REPLICATED_SENDS: - return "START REPLICATED SENDS"; - case Type::STOP_REPLICATION_QUEUES: - return "STOP REPLICATION QUEUES"; - case Type::START_REPLICATION_QUEUES: - return "START REPLICATION QUEUES"; - case Type::STOP_DISTRIBUTED_SENDS: - return "STOP DISTRIBUTED SENDS"; - case Type::START_DISTRIBUTED_SENDS: - return "START DISTRIBUTED SENDS"; - case Type::FLUSH_LOGS: - return "FLUSH LOGS"; + case Type::STOP_MERGES: + return "STOP MERGES"; + case Type::START_MERGES: + return "START MERGES"; + case Type::STOP_TTL_MERGES: + return "STOP TTL MERGES"; + case Type::START_TTL_MERGES: + return "START TTL MERGES"; + case Type::STOP_MOVES: + return "STOP MOVES"; + case Type::START_MOVES: + return "START MOVES"; + case Type::STOP_FETCHES: + return "STOP FETCHES"; + case Type::START_FETCHES: + return "START FETCHES"; + case Type::STOP_REPLICATED_SENDS: + return "STOP REPLICATED SENDS"; + case Type::START_REPLICATED_SENDS: + return "START REPLICATED SENDS"; + case Type::STOP_REPLICATION_QUEUES: + return "STOP REPLICATION QUEUES"; + case Type::START_REPLICATION_QUEUES: + return "START REPLICATION QUEUES"; + case Type::STOP_DISTRIBUTED_SENDS: + return "STOP DISTRIBUTED SENDS"; + case Type::START_DISTRIBUTED_SENDS: + return "START DISTRIBUTED SENDS"; + case Type::FLUSH_LOGS: + return "FLUSH LOGS"; case Type::RESTART_DISK: return "RESTART DISK"; - default: - throw Exception("Unknown SYSTEM query command", ErrorCodes::LOGICAL_ERROR); - } -} - - -void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM "; - settings.ostr << typeToString(type) << (settings.hilite ? hilite_none : ""); - - auto print_database_table = [&] - { - settings.ostr << " "; - if (!database.empty()) - { - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database) - << (settings.hilite ? hilite_none : "") << "."; - } - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(table) - << (settings.hilite ? hilite_none : ""); - }; - + default: + throw Exception("Unknown SYSTEM query command", ErrorCodes::LOGICAL_ERROR); + } +} + + +void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM "; + settings.ostr << typeToString(type) << (settings.hilite ? hilite_none : ""); + + auto print_database_table = [&] + { + settings.ostr << " "; + if (!database.empty()) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database) + << (settings.hilite ? 
hilite_none : "") << "."; + } + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(table) + << (settings.hilite ? hilite_none : ""); + }; + auto print_drop_replica = [&] { - settings.ostr << " " << quoteString(replica); - if (!table.empty()) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM TABLE" - << (settings.hilite ? hilite_none : ""); - print_database_table(); - } - else if (!replica_zk_path.empty()) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM ZKPATH " - << (settings.hilite ? hilite_none : "") << quoteString(replica_zk_path); - } - else if (!database.empty()) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM DATABASE " - << (settings.hilite ? hilite_none : ""); - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database) - << (settings.hilite ? hilite_none : ""); - } - }; - + settings.ostr << " " << quoteString(replica); + if (!table.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM TABLE" + << (settings.hilite ? hilite_none : ""); + print_database_table(); + } + else if (!replica_zk_path.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM ZKPATH " + << (settings.hilite ? hilite_none : "") << quoteString(replica_zk_path); + } + else if (!database.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM DATABASE " + << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database) + << (settings.hilite ? hilite_none : ""); + } + }; + auto print_on_volume = [&] { settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON VOLUME " @@ -154,40 +154,40 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, << (settings.hilite ? 
hilite_none : ""); }; - if (!cluster.empty()) - formatOnCluster(settings); - - if ( type == Type::STOP_MERGES - || type == Type::START_MERGES - || type == Type::STOP_TTL_MERGES - || type == Type::START_TTL_MERGES - || type == Type::STOP_MOVES - || type == Type::START_MOVES - || type == Type::STOP_FETCHES - || type == Type::START_FETCHES - || type == Type::STOP_REPLICATED_SENDS - || type == Type::START_REPLICATED_SENDS - || type == Type::STOP_REPLICATION_QUEUES - || type == Type::START_REPLICATION_QUEUES - || type == Type::STOP_DISTRIBUTED_SENDS - || type == Type::START_DISTRIBUTED_SENDS) - { - if (!table.empty()) - print_database_table(); + if (!cluster.empty()) + formatOnCluster(settings); + + if ( type == Type::STOP_MERGES + || type == Type::START_MERGES + || type == Type::STOP_TTL_MERGES + || type == Type::START_TTL_MERGES + || type == Type::STOP_MOVES + || type == Type::START_MOVES + || type == Type::STOP_FETCHES + || type == Type::START_FETCHES + || type == Type::STOP_REPLICATED_SENDS + || type == Type::START_REPLICATED_SENDS + || type == Type::STOP_REPLICATION_QUEUES + || type == Type::START_REPLICATION_QUEUES + || type == Type::STOP_DISTRIBUTED_SENDS + || type == Type::START_DISTRIBUTED_SENDS) + { + if (!table.empty()) + print_database_table(); else if (!volume.empty()) print_on_volume(); - } + } else if ( type == Type::RESTART_REPLICA || type == Type::RESTORE_REPLICA || type == Type::SYNC_REPLICA || type == Type::FLUSH_DISTRIBUTED || type == Type::RELOAD_DICTIONARY) - { - print_database_table(); - } - else if (type == Type::DROP_REPLICA) { - print_drop_replica(); + print_database_table(); + } + else if (type == Type::DROP_REPLICA) + { + print_drop_replica(); } else if (type == Type::SUSPEND) { @@ -196,7 +196,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, << (settings.hilite ? hilite_keyword : "") << " SECOND" << (settings.hilite ? 
hilite_none : ""); } -} - - -} +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h index df9684cfc7..8f9144d438 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h @@ -1,92 +1,92 @@ -#pragma once - -#include <Parsers/ASTQueryWithOnCluster.h> -#include <Parsers/IAST.h> - -namespace DB -{ - -class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster -{ -public: - - enum class Type - { - UNKNOWN, - SHUTDOWN, - KILL, +#pragma once + +#include <Parsers/ASTQueryWithOnCluster.h> +#include <Parsers/IAST.h> + +namespace DB +{ + +class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + + enum class Type + { + UNKNOWN, + SHUTDOWN, + KILL, SUSPEND, - DROP_DNS_CACHE, - DROP_MARK_CACHE, - DROP_UNCOMPRESSED_CACHE, + DROP_DNS_CACHE, + DROP_MARK_CACHE, + DROP_UNCOMPRESSED_CACHE, DROP_MMAP_CACHE, -#if USE_EMBEDDED_COMPILER - DROP_COMPILED_EXPRESSION_CACHE, -#endif - STOP_LISTEN_QUERIES, - START_LISTEN_QUERIES, - RESTART_REPLICAS, - RESTART_REPLICA, +#if USE_EMBEDDED_COMPILER + DROP_COMPILED_EXPRESSION_CACHE, +#endif + STOP_LISTEN_QUERIES, + START_LISTEN_QUERIES, + RESTART_REPLICAS, + RESTART_REPLICA, RESTORE_REPLICA, - DROP_REPLICA, - SYNC_REPLICA, - RELOAD_DICTIONARY, - RELOAD_DICTIONARIES, + DROP_REPLICA, + SYNC_REPLICA, + RELOAD_DICTIONARY, + RELOAD_DICTIONARIES, RELOAD_MODEL, RELOAD_MODELS, - RELOAD_EMBEDDED_DICTIONARIES, - RELOAD_CONFIG, + RELOAD_EMBEDDED_DICTIONARIES, + RELOAD_CONFIG, RELOAD_SYMBOLS, RESTART_DISK, - STOP_MERGES, - START_MERGES, - STOP_TTL_MERGES, - START_TTL_MERGES, - STOP_FETCHES, - START_FETCHES, - STOP_MOVES, - START_MOVES, - STOP_REPLICATED_SENDS, - START_REPLICATED_SENDS, - STOP_REPLICATION_QUEUES, - START_REPLICATION_QUEUES, - FLUSH_LOGS, - FLUSH_DISTRIBUTED, - STOP_DISTRIBUTED_SENDS, - START_DISTRIBUTED_SENDS, - END - }; - - static const char * typeToString(Type type); - - Type type = Type::UNKNOWN; - + STOP_MERGES, + START_MERGES, + STOP_TTL_MERGES, + START_TTL_MERGES, + STOP_FETCHES, + START_FETCHES, + STOP_MOVES, + START_MOVES, + STOP_REPLICATED_SENDS, + START_REPLICATED_SENDS, + STOP_REPLICATION_QUEUES, + START_REPLICATION_QUEUES, + FLUSH_LOGS, + FLUSH_DISTRIBUTED, + STOP_DISTRIBUTED_SENDS, + START_DISTRIBUTED_SENDS, + END + }; + + static const char * typeToString(Type type); + + Type type = Type::UNKNOWN; + String target_model; - String database; - String table; - String replica; - String replica_zk_path; + String database; + String table; + String replica; + String replica_zk_path; bool is_drop_whole_replica{}; String storage_policy; String volume; String disk; UInt64 seconds{}; - - String getID(char) const override { return "SYSTEM query"; } - - ASTPtr clone() const override { return std::make_shared<ASTSystemQuery>(*this); } - - ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override - { - return removeOnCluster<ASTSystemQuery>(clone(), new_database); - } - + + String getID(char) const override { return "SYSTEM query"; } + + ASTPtr clone() const override { return std::make_shared<ASTSystemQuery>(*this); } + + ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override + { + return removeOnCluster<ASTSystemQuery>(clone(), new_database); + } + const char * getQueryKindString() const override { return "System"; } -protected: - - void formatImpl(const 
FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; -}; - - -} +protected: + + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h index aa181ee28e..4e4a13c2a7 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h @@ -1,32 +1,32 @@ -#pragma once - -#include <Parsers/IAST.h> -#include <Common/quoteString.h> +#pragma once + +#include <Parsers/IAST.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ - - -/** USE query - */ -class ASTUseQuery : public IAST -{ -public: - String database; - - /** Get the text that identifies this element. */ - String getID(char delim) const override { return "UseQuery" + (delim + database); } - - ASTPtr clone() const override { return std::make_shared<ASTUseQuery>(*this); } - -protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); - return; - } -}; - -} + + +namespace DB +{ + + +/** USE query + */ +class ASTUseQuery : public IAST +{ +public: + String database; + + /** Get the text that identifies this element. */ + String getID(char delim) const override { return "UseQuery" + (delim + database); } + + ASTPtr clone() const override { return std::make_shared<ASTUseQuery>(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? 
hilite_none : "") << backQuoteIfNeed(database); + return; + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp index 9d81727138..b99ea5ab8d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp @@ -1,75 +1,75 @@ -#include <Parsers/ASTUserNameWithHost.h> -#include <Common/quoteString.h> +#include <Parsers/ASTUserNameWithHost.h> +#include <Common/quoteString.h> #include <IO/Operators.h> - - -namespace DB -{ - -void ASTUserNameWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - settings.ostr << backQuoteIfNeed(base_name); - - if (!host_pattern.empty()) - settings.ostr << "@" << backQuoteIfNeed(host_pattern); -} - -String ASTUserNameWithHost::toString() const -{ - String res = base_name; - if (!host_pattern.empty()) - res += '@' + host_pattern; - return res; -} - -void ASTUserNameWithHost::concatParts() -{ - base_name = toString(); - host_pattern.clear(); -} - - -void ASTUserNamesWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const -{ - assert(!names.empty()); - bool need_comma = false; - for (const auto & name : names) - { - if (std::exchange(need_comma, true)) - settings.ostr << ", "; - name->format(settings); - } -} - -Strings ASTUserNamesWithHost::toStrings() const -{ - Strings res; - res.reserve(names.size()); - for (const auto & name : names) - res.emplace_back(name->toString()); - return res; -} - -void ASTUserNamesWithHost::concatParts() -{ - for (auto & name : names) - name->concatParts(); -} - - -bool ASTUserNamesWithHost::getHostPatternIfCommon(String & out_common_host_pattern) const -{ - out_common_host_pattern.clear(); - - if (names.empty()) - return true; - - for (size_t i = 1; i != names.size(); ++i) - if (names[i]->host_pattern != names[0]->host_pattern) - return false; - - out_common_host_pattern = names[0]->host_pattern; - return true; -} - -} + + +namespace DB +{ + +void ASTUserNameWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << backQuoteIfNeed(base_name); + + if (!host_pattern.empty()) + settings.ostr << "@" << backQuoteIfNeed(host_pattern); +} + +String ASTUserNameWithHost::toString() const +{ + String res = base_name; + if (!host_pattern.empty()) + res += '@' + host_pattern; + return res; +} + +void ASTUserNameWithHost::concatParts() +{ + base_name = toString(); + host_pattern.clear(); +} + + +void ASTUserNamesWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + assert(!names.empty()); + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + name->format(settings); + } +} + +Strings ASTUserNamesWithHost::toStrings() const +{ + Strings res; + res.reserve(names.size()); + for (const auto & name : names) + res.emplace_back(name->toString()); + return res; +} + +void ASTUserNamesWithHost::concatParts() +{ + for (auto & name : names) + name->concatParts(); +} + + +bool ASTUserNamesWithHost::getHostPatternIfCommon(String & out_common_host_pattern) const +{ + out_common_host_pattern.clear(); + + if (names.empty()) + return true; + + for (size_t i = 1; i != names.size(); ++i) + if (names[i]->host_pattern != names[0]->host_pattern) + return 
false; + + out_common_host_pattern = names[0]->host_pattern; + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h index 6d768629e3..00b1570e06 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h @@ -1,53 +1,53 @@ -#pragma once - -#include <Parsers/IParser.h> - - -namespace DB -{ - -/** Represents a user name. - * It can be a simple string or identifier or something like `name@host`. - * In the last case `host` specifies the hosts user is allowed to connect from. - * The `host` can be an ip address, ip subnet, or a host name. - * The % and _ wildcard characters are permitted in `host`. - * These have the same meaning as for pattern-matching operations performed with the LIKE operator. - */ -class ASTUserNameWithHost : public IAST -{ -public: - String base_name; - String host_pattern; - - String toString() const; - void concatParts(); - - ASTUserNameWithHost() = default; - ASTUserNameWithHost(const String & name_) : base_name(name_) {} - String getID(char) const override { return "UserNameWithHost"; } - ASTPtr clone() const override { return std::make_shared<ASTUserNameWithHost>(*this); } - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; - - -class ASTUserNamesWithHost : public IAST -{ -public: - std::vector<std::shared_ptr<ASTUserNameWithHost>> names; - - size_t size() const { return names.size(); } - auto begin() const { return names.begin(); } - auto end() const { return names.end(); } - auto front() const { return *begin(); } - void push_back(const String & name_) { names.push_back(std::make_shared<ASTUserNameWithHost>(name_)); } - - Strings toStrings() const; - void concatParts(); - bool getHostPatternIfCommon(String & out_common_host_pattern) const; - - String getID(char) const override { return "UserNamesWithHost"; } - ASTPtr clone() const override { return std::make_shared<ASTUserNamesWithHost>(*this); } - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; -}; - -} +#pragma once + +#include <Parsers/IParser.h> + + +namespace DB +{ + +/** Represents a user name. + * It can be a simple string or identifier or something like `name@host`. + * In the last case `host` specifies the hosts user is allowed to connect from. + * The `host` can be an ip address, ip subnet, or a host name. + * The % and _ wildcard characters are permitted in `host`. + * These have the same meaning as for pattern-matching operations performed with the LIKE operator. 
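+  * For example, with `host` = `%.example.com` the user may connect from any
+  * host whose name ends in .example.com, since % matches any character sequence
+  * and _ matches exactly one character.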
+ */ +class ASTUserNameWithHost : public IAST +{ +public: + String base_name; + String host_pattern; + + String toString() const; + void concatParts(); + + ASTUserNameWithHost() = default; + ASTUserNameWithHost(const String & name_) : base_name(name_) {} + String getID(char) const override { return "UserNameWithHost"; } + ASTPtr clone() const override { return std::make_shared<ASTUserNameWithHost>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + + +class ASTUserNamesWithHost : public IAST +{ +public: + std::vector<std::shared_ptr<ASTUserNameWithHost>> names; + + size_t size() const { return names.size(); } + auto begin() const { return names.begin(); } + auto end() const { return names.end(); } + auto front() const { return *begin(); } + void push_back(const String & name_) { names.push_back(std::make_shared<ASTUserNameWithHost>(name_)); } + + Strings toStrings() const; + void concatParts(); + bool getHostPatternIfCommon(String & out_common_host_pattern) const; + + String getID(char) const override { return "UserNamesWithHost"; } + ASTPtr clone() const override { return std::make_shared<ASTUserNamesWithHost>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h index 2983198e19..9285742e3c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h @@ -1,48 +1,48 @@ #pragma once - -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Common/quoteString.h> - -namespace DB -{ - -class ASTWatchQuery : public ASTQueryWithTableAndOutput -{ - -public: - ASTPtr limit_length; - bool is_watch_events; - - ASTWatchQuery() = default; - String getID(char) const override { return "WatchQuery_" + database + "_" + table; } - - ASTPtr clone() const override - { - std::shared_ptr<ASTWatchQuery> res = std::make_shared<ASTWatchQuery>(*this); - res->children.clear(); - cloneOutputOptions(*res); - return res; - } - -protected: - void formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override - { - std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - + +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Common/quoteString.h> + +namespace DB +{ + +class ASTWatchQuery : public ASTQueryWithTableAndOutput +{ + +public: + ASTPtr limit_length; + bool is_watch_events; + + ASTWatchQuery() = default; + String getID(char) const override { return "WatchQuery_" + database + "_" + table; } + + ASTPtr clone() const override + { + std::shared_ptr<ASTWatchQuery> res = std::make_shared<ASTWatchQuery>(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; + } + +protected: + void formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override + { + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); + s.ostr << (s.hilite ? hilite_keyword : "") << "WATCH " << (s.hilite ? hilite_none : "") - << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - - if (is_watch_events) - { - s.ostr << " " << (s.hilite ? hilite_keyword : "") << "EVENTS" << (s.hilite ? hilite_none : ""); - } - - if (limit_length) - { - s.ostr << (s.hilite ? 
hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : ""); - limit_length->formatImpl(s, state, frame); - } - } -}; - -} + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + if (is_watch_events) + { + s.ostr << " " << (s.hilite ? hilite_keyword : "") << "EVENTS" << (s.hilite ? hilite_none : ""); + } + + if (limit_length) + { + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : ""); + limit_length->formatImpl(s, state, frame); + } + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp index 66e39deac6..24390773d1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp @@ -1,156 +1,156 @@ -#include <Parsers/Lexer.h> -#include <Common/StringUtils/StringUtils.h> -#include <common/find_symbols.h> - -namespace DB -{ - -namespace -{ - -/// This must be consistent with functions in ReadHelpers.h -template <char quote, TokenType success_token, TokenType error_token> -Token quotedString(const char *& pos, const char * const token_begin, const char * const end) -{ - ++pos; - while (true) - { - pos = find_first_symbols<quote, '\\'>(pos, end); - if (pos >= end) - return Token(error_token, token_begin, end); - - if (*pos == quote) - { - ++pos; - if (pos < end && *pos == quote) - { - ++pos; - continue; - } - return Token(success_token, token_begin, pos); - } - - if (*pos == '\\') - { - ++pos; - if (pos >= end) - return Token(error_token, token_begin, end); - ++pos; - continue; - } - - __builtin_unreachable(); - } -} - -} - - -Token Lexer::nextToken() -{ - Token res = nextTokenImpl(); +#include <Parsers/Lexer.h> +#include <Common/StringUtils/StringUtils.h> +#include <common/find_symbols.h> + +namespace DB +{ + +namespace +{ + +/// This must be consistent with functions in ReadHelpers.h +template <char quote, TokenType success_token, TokenType error_token> +Token quotedString(const char *& pos, const char * const token_begin, const char * const end) +{ + ++pos; + while (true) + { + pos = find_first_symbols<quote, '\\'>(pos, end); + if (pos >= end) + return Token(error_token, token_begin, end); + + if (*pos == quote) + { + ++pos; + if (pos < end && *pos == quote) + { + ++pos; + continue; + } + return Token(success_token, token_begin, pos); + } + + if (*pos == '\\') + { + ++pos; + if (pos >= end) + return Token(error_token, token_begin, end); + ++pos; + continue; + } + + __builtin_unreachable(); + } +} + +} + + +Token Lexer::nextToken() +{ + Token res = nextTokenImpl(); if (max_query_size && res.end > begin + max_query_size) - res.type = TokenType::ErrorMaxQuerySizeExceeded; - if (res.isSignificant()) - prev_significant_token_type = res.type; - return res; -} - - -Token Lexer::nextTokenImpl() -{ - if (pos >= end) - return Token(TokenType::EndOfStream, end, end); - - const char * const token_begin = pos; - - auto comment_until_end_of_line = [&]() mutable - { - pos = find_first_symbols<'\n'>(pos, end); /// This means that newline in single-line comment cannot be escaped. 
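-        /// (token_begin was captured before the comment opener, so the whole
-        /// single-line comment, opener included, is returned as one Comment token.)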
- return Token(TokenType::Comment, token_begin, pos); - }; - - switch (*pos) - { - case ' ': [[fallthrough]]; - case '\t': [[fallthrough]]; - case '\n': [[fallthrough]]; - case '\r': [[fallthrough]]; - case '\f': [[fallthrough]]; - case '\v': - { - ++pos; - while (pos < end && isWhitespaceASCII(*pos)) - ++pos; - return Token(TokenType::Whitespace, token_begin, pos); - } - - case '0': [[fallthrough]]; - case '1': [[fallthrough]]; - case '2': [[fallthrough]]; - case '3': [[fallthrough]]; - case '4': [[fallthrough]]; - case '5': [[fallthrough]]; - case '6': [[fallthrough]]; - case '7': [[fallthrough]]; - case '8': [[fallthrough]]; - case '9': - { - /// The task is not to parse a number or check correctness, but only to skip it. - - /// Disambiguation: if previous token was dot, then we could parse only simple integer, - /// for chained tuple access operators (x.1.1) to work. - // Otherwise it will be tokenized as x . 1.1, not as x . 1 . 1 - if (prev_significant_token_type == TokenType::Dot) - { - ++pos; - while (pos < end && isNumericASCII(*pos)) - ++pos; - } - else - { - /// 0x, 0b - bool hex = false; - if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B')) - { - if (pos[1] == 'x' || pos[1] == 'X') - hex = true; - pos += 2; - } - else - ++pos; - - while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) - ++pos; - - /// decimal point - if (pos < end && *pos == '.') - { - ++pos; - while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) - ++pos; - } - - /// exponentiation (base 10 or base 2) - if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E'))) - { - ++pos; - - /// sign of exponent. It is always decimal. - if (pos + 1 < end && (*pos == '-' || *pos == '+')) - ++pos; - - while (pos < end && isNumericASCII(*pos)) - ++pos; - } - } - + res.type = TokenType::ErrorMaxQuerySizeExceeded; + if (res.isSignificant()) + prev_significant_token_type = res.type; + return res; +} + + +Token Lexer::nextTokenImpl() +{ + if (pos >= end) + return Token(TokenType::EndOfStream, end, end); + + const char * const token_begin = pos; + + auto comment_until_end_of_line = [&]() mutable + { + pos = find_first_symbols<'\n'>(pos, end); /// This means that newline in single-line comment cannot be escaped. + return Token(TokenType::Comment, token_begin, pos); + }; + + switch (*pos) + { + case ' ': [[fallthrough]]; + case '\t': [[fallthrough]]; + case '\n': [[fallthrough]]; + case '\r': [[fallthrough]]; + case '\f': [[fallthrough]]; + case '\v': + { + ++pos; + while (pos < end && isWhitespaceASCII(*pos)) + ++pos; + return Token(TokenType::Whitespace, token_begin, pos); + } + + case '0': [[fallthrough]]; + case '1': [[fallthrough]]; + case '2': [[fallthrough]]; + case '3': [[fallthrough]]; + case '4': [[fallthrough]]; + case '5': [[fallthrough]]; + case '6': [[fallthrough]]; + case '7': [[fallthrough]]; + case '8': [[fallthrough]]; + case '9': + { + /// The task is not to parse a number or check correctness, but only to skip it. + + /// Disambiguation: if previous token was dot, then we could parse only simple integer, + /// for chained tuple access operators (x.1.1) to work. + // Otherwise it will be tokenized as x . 1.1, not as x . 1 . 
1 + if (prev_significant_token_type == TokenType::Dot) + { + ++pos; + while (pos < end && isNumericASCII(*pos)) + ++pos; + } + else + { + /// 0x, 0b + bool hex = false; + if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B')) + { + if (pos[1] == 'x' || pos[1] == 'X') + hex = true; + pos += 2; + } + else + ++pos; + + while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) + ++pos; + + /// decimal point + if (pos < end && *pos == '.') + { + ++pos; + while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) + ++pos; + } + + /// exponentiation (base 10 or base 2) + if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E'))) + { + ++pos; + + /// sign of exponent. It is always decimal. + if (pos + 1 < end && (*pos == '-' || *pos == '+')) + ++pos; + + while (pos < end && isNumericASCII(*pos)) + ++pos; + } + } + /// Try to parse it to a identifier(1identifier_name), otherwise it return ErrorWrongNumber - if (pos < end && isWordCharASCII(*pos)) - { - ++pos; - while (pos < end && isWordCharASCII(*pos)) - ++pos; + if (pos < end && isWordCharASCII(*pos)) + { + ++pos; + while (pos < end && isWordCharASCII(*pos)) + ++pos; for (const char * iterator = token_begin; iterator < pos; ++iterator) { @@ -159,110 +159,110 @@ Token Lexer::nextTokenImpl() } return Token(TokenType::BareWord, token_begin, pos); - } - - return Token(TokenType::Number, token_begin, pos); - } - - case '\'': - return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end); - case '"': - return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end); - case '`': - return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end); - - case '(': - return Token(TokenType::OpeningRoundBracket, token_begin, ++pos); - case ')': - return Token(TokenType::ClosingRoundBracket, token_begin, ++pos); - case '[': - return Token(TokenType::OpeningSquareBracket, token_begin, ++pos); - case ']': - return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); - case '{': - return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos); - case '}': - return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos); - case ',': - return Token(TokenType::Comma, token_begin, ++pos); - case ';': - return Token(TokenType::Semicolon, token_begin, ++pos); - - case '.': /// qualifier, tuple access operator or start of floating point number - { - /// Just after identifier or complex expression or number (for chained tuple access like x.1.1 to work properly). 
- if (pos > begin - && (!(pos + 1 < end && isNumericASCII(pos[1])) - || prev_significant_token_type == TokenType::ClosingRoundBracket - || prev_significant_token_type == TokenType::ClosingSquareBracket - || prev_significant_token_type == TokenType::BareWord - || prev_significant_token_type == TokenType::QuotedIdentifier - || prev_significant_token_type == TokenType::Number)) - return Token(TokenType::Dot, token_begin, ++pos); - - ++pos; - while (pos < end && isNumericASCII(*pos)) - ++pos; - - /// exponentiation - if (pos + 1 < end && (*pos == 'e' || *pos == 'E')) - { - ++pos; - - /// sign of exponent - if (pos + 1 < end && (*pos == '-' || *pos == '+')) - ++pos; - - while (pos < end && isNumericASCII(*pos)) - ++pos; - } - - return Token(TokenType::Number, token_begin, pos); - } - - case '+': - return Token(TokenType::Plus, token_begin, ++pos); - case '-': /// minus (-), arrow (->) or start of comment (--) - { - ++pos; - if (pos < end && *pos == '>') - return Token(TokenType::Arrow, token_begin, ++pos); - - if (pos < end && *pos == '-') - { - ++pos; - return comment_until_end_of_line(); - } - - return Token(TokenType::Minus, token_begin, pos); - } - case '*': - ++pos; - return Token(TokenType::Asterisk, token_begin, pos); - case '/': /// division (/) or start of comment (//, /*) - { - ++pos; - if (pos < end && (*pos == '/' || *pos == '*')) - { - if (*pos == '/') - { - ++pos; - return comment_until_end_of_line(); - } - else - { - ++pos; + } + + return Token(TokenType::Number, token_begin, pos); + } + + case '\'': + return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end); + case '"': + return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end); + case '`': + return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end); + + case '(': + return Token(TokenType::OpeningRoundBracket, token_begin, ++pos); + case ')': + return Token(TokenType::ClosingRoundBracket, token_begin, ++pos); + case '[': + return Token(TokenType::OpeningSquareBracket, token_begin, ++pos); + case ']': + return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); + case '{': + return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos); + case '}': + return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos); + case ',': + return Token(TokenType::Comma, token_begin, ++pos); + case ';': + return Token(TokenType::Semicolon, token_begin, ++pos); + + case '.': /// qualifier, tuple access operator or start of floating point number + { + /// Just after identifier or complex expression or number (for chained tuple access like x.1.1 to work properly). 
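+            /// e.g. after `x` in `x.1.1` each dot becomes a Dot token and the digits are
+            /// parsed as plain integers, while `.5` with nothing suitable before the dot
+            /// is lexed as the single number 0.5.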
+ if (pos > begin + && (!(pos + 1 < end && isNumericASCII(pos[1])) + || prev_significant_token_type == TokenType::ClosingRoundBracket + || prev_significant_token_type == TokenType::ClosingSquareBracket + || prev_significant_token_type == TokenType::BareWord + || prev_significant_token_type == TokenType::QuotedIdentifier + || prev_significant_token_type == TokenType::Number)) + return Token(TokenType::Dot, token_begin, ++pos); + + ++pos; + while (pos < end && isNumericASCII(*pos)) + ++pos; + + /// exponentiation + if (pos + 1 < end && (*pos == 'e' || *pos == 'E')) + { + ++pos; + + /// sign of exponent + if (pos + 1 < end && (*pos == '-' || *pos == '+')) + ++pos; + + while (pos < end && isNumericASCII(*pos)) + ++pos; + } + + return Token(TokenType::Number, token_begin, pos); + } + + case '+': + return Token(TokenType::Plus, token_begin, ++pos); + case '-': /// minus (-), arrow (->) or start of comment (--) + { + ++pos; + if (pos < end && *pos == '>') + return Token(TokenType::Arrow, token_begin, ++pos); + + if (pos < end && *pos == '-') + { + ++pos; + return comment_until_end_of_line(); + } + + return Token(TokenType::Minus, token_begin, pos); + } + case '*': + ++pos; + return Token(TokenType::Asterisk, token_begin, pos); + case '/': /// division (/) or start of comment (//, /*) + { + ++pos; + if (pos < end && (*pos == '/' || *pos == '*')) + { + if (*pos == '/') + { + ++pos; + return comment_until_end_of_line(); + } + else + { + ++pos; /// Nested multiline comments are supported according to the SQL standard. size_t nesting_level = 1; - while (pos + 2 <= end) - { + while (pos + 2 <= end) + { if (pos[0] == '/' && pos[1] == '*') - { - pos += 2; + { + pos += 2; ++nesting_level; - } + } else if (pos[0] == '*' && pos[1] == '/') { pos += 2; @@ -273,70 +273,70 @@ Token Lexer::nextTokenImpl() } else ++pos; - } + } pos = end; return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, pos); - } - } - return Token(TokenType::Slash, token_begin, pos); - } - case '%': - return Token(TokenType::Percent, token_begin, ++pos); - case '=': /// =, == - { - ++pos; - if (pos < end && *pos == '=') - ++pos; - return Token(TokenType::Equals, token_begin, pos); - } - case '!': /// != - { - ++pos; - if (pos < end && *pos == '=') - return Token(TokenType::NotEquals, token_begin, ++pos); - return Token(TokenType::ErrorSingleExclamationMark, token_begin, pos); - } - case '<': /// <, <=, <> - { - ++pos; - if (pos < end && *pos == '=') - return Token(TokenType::LessOrEquals, token_begin, ++pos); - if (pos < end && *pos == '>') - return Token(TokenType::NotEquals, token_begin, ++pos); - return Token(TokenType::Less, token_begin, pos); - } - case '>': /// >, >= - { - ++pos; - if (pos < end && *pos == '=') - return Token(TokenType::GreaterOrEquals, token_begin, ++pos); - return Token(TokenType::Greater, token_begin, pos); - } - case '?': - return Token(TokenType::QuestionMark, token_begin, ++pos); - case ':': + } + } + return Token(TokenType::Slash, token_begin, pos); + } + case '%': + return Token(TokenType::Percent, token_begin, ++pos); + case '=': /// =, == + { + ++pos; + if (pos < end && *pos == '=') + ++pos; + return Token(TokenType::Equals, token_begin, pos); + } + case '!': /// != + { + ++pos; + if (pos < end && *pos == '=') + return Token(TokenType::NotEquals, token_begin, ++pos); + return Token(TokenType::ErrorSingleExclamationMark, token_begin, pos); + } + case '<': /// <, <=, <> + { + ++pos; + if (pos < end && *pos == '=') + return Token(TokenType::LessOrEquals, token_begin, ++pos); + if (pos < end && 
*pos == '>') + return Token(TokenType::NotEquals, token_begin, ++pos); + return Token(TokenType::Less, token_begin, pos); + } + case '>': /// >, >= + { + ++pos; + if (pos < end && *pos == '=') + return Token(TokenType::GreaterOrEquals, token_begin, ++pos); + return Token(TokenType::Greater, token_begin, pos); + } + case '?': + return Token(TokenType::QuestionMark, token_begin, ++pos); + case ':': { ++pos; if (pos < end && *pos == ':') return Token(TokenType::DoubleColon, token_begin, ++pos); return Token(TokenType::Colon, token_begin, pos); } - case '|': - { - ++pos; - if (pos < end && *pos == '|') - return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, token_begin, pos); - } - case '@': - { - ++pos; - if (pos < end && *pos == '@') - return Token(TokenType::DoubleAt, token_begin, ++pos); - return Token(TokenType::At, token_begin, pos); - } - - default: + case '|': + { + ++pos; + if (pos < end && *pos == '|') + return Token(TokenType::Concatenation, token_begin, ++pos); + return Token(TokenType::ErrorSinglePipeMark, token_begin, pos); + } + case '@': + { + ++pos; + if (pos < end && *pos == '@') + return Token(TokenType::DoubleAt, token_begin, ++pos); + return Token(TokenType::At, token_begin, pos); + } + + default: if (*pos == '$') { /// Try to capture dollar sign as start of here doc @@ -366,64 +366,64 @@ Token Lexer::nextTokenImpl() } } if (isWordCharASCII(*pos) || *pos == '$') - { - ++pos; + { + ++pos; while (pos < end && (isWordCharASCII(*pos) || *pos == '$')) - ++pos; - return Token(TokenType::BareWord, token_begin, pos); - } - else - { - /// We will also skip unicode whitespaces in UTF-8 to support for queries copy-pasted from MS Word and similar. - pos = skipWhitespacesUTF8(pos, end); - if (pos > token_begin) - return Token(TokenType::Whitespace, token_begin, pos); - else - return Token(TokenType::Error, token_begin, ++pos); - } - } -} - - -const char * getTokenName(TokenType type) -{ - switch (type) - { -#define M(TOKEN) \ - case TokenType::TOKEN: return #TOKEN; -APPLY_FOR_TOKENS(M) -#undef M - } - - __builtin_unreachable(); -} - - -const char * getErrorTokenDescription(TokenType type) -{ - switch (type) - { - case TokenType::Error: - return "Unrecognized token"; - case TokenType::ErrorMultilineCommentIsNotClosed: - return "Multiline comment is not closed"; - case TokenType::ErrorSingleQuoteIsNotClosed: - return "Single quoted string is not closed"; - case TokenType::ErrorDoubleQuoteIsNotClosed: - return "Double quoted string is not closed"; - case TokenType::ErrorBackQuoteIsNotClosed: - return "Back quoted string is not closed"; - case TokenType::ErrorSingleExclamationMark: - return "Exclamation mark can only occur in != operator"; - case TokenType::ErrorSinglePipeMark: - return "Pipe symbol could only occur in || operator"; - case TokenType::ErrorWrongNumber: - return "Wrong number"; - case TokenType::ErrorMaxQuerySizeExceeded: - return "Max query size exceeded"; - default: - return "Not an error"; - } -} - -} + ++pos; + return Token(TokenType::BareWord, token_begin, pos); + } + else + { + /// We will also skip unicode whitespaces in UTF-8 to support for queries copy-pasted from MS Word and similar. 
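+            /// (a typical case is U+00A0 NO-BREAK SPACE, which word processors often
+            /// substitute for an ordinary space)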
+ pos = skipWhitespacesUTF8(pos, end); + if (pos > token_begin) + return Token(TokenType::Whitespace, token_begin, pos); + else + return Token(TokenType::Error, token_begin, ++pos); + } + } +} + + +const char * getTokenName(TokenType type) +{ + switch (type) + { +#define M(TOKEN) \ + case TokenType::TOKEN: return #TOKEN; +APPLY_FOR_TOKENS(M) +#undef M + } + + __builtin_unreachable(); +} + + +const char * getErrorTokenDescription(TokenType type) +{ + switch (type) + { + case TokenType::Error: + return "Unrecognized token"; + case TokenType::ErrorMultilineCommentIsNotClosed: + return "Multiline comment is not closed"; + case TokenType::ErrorSingleQuoteIsNotClosed: + return "Single quoted string is not closed"; + case TokenType::ErrorDoubleQuoteIsNotClosed: + return "Double quoted string is not closed"; + case TokenType::ErrorBackQuoteIsNotClosed: + return "Back quoted string is not closed"; + case TokenType::ErrorSingleExclamationMark: + return "Exclamation mark can only occur in != operator"; + case TokenType::ErrorSinglePipeMark: + return "Pipe symbol could only occur in || operator"; + case TokenType::ErrorWrongNumber: + return "Wrong number"; + case TokenType::ErrorMaxQuerySizeExceeded: + return "Max query size exceeded"; + default: + return "Not an error"; + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp index fb842fdd80..f654033b29 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp @@ -1,99 +1,99 @@ -#include <Common/typeid_cast.h> -#include <Parsers/ParserAlterQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ExpressionListParsers.h> -#include <Parsers/ParserCreateQuery.h> -#include <Parsers/ParserPartition.h> -#include <Parsers/ParserSelectWithUnionQuery.h> -#include <Parsers/ParserSetQuery.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTIndexDeclaration.h> -#include <Parsers/ASTAlterQuery.h> -#include <Parsers/ASTLiteral.h> -#include <Parsers/parseDatabaseAndTableName.h> - - -namespace DB -{ - -bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto command = std::make_shared<ASTAlterCommand>(); - node = command; - - ParserKeyword s_add_column("ADD COLUMN"); - ParserKeyword s_drop_column("DROP COLUMN"); - ParserKeyword s_clear_column("CLEAR COLUMN"); - ParserKeyword s_modify_column("MODIFY COLUMN"); - ParserKeyword s_rename_column("RENAME COLUMN"); - ParserKeyword s_comment_column("COMMENT COLUMN"); +#include <Common/typeid_cast.h> +#include <Parsers/ParserAlterQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ExpressionListParsers.h> +#include <Parsers/ParserCreateQuery.h> +#include <Parsers/ParserPartition.h> +#include <Parsers/ParserSelectWithUnionQuery.h> +#include <Parsers/ParserSetQuery.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTIndexDeclaration.h> +#include <Parsers/ASTAlterQuery.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/parseDatabaseAndTableName.h> + + +namespace DB +{ + +bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto command = std::make_shared<ASTAlterCommand>(); + node = command; + + ParserKeyword s_add_column("ADD COLUMN"); + ParserKeyword s_drop_column("DROP COLUMN"); + 
ParserKeyword s_clear_column("CLEAR COLUMN"); + ParserKeyword s_modify_column("MODIFY COLUMN"); + ParserKeyword s_rename_column("RENAME COLUMN"); + ParserKeyword s_comment_column("COMMENT COLUMN"); ParserKeyword s_materialize_column("MATERIALIZE COLUMN"); - ParserKeyword s_modify_order_by("MODIFY ORDER BY"); + ParserKeyword s_modify_order_by("MODIFY ORDER BY"); ParserKeyword s_modify_sample_by("MODIFY SAMPLE BY"); - ParserKeyword s_modify_ttl("MODIFY TTL"); - ParserKeyword s_materialize_ttl("MATERIALIZE TTL"); - ParserKeyword s_modify_setting("MODIFY SETTING"); + ParserKeyword s_modify_ttl("MODIFY TTL"); + ParserKeyword s_materialize_ttl("MATERIALIZE TTL"); + ParserKeyword s_modify_setting("MODIFY SETTING"); ParserKeyword s_reset_setting("RESET SETTING"); - ParserKeyword s_modify_query("MODIFY QUERY"); - - ParserKeyword s_add_index("ADD INDEX"); - ParserKeyword s_drop_index("DROP INDEX"); - ParserKeyword s_clear_index("CLEAR INDEX"); - ParserKeyword s_materialize_index("MATERIALIZE INDEX"); - - ParserKeyword s_add_constraint("ADD CONSTRAINT"); - ParserKeyword s_drop_constraint("DROP CONSTRAINT"); - + ParserKeyword s_modify_query("MODIFY QUERY"); + + ParserKeyword s_add_index("ADD INDEX"); + ParserKeyword s_drop_index("DROP INDEX"); + ParserKeyword s_clear_index("CLEAR INDEX"); + ParserKeyword s_materialize_index("MATERIALIZE INDEX"); + + ParserKeyword s_add_constraint("ADD CONSTRAINT"); + ParserKeyword s_drop_constraint("DROP CONSTRAINT"); + ParserKeyword s_add_projection("ADD PROJECTION"); ParserKeyword s_drop_projection("DROP PROJECTION"); ParserKeyword s_clear_projection("CLEAR PROJECTION"); ParserKeyword s_materialize_projection("MATERIALIZE PROJECTION"); - ParserKeyword s_add("ADD"); - ParserKeyword s_drop("DROP"); - ParserKeyword s_suspend("SUSPEND"); - ParserKeyword s_resume("RESUME"); - ParserKeyword s_refresh("REFRESH"); - ParserKeyword s_modify("MODIFY"); - - ParserKeyword s_attach_partition("ATTACH PARTITION"); + ParserKeyword s_add("ADD"); + ParserKeyword s_drop("DROP"); + ParserKeyword s_suspend("SUSPEND"); + ParserKeyword s_resume("RESUME"); + ParserKeyword s_refresh("REFRESH"); + ParserKeyword s_modify("MODIFY"); + + ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_attach_part("ATTACH PART"); - ParserKeyword s_detach_partition("DETACH PARTITION"); + ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_detach_part("DETACH PART"); - ParserKeyword s_drop_partition("DROP PARTITION"); + ParserKeyword s_drop_partition("DROP PARTITION"); ParserKeyword s_drop_part("DROP PART"); - ParserKeyword s_move_partition("MOVE PARTITION"); + ParserKeyword s_move_partition("MOVE PARTITION"); ParserKeyword s_move_part("MOVE PART"); - ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION"); - ParserKeyword s_drop_detached_part("DROP DETACHED PART"); - ParserKeyword s_fetch_partition("FETCH PARTITION"); + ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION"); + ParserKeyword s_drop_detached_part("DROP DETACHED PART"); + ParserKeyword s_fetch_partition("FETCH PARTITION"); ParserKeyword s_fetch_part("FETCH PART"); - ParserKeyword s_replace_partition("REPLACE PARTITION"); - ParserKeyword s_freeze("FREEZE"); + ParserKeyword s_replace_partition("REPLACE PARTITION"); + ParserKeyword s_freeze("FREEZE"); ParserKeyword s_unfreeze("UNFREEZE"); - ParserKeyword s_partition("PARTITION"); - - ParserKeyword s_first("FIRST"); - ParserKeyword s_after("AFTER"); - ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserKeyword s_if_exists("IF EXISTS"); - 
ParserKeyword s_from("FROM"); - ParserKeyword s_in_partition("IN PARTITION"); - ParserKeyword s_with("WITH"); - ParserKeyword s_name("NAME"); - - ParserKeyword s_to_disk("TO DISK"); - ParserKeyword s_to_volume("TO VOLUME"); - ParserKeyword s_to_table("TO TABLE"); + ParserKeyword s_partition("PARTITION"); + + ParserKeyword s_first("FIRST"); + ParserKeyword s_after("AFTER"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_from("FROM"); + ParserKeyword s_in_partition("IN PARTITION"); + ParserKeyword s_with("WITH"); + ParserKeyword s_name("NAME"); + + ParserKeyword s_to_disk("TO DISK"); + ParserKeyword s_to_volume("TO VOLUME"); + ParserKeyword s_to_table("TO TABLE"); ParserKeyword s_to_shard("TO SHARD"); - + ParserKeyword s_delete("DELETE"); - ParserKeyword s_update("UPDATE"); - ParserKeyword s_where("WHERE"); - ParserKeyword s_to("TO"); - + ParserKeyword s_update("UPDATE"); + ParserKeyword s_where("WHERE"); + ParserKeyword s_to("TO"); + ParserKeyword s_remove("REMOVE"); ParserKeyword s_default("DEFAULT"); ParserKeyword s_materialized("MATERIALIZED"); @@ -104,72 +104,72 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_remove_ttl("REMOVE TTL"); - ParserCompoundIdentifier parser_name; - ParserStringLiteral parser_string_literal; + ParserCompoundIdentifier parser_name; + ParserStringLiteral parser_string_literal; ParserIdentifier parser_remove_property; - ParserCompoundColumnDeclaration parser_col_decl; - ParserIndexDeclaration parser_idx_decl; - ParserConstraintDeclaration parser_constraint_decl; + ParserCompoundColumnDeclaration parser_col_decl; + ParserIndexDeclaration parser_idx_decl; + ParserConstraintDeclaration parser_constraint_decl; ParserProjectionDeclaration parser_projection_decl; ParserCompoundColumnDeclaration parser_modify_col_decl(false, false, true); - ParserPartition parser_partition; - ParserExpression parser_exp_elem; - ParserList parser_assignment_list( - std::make_unique<ParserAssignment>(), std::make_unique<ParserToken>(TokenType::Comma), - /* allow_empty = */ false); - ParserSetQuery parser_settings(true); + ParserPartition parser_partition; + ParserExpression parser_exp_elem; + ParserList parser_assignment_list( + std::make_unique<ParserAssignment>(), std::make_unique<ParserToken>(TokenType::Comma), + /* allow_empty = */ false); + ParserSetQuery parser_settings(true); ParserList parser_reset_setting( std::make_unique<ParserIdentifier>(), std::make_unique<ParserToken>(TokenType::Comma), /* allow_empty = */ false); - ParserNameList values_p; - ParserSelectWithUnionQuery select_p; - ParserTTLExpressionList parser_ttl_list; - - if (is_live_view) - { - if (s_refresh.ignore(pos, expected)) - { - command->type = ASTAlterCommand::LIVE_VIEW_REFRESH; - } - else - return false; - } - else - { - if (s_add_column.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_col_decl.parse(pos, command->col_decl, expected)) - return false; - - if (s_first.ignore(pos, expected)) - command->first = true; - else if (s_after.ignore(pos, expected)) - { - if (!parser_name.parse(pos, command->column, expected)) - return false; - } - - command->type = ASTAlterCommand::ADD_COLUMN; - } - else if (s_rename_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - if (!s_to.ignore(pos, expected)) - return 
false; - - if (!parser_name.parse(pos, command->rename_to, expected)) - return false; - - command->type = ASTAlterCommand::RENAME_COLUMN; - } + ParserNameList values_p; + ParserSelectWithUnionQuery select_p; + ParserTTLExpressionList parser_ttl_list; + + if (is_live_view) + { + if (s_refresh.ignore(pos, expected)) + { + command->type = ASTAlterCommand::LIVE_VIEW_REFRESH; + } + else + return false; + } + else + { + if (s_add_column.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_col_decl.parse(pos, command->col_decl, expected)) + return false; + + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_COLUMN; + } + else if (s_rename_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->column, expected)) + return false; + + if (!s_to.ignore(pos, expected)) + return false; + + if (!parser_name.parse(pos, command->rename_to, expected)) + return false; + + command->type = ASTAlterCommand::RENAME_COLUMN; + } else if (s_materialize_column.ignore(pos, expected)) { if (!parser_name.parse(pos, command->column, expected)) @@ -184,13 +184,13 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } - else if (s_drop_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PARTITION; - } + else if (s_drop_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PARTITION; + } else if (s_drop_part.ignore(pos, expected)) { if (!parser_string_literal.parse(pos, command->partition, expected)) @@ -199,114 +199,114 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::DROP_PARTITION; command->part = true; } - else if (s_drop_detached_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; - } - else if (s_drop_detached_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; - command->part = true; - } - else if (s_drop_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - command->type = ASTAlterCommand::DROP_COLUMN; - command->detach = false; - } - else if (s_clear_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - command->type = ASTAlterCommand::DROP_COLUMN; - command->clear_column = true; - command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_add_index.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_idx_decl.parse(pos, command->index_decl, expected)) - return false; - + 
else if (s_drop_detached_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; + } + else if (s_drop_detached_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; + command->part = true; + } + else if (s_drop_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->column, expected)) + return false; + + command->type = ASTAlterCommand::DROP_COLUMN; + command->detach = false; + } + else if (s_clear_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->column, expected)) + return false; + + command->type = ASTAlterCommand::DROP_COLUMN; + command->clear_column = true; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_add_index.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_idx_decl.parse(pos, command->index_decl, expected)) + return false; + if (s_first.ignore(pos, expected)) command->first = true; else if (s_after.ignore(pos, expected)) - { - if (!parser_name.parse(pos, command->index, expected)) - return false; - } - - command->type = ASTAlterCommand::ADD_INDEX; - } - else if (s_drop_index.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->index, expected)) - return false; - - command->type = ASTAlterCommand::DROP_INDEX; - command->detach = false; - } - else if (s_clear_index.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->index, expected)) - return false; - - command->type = ASTAlterCommand::DROP_INDEX; - command->clear_index = true; - command->detach = false; - + { + if (!parser_name.parse(pos, command->index, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_INDEX; + } + else if (s_drop_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::DROP_INDEX; + command->detach = false; + } + else if (s_clear_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::DROP_INDEX; + command->clear_index = true; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_materialize_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::MATERIALIZE_INDEX; + command->detach = false; + if (s_in_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; } - } - else if (s_materialize_index.ignore(pos, expected)) - { - if 
(s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->index, expected)) - return false; - - command->type = ASTAlterCommand::MATERIALIZE_INDEX; - command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } + } else if (s_add_projection.ignore(pos, expected)) { if (s_if_not_exists.ignore(pos, expected)) @@ -371,98 +371,98 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } - else if (s_move_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::MOVE_PARTITION; - command->part = true; - - if (s_to_disk.ignore(pos)) - command->move_destination_type = DataDestinationType::DISK; - else if (s_to_volume.ignore(pos)) - command->move_destination_type = DataDestinationType::VOLUME; - else if (s_to_table.ignore(pos)) - { - if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) - return false; - command->move_destination_type = DataDestinationType::TABLE; - } + else if (s_move_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::MOVE_PARTITION; + command->part = true; + + if (s_to_disk.ignore(pos)) + command->move_destination_type = DataDestinationType::DISK; + else if (s_to_volume.ignore(pos)) + command->move_destination_type = DataDestinationType::VOLUME; + else if (s_to_table.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) + return false; + command->move_destination_type = DataDestinationType::TABLE; + } else if (s_to_shard.ignore(pos)) { command->move_destination_type = DataDestinationType::SHARD; } - else - return false; - - if (command->move_destination_type != DataDestinationType::TABLE) - { - ASTPtr ast_space_name; - if (!parser_string_literal.parse(pos, ast_space_name, expected)) - return false; - - command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>(); - } - } - else if (s_move_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::MOVE_PARTITION; - - if (s_to_disk.ignore(pos)) - command->move_destination_type = DataDestinationType::DISK; - else if (s_to_volume.ignore(pos)) - command->move_destination_type = DataDestinationType::VOLUME; - else if (s_to_table.ignore(pos)) - { - if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) - return false; - command->move_destination_type = DataDestinationType::TABLE; - } - else - return false; - - if (command->move_destination_type != DataDestinationType::TABLE) - { - ASTPtr ast_space_name; - if (!parser_string_literal.parse(pos, ast_space_name, expected)) - return false; - - command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>(); - } - } - else if (s_add_constraint.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected)) - return false; - - command->type = ASTAlterCommand::ADD_CONSTRAINT; - } - else if (s_drop_constraint.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - 
- if (!parser_name.parse(pos, command->constraint, expected)) - return false; - - command->type = ASTAlterCommand::DROP_CONSTRAINT; - command->detach = false; - } - else if (s_detach_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PARTITION; - command->detach = true; - } + else + return false; + + if (command->move_destination_type != DataDestinationType::TABLE) + { + ASTPtr ast_space_name; + if (!parser_string_literal.parse(pos, ast_space_name, expected)) + return false; + + command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>(); + } + } + else if (s_move_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::MOVE_PARTITION; + + if (s_to_disk.ignore(pos)) + command->move_destination_type = DataDestinationType::DISK; + else if (s_to_volume.ignore(pos)) + command->move_destination_type = DataDestinationType::VOLUME; + else if (s_to_table.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) + return false; + command->move_destination_type = DataDestinationType::TABLE; + } + else + return false; + + if (command->move_destination_type != DataDestinationType::TABLE) + { + ASTPtr ast_space_name; + if (!parser_string_literal.parse(pos, ast_space_name, expected)) + return false; + + command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>(); + } + } + else if (s_add_constraint.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected)) + return false; + + command->type = ASTAlterCommand::ADD_CONSTRAINT; + } + else if (s_drop_constraint.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->constraint, expected)) + return false; + + command->type = ASTAlterCommand::DROP_CONSTRAINT; + command->detach = false; + } + else if (s_detach_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PARTITION; + command->detach = true; + } else if (s_detach_part.ignore(pos, expected)) { if (!parser_string_literal.parse(pos, command->partition, expected)) @@ -472,61 +472,61 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->part = true; command->detach = true; } - else if (s_attach_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - if (s_from.ignore(pos)) - { - if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) - return false; - - command->replace = false; - command->type = ASTAlterCommand::REPLACE_PARTITION; - } - else - { - command->type = ASTAlterCommand::ATTACH_PARTITION; - } - } - else if (s_replace_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) - return false; - - command->replace = true; - command->type = ASTAlterCommand::REPLACE_PARTITION; - } - else if (s_attach_part.ignore(pos, 
expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->part = true; - command->type = ASTAlterCommand::ATTACH_PARTITION; - } - else if (s_fetch_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - ASTPtr ast_from; - if (!parser_string_literal.parse(pos, ast_from, expected)) - return false; - - command->from = ast_from->as<ASTLiteral &>().value.get<const String &>(); - command->type = ASTAlterCommand::FETCH_PARTITION; - } + else if (s_attach_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (s_from.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) + return false; + + command->replace = false; + command->type = ASTAlterCommand::REPLACE_PARTITION; + } + else + { + command->type = ASTAlterCommand::ATTACH_PARTITION; + } + } + else if (s_replace_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) + return false; + + command->replace = true; + command->type = ASTAlterCommand::REPLACE_PARTITION; + } + else if (s_attach_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->part = true; + command->type = ASTAlterCommand::ATTACH_PARTITION; + } + else if (s_fetch_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + ASTPtr ast_from; + if (!parser_string_literal.parse(pos, ast_from, expected)) + return false; + + command->from = ast_from->as<ASTLiteral &>().value.get<const String &>(); + command->type = ASTAlterCommand::FETCH_PARTITION; + } else if (s_fetch_part.ignore(pos, expected)) { if (!parser_string_literal.parse(pos, command->partition, expected)) @@ -542,33 +542,33 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->part = true; command->type = ASTAlterCommand::FETCH_PARTITION; } - else if (s_freeze.ignore(pos, expected)) - { - if (s_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::FREEZE_PARTITION; - } - else - { - command->type = ASTAlterCommand::FREEZE_ALL; - } - - /// WITH NAME 'name' - place local backup to directory with specified name - if (s_with.ignore(pos, expected)) - { - if (!s_name.ignore(pos, expected)) - return false; - - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; - - command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>(); - } - } + else if (s_freeze.ignore(pos, expected)) + { + if (s_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::FREEZE_PARTITION; + } + else + { + command->type = ASTAlterCommand::FREEZE_ALL; + } + + /// WITH NAME 'name' - place local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; + + ASTPtr ast_with_name; + if 
(!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>(); + } + } else if (s_unfreeze.ignore(pos, expected)) { if (s_partition.ignore(pos, expected)) @@ -600,16 +600,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } - else if (s_modify_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) - return false; - + else if (s_modify_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) + return false; + if (s_remove.ignore(pos, expected)) - { + { if (s_default.ignore(pos, expected)) command->remove_property = "DEFAULT"; else if (s_materialized.ignore(pos, expected)) @@ -623,8 +623,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else if (s_ttl.ignore(pos, expected)) command->remove_property = "TTL"; else - return false; - } + return false; + } else { if (s_first.ignore(pos, expected)) @@ -635,15 +635,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } - command->type = ASTAlterCommand::MODIFY_COLUMN; - } - else if (s_modify_order_by.ignore(pos, expected)) - { - if (!parser_exp_elem.parse(pos, command->order_by, expected)) - return false; - - command->type = ASTAlterCommand::MODIFY_ORDER_BY; - } + command->type = ASTAlterCommand::MODIFY_COLUMN; + } + else if (s_modify_order_by.ignore(pos, expected)) + { + if (!parser_exp_elem.parse(pos, command->order_by, expected)) + return false; + + command->type = ASTAlterCommand::MODIFY_ORDER_BY; + } else if (s_modify_sample_by.ignore(pos, expected)) { if (!parser_exp_elem.parse(pos, command->sample_by, expected)) @@ -652,7 +652,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MODIFY_SAMPLE_BY; } else if (s_delete.ignore(pos, expected)) - { + { if (s_in_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) @@ -662,93 +662,93 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!s_where.ignore(pos, expected)) return false; - if (!parser_exp_elem.parse(pos, command->predicate, expected)) - return false; - - command->type = ASTAlterCommand::DELETE; - } - else if (s_update.ignore(pos, expected)) - { - if (!parser_assignment_list.parse(pos, command->update_assignments, expected)) - return false; - + if (!parser_exp_elem.parse(pos, command->predicate, expected)) + return false; + + command->type = ASTAlterCommand::DELETE; + } + else if (s_update.ignore(pos, expected)) + { + if (!parser_assignment_list.parse(pos, command->update_assignments, expected)) + return false; + if (s_in_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; } - if (!s_where.ignore(pos, expected)) - return false; - - if (!parser_exp_elem.parse(pos, command->predicate, expected)) - return false; - - command->type = ASTAlterCommand::UPDATE; - } - else if (s_comment_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - if (!parser_string_literal.parse(pos, command->comment, 
expected)) - return false; - - command->type = ASTAlterCommand::COMMENT_COLUMN; - } - else if (s_modify_ttl.ignore(pos, expected)) - { - if (!parser_ttl_list.parse(pos, command->ttl, expected)) - return false; - command->type = ASTAlterCommand::MODIFY_TTL; - } + if (!s_where.ignore(pos, expected)) + return false; + + if (!parser_exp_elem.parse(pos, command->predicate, expected)) + return false; + + command->type = ASTAlterCommand::UPDATE; + } + else if (s_comment_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->column, expected)) + return false; + + if (!parser_string_literal.parse(pos, command->comment, expected)) + return false; + + command->type = ASTAlterCommand::COMMENT_COLUMN; + } + else if (s_modify_ttl.ignore(pos, expected)) + { + if (!parser_ttl_list.parse(pos, command->ttl, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_TTL; + } else if (s_remove_ttl.ignore(pos, expected)) { command->type = ASTAlterCommand::REMOVE_TTL; } - else if (s_materialize_ttl.ignore(pos, expected)) - { - command->type = ASTAlterCommand::MATERIALIZE_TTL; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_modify_setting.ignore(pos, expected)) - { - if (!parser_settings.parse(pos, command->settings_changes, expected)) - return false; - command->type = ASTAlterCommand::MODIFY_SETTING; - } + else if (s_materialize_ttl.ignore(pos, expected)) + { + command->type = ASTAlterCommand::MATERIALIZE_TTL; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_modify_setting.ignore(pos, expected)) + { + if (!parser_settings.parse(pos, command->settings_changes, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_SETTING; + } else if (s_reset_setting.ignore(pos, expected)) { if (!parser_reset_setting.parse(pos, command->settings_resets, expected)) return false; command->type = ASTAlterCommand::RESET_SETTING; } - else if (s_modify_query.ignore(pos, expected)) - { - if (!select_p.parse(pos, command->select, expected)) - return false; - command->type = ASTAlterCommand::MODIFY_QUERY; - } - else - return false; - } - - if (command->col_decl) - command->children.push_back(command->col_decl); - if (command->column) - command->children.push_back(command->column); - if (command->partition) - command->children.push_back(command->partition); - if (command->order_by) - command->children.push_back(command->order_by); + else if (s_modify_query.ignore(pos, expected)) + { + if (!select_p.parse(pos, command->select, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_QUERY; + } + else + return false; + } + + if (command->col_decl) + command->children.push_back(command->col_decl); + if (command->column) + command->children.push_back(command->column); + if (command->partition) + command->children.push_back(command->partition); + if (command->order_by) + command->children.push_back(command->order_by); if (command->sample_by) command->children.push_back(command->sample_by); if (command->index_decl) @@ -759,89 +759,89 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->children.push_back(command->constraint_decl); if (command->constraint) command->children.push_back(command->constraint); - if (command->predicate) - 
command->children.push_back(command->predicate); - if (command->update_assignments) - command->children.push_back(command->update_assignments); - if (command->values) - command->children.push_back(command->values); - if (command->comment) - command->children.push_back(command->comment); - if (command->ttl) - command->children.push_back(command->ttl); - if (command->settings_changes) - command->children.push_back(command->settings_changes); + if (command->predicate) + command->children.push_back(command->predicate); + if (command->update_assignments) + command->children.push_back(command->update_assignments); + if (command->values) + command->children.push_back(command->values); + if (command->comment) + command->children.push_back(command->comment); + if (command->ttl) + command->children.push_back(command->ttl); + if (command->settings_changes) + command->children.push_back(command->settings_changes); if (command->select) command->children.push_back(command->select); if (command->rename_to) command->children.push_back(command->rename_to); - - return true; -} - - -bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ + + return true; +} + + +bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ auto command_list = std::make_shared<ASTExpressionList>(); - node = command_list; - - ParserToken s_comma(TokenType::Comma); - ParserAlterCommand p_command(is_live_view); - - do - { - ASTPtr command; - if (!p_command.parse(pos, command, expected)) - return false; - + node = command_list; + + ParserToken s_comma(TokenType::Comma); + ParserAlterCommand p_command(is_live_view); + + do + { + ASTPtr command; + if (!p_command.parse(pos, command, expected)) + return false; + command_list->children.push_back(command); - } - while (s_comma.ignore(pos, expected)); - - return true; -} - - -bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto query = std::make_shared<ASTAlterQuery>(); - node = query; - - ParserKeyword s_alter_table("ALTER TABLE"); - ParserKeyword s_alter_live_view("ALTER LIVE VIEW"); - - bool is_live_view = false; - - if (!s_alter_table.ignore(pos, expected)) - { - if (!s_alter_live_view.ignore(pos, expected)) - return false; - else - is_live_view = true; - } - - if (is_live_view) - query->is_live_view = true; - - if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) - return false; - - String cluster_str; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) - return false; - } - query->cluster = cluster_str; - - ParserAlterCommandList p_command_list(is_live_view); - ASTPtr command_list; - if (!p_command_list.parse(pos, command_list, expected)) - return false; - - query->set(query->command_list, command_list); - - return true; -} - -} + } + while (s_comma.ignore(pos, expected)); + + return true; +} + + +bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto query = std::make_shared<ASTAlterQuery>(); + node = query; + + ParserKeyword s_alter_table("ALTER TABLE"); + ParserKeyword s_alter_live_view("ALTER LIVE VIEW"); + + bool is_live_view = false; + + if (!s_alter_table.ignore(pos, expected)) + { + if (!s_alter_live_view.ignore(pos, expected)) + return false; + else + is_live_view = true; + } + + if (is_live_view) + query->is_live_view = true; + + if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) + return false; + + String cluster_str; + if 
(ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + query->cluster = cluster_str; + + ParserAlterCommandList p_command_list(is_live_view); + ASTPtr command_list; + if (!p_command_list.parse(pos, command_list, expected)) + return false; + + query->set(query->command_list, command_list); + + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h index 88e11f4c98..2e54c4ddba 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h @@ -1,67 +1,67 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/ExpressionElementParsers.h> - -namespace DB -{ - -/** Query like this: - * ALTER TABLE [db.]name [ON CLUSTER cluster] - * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],] - * [DROP COLUMN [IF EXISTS] col_to_drop, ...] +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/ExpressionElementParsers.h> + +namespace DB +{ + +/** Query like this: + * ALTER TABLE [db.]name [ON CLUSTER cluster] + * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],] + * [DROP COLUMN [IF EXISTS] col_to_drop, ...] * [CLEAR COLUMN [IF EXISTS] col_to_clear[ IN PARTITION partition],] - * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...] - * [RENAME COLUMN [IF EXISTS] col_name TO col_name] - * [MODIFY PRIMARY KEY (a, b, c...)] - * [MODIFY SETTING setting_name=setting_value, ...] + * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...] + * [RENAME COLUMN [IF EXISTS] col_name TO col_name] + * [MODIFY PRIMARY KEY (a, b, c...)] + * [MODIFY SETTING setting_name=setting_value, ...] * [RESET SETTING setting_name, ...] - * [COMMENT COLUMN [IF EXISTS] col_name string] - * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] - * [FETCH PARTITION partition FROM ...] - * [FREEZE [PARTITION] [WITH NAME name]] + * [COMMENT COLUMN [IF EXISTS] col_name string] + * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] + * [FETCH PARTITION partition FROM ...] + * [FREEZE [PARTITION] [WITH NAME name]] * [DELETE[ IN PARTITION partition] WHERE ...] * [UPDATE col_name = expr, ...[ IN PARTITION partition] WHERE ...] 
* [ADD INDEX [IF NOT EXISTS] index_name [AFTER index_name]] * [DROP INDEX [IF EXISTS] index_name] * [CLEAR INDEX [IF EXISTS] index_name IN PARTITION partition] * [MATERIALIZE INDEX [IF EXISTS] index_name [IN PARTITION partition]] - * ALTER LIVE VIEW [db.name] - * [REFRESH] - */ - -class ParserAlterQuery : public IParserBase -{ -protected: - const char * getName() const override{ return "ALTER query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserAlterCommandList : public IParserBase -{ -protected: - const char * getName() const override{ return "a list of ALTER commands"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -public: - bool is_live_view; - - ParserAlterCommandList(bool is_live_view_ = false) : is_live_view(is_live_view_) {} -}; - - -class ParserAlterCommand : public IParserBase -{ -protected: - const char * getName() const override{ return "ALTER command"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -public: - bool is_live_view; - - ParserAlterCommand(bool is_live_view_ = false) : is_live_view(is_live_view_) {} -}; - - -} + * ALTER LIVE VIEW [db.name] + * [REFRESH] + */ + +class ParserAlterQuery : public IParserBase +{ +protected: + const char * getName() const override{ return "ALTER query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserAlterCommandList : public IParserBase +{ +protected: + const char * getName() const override{ return "a list of ALTER commands"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + bool is_live_view; + + ParserAlterCommandList(bool is_live_view_ = false) : is_live_view(is_live_view_) {} +}; + + +class ParserAlterCommand : public IParserBase +{ +protected: + const char * getName() const override{ return "ALTER command"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + bool is_live_view; + + ParserAlterCommand(bool is_live_view_ = false) : is_live_view(is_live_view_) {} +}; + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp index db0df02772..c397e1c33c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp @@ -1,54 +1,54 @@ -#include <Parsers/ParserCheckQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTCheckQuery.h> -#include <Parsers/ParserPartition.h> - - -namespace DB -{ - -bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_check_table("CHECK TABLE"); - ParserKeyword s_partition("PARTITION"); - ParserToken s_dot(TokenType::Dot); - - ParserIdentifier table_parser; - ParserPartition partition_parser; - - ASTPtr table; - ASTPtr database; - - if (!s_check_table.ignore(pos, expected)) - return false; - if (!table_parser.parse(pos, database, expected)) - return false; - - auto query = std::make_shared<ASTCheckQuery>(); - if (s_dot.ignore(pos)) - { - if (!table_parser.parse(pos, table, expected)) - return false; - - tryGetIdentifierNameInto(database, query->database); - tryGetIdentifierNameInto(table, query->table); - } - else - { - table = database; - tryGetIdentifierNameInto(table, query->table); - } - - if 
(s_partition.ignore(pos, expected)) - { - if (!partition_parser.parse(pos, query->partition, expected)) - return false; - } - - node = query; - return true; -} - -} +#include <Parsers/ParserCheckQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTCheckQuery.h> +#include <Parsers/ParserPartition.h> + + +namespace DB +{ + +bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_check_table("CHECK TABLE"); + ParserKeyword s_partition("PARTITION"); + ParserToken s_dot(TokenType::Dot); + + ParserIdentifier table_parser; + ParserPartition partition_parser; + + ASTPtr table; + ASTPtr database; + + if (!s_check_table.ignore(pos, expected)) + return false; + if (!table_parser.parse(pos, database, expected)) + return false; + + auto query = std::make_shared<ASTCheckQuery>(); + if (s_dot.ignore(pos)) + { + if (!table_parser.parse(pos, table, expected)) + return false; + + tryGetIdentifierNameInto(database, query->database); + tryGetIdentifierNameInto(table, query->table); + } + else + { + table = database; + tryGetIdentifierNameInto(table, query->table); + } + + if (s_partition.ignore(pos, expected)) + { + if (!partition_parser.parse(pos, query->partition, expected)) + return false; + } + + node = query; + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h index 2a378c6683..fb0c390fa0 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h @@ -1,17 +1,17 @@ -#pragma once - -#include <Parsers/IParserBase.h> - -namespace DB -{ -/** Query of form - * CHECK [TABLE] [database.]table - */ -class ParserCheckQuery : public IParserBase -{ -protected: - const char * getName() const override{ return "ALTER query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + +namespace DB +{ +/** Query of form + * CHECK [TABLE] [database.]table + */ +class ParserCheckQuery : public IParserBase +{ +protected: + const char * getName() const override{ return "ALTER query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp index 4bd7371636..0f768e2232 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp @@ -1,44 +1,44 @@ -#include <Parsers/TablePropertiesQueriesASTs.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ParserDescribeTableQuery.h> -#include <Parsers/ParserTablesInSelectQuery.h> - -#include <Common/typeid_cast.h> - - -namespace DB -{ - - -bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_describe("DESCRIBE"); - ParserKeyword s_desc("DESC"); - ParserKeyword s_table("TABLE"); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; - - ASTPtr database; - ASTPtr table; - - if (!s_describe.ignore(pos, expected) && !s_desc.ignore(pos, expected)) - return false; - - auto query = 
std::make_shared<ASTDescribeQuery>(); - - s_table.ignore(pos, expected); - - ASTPtr table_expression; - if (!ParserTableExpression().parse(pos, table_expression, expected)) - return false; - - query->table_expression = table_expression; - - node = query; - - return true; -} - - -} +#include <Parsers/TablePropertiesQueriesASTs.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ParserDescribeTableQuery.h> +#include <Parsers/ParserTablesInSelectQuery.h> + +#include <Common/typeid_cast.h> + + +namespace DB +{ + + +bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_describe("DESCRIBE"); + ParserKeyword s_desc("DESC"); + ParserKeyword s_table("TABLE"); + ParserToken s_dot(TokenType::Dot); + ParserIdentifier name_p; + + ASTPtr database; + ASTPtr table; + + if (!s_describe.ignore(pos, expected) && !s_desc.ignore(pos, expected)) + return false; + + auto query = std::make_shared<ASTDescribeQuery>(); + + s_table.ignore(pos, expected); + + ASTPtr table_expression; + if (!ParserTableExpression().parse(pos, table_expression, expected)) + return false; + + query->table_expression = table_expression; + + node = query; + + return true; +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h index 9878eb2d1c..caf3590ed3 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h @@ -1,20 +1,20 @@ -#pragma once - - -#include <Parsers/IParserBase.h> -#include <Parsers/ExpressionElementParsers.h> - - -namespace DB -{ - -/** Query (DESCRIBE | DESC) ([TABLE] [db.]name | tableFunction) [FORMAT format] - */ -class ParserDescribeTableQuery : public IParserBase -{ -protected: - const char * getName() const override { return "DESCRIBE query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + + +#include <Parsers/IParserBase.h> +#include <Parsers/ExpressionElementParsers.h> + + +namespace DB +{ + +/** Query (DESCRIBE | DESC) ([TABLE] [db.]name | tableFunction) [FORMAT format] + */ +class ParserDescribeTableQuery : public IParserBase +{ +protected: + const char * getName() const override { return "DESCRIBE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp index 37206e85a1..5400f33fbd 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp @@ -1,84 +1,84 @@ -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTDropQuery.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ParserDropQuery.h> - - -namespace DB -{ - -namespace -{ - +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTDropQuery.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ParserDropQuery.h> + + +namespace DB +{ + +namespace +{ + bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, const ASTDropQuery::Kind kind) -{ - ParserKeyword s_temporary("TEMPORARY"); - ParserKeyword s_table("TABLE"); - ParserKeyword s_dictionary("DICTIONARY"); - ParserKeyword s_view("VIEW"); - ParserKeyword 
s_database("DATABASE"); - ParserToken s_dot(TokenType::Dot); - ParserKeyword s_if_exists("IF EXISTS"); - ParserIdentifier name_p; +{ + ParserKeyword s_temporary("TEMPORARY"); + ParserKeyword s_table("TABLE"); + ParserKeyword s_dictionary("DICTIONARY"); + ParserKeyword s_view("VIEW"); + ParserKeyword s_database("DATABASE"); + ParserToken s_dot(TokenType::Dot); + ParserKeyword s_if_exists("IF EXISTS"); + ParserIdentifier name_p; ParserKeyword s_permanently("PERMANENTLY"); - ParserKeyword s_no_delay("NO DELAY"); + ParserKeyword s_no_delay("NO DELAY"); ParserKeyword s_sync("SYNC"); - - ASTPtr database; - ASTPtr table; - String cluster_str; - bool if_exists = false; - bool temporary = false; - bool is_dictionary = false; - bool is_view = false; - bool no_delay = false; + + ASTPtr database; + ASTPtr table; + String cluster_str; + bool if_exists = false; + bool temporary = false; + bool is_dictionary = false; + bool is_view = false; + bool no_delay = false; bool permanently = false; - - if (s_database.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - if_exists = true; - - if (!name_p.parse(pos, database, expected)) - return false; - } - else - { - if (s_view.ignore(pos, expected)) - is_view = true; - else if (s_dictionary.ignore(pos, expected)) - is_dictionary = true; - else if (s_temporary.ignore(pos, expected)) - temporary = true; - + + if (s_database.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + + if (!name_p.parse(pos, database, expected)) + return false; + } + else + { + if (s_view.ignore(pos, expected)) + is_view = true; + else if (s_dictionary.ignore(pos, expected)) + is_dictionary = true; + else if (s_temporary.ignore(pos, expected)) + temporary = true; + /// for TRUNCATE queries TABLE keyword is assumed as default and can be skipped if (!is_view && !is_dictionary && (!s_table.ignore(pos, expected) && kind != ASTDropQuery::Kind::Truncate)) - { - return false; - } - - if (s_if_exists.ignore(pos, expected)) - if_exists = true; - - if (!name_p.parse(pos, table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } + { + return false; + } + + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + + if (!name_p.parse(pos, table, expected)) + return false; + + if (s_dot.ignore(pos, expected)) + { + database = table; + if (!name_p.parse(pos, table, expected)) + return false; + } } - + /// common for tables / dictionaries / databases if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; - } - + } + if (kind == ASTDropQuery::Kind::Detach && s_permanently.ignore(pos, expected)) permanently = true; @@ -86,41 +86,41 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons if (s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected)) no_delay = true; - auto query = std::make_shared<ASTDropQuery>(); - node = query; - + auto query = std::make_shared<ASTDropQuery>(); + node = query; + query->kind = kind; - query->if_exists = if_exists; - query->temporary = temporary; - query->is_dictionary = is_dictionary; - query->is_view = is_view; - query->no_delay = no_delay; + query->if_exists = if_exists; + query->temporary = temporary; + query->is_dictionary = is_dictionary; + query->is_view = is_view; + query->no_delay = no_delay; query->permanently = permanently; - - tryGetIdentifierNameInto(database, query->database); - 
tryGetIdentifierNameInto(table, query->table); - - query->cluster = cluster_str; - - return true; -} - -} - -bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_drop("DROP"); - ParserKeyword s_detach("DETACH"); - ParserKeyword s_truncate("TRUNCATE"); - - if (s_drop.ignore(pos, expected)) + + tryGetIdentifierNameInto(database, query->database); + tryGetIdentifierNameInto(table, query->table); + + query->cluster = cluster_str; + + return true; +} + +} + +bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_drop("DROP"); + ParserKeyword s_detach("DETACH"); + ParserKeyword s_truncate("TRUNCATE"); + + if (s_drop.ignore(pos, expected)) return parseDropQuery(pos, node, expected, ASTDropQuery::Kind::Drop); - else if (s_detach.ignore(pos, expected)) + else if (s_detach.ignore(pos, expected)) return parseDropQuery(pos, node, expected, ASTDropQuery::Kind::Detach); - else if (s_truncate.ignore(pos, expected)) + else if (s_truncate.ignore(pos, expected)) return parseDropQuery(pos, node, expected, ASTDropQuery::Kind::Truncate); - else - return false; -} - -} + else + return false; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h index 5dea24aa43..39ff5b7c4f 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h @@ -1,26 +1,26 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/ExpressionElementParsers.h> - - -namespace DB -{ - -/** Query like this: +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/ExpressionElementParsers.h> + + +namespace DB +{ + +/** Query like this: * DROP|DETACH|TRUNCATE TABLE [IF EXISTS] [db.]name [PERMANENTLY] - * - * Or: - * DROP DATABASE [IF EXISTS] db - * - * Or: - * DROP DICTIONARY [IF EXISTS] [db.]name - */ -class ParserDropQuery : public IParserBase -{ -protected: - const char * getName() const override{ return "DROP query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} + * + * Or: + * DROP DATABASE [IF EXISTS] db + * + * Or: + * DROP DICTIONARY [IF EXISTS] [db.]name + */ +class ParserDropQuery : public IParserBase +{ +protected: + const char * getName() const override{ return "DROP query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp index 8f0ab6d12c..b4ba052323 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp @@ -1,60 +1,60 @@ -#include <Parsers/ParserExplainQuery.h> +#include <Parsers/ParserExplainQuery.h> -#include <Parsers/ASTExplainQuery.h> -#include <Parsers/CommonParsers.h> +#include <Parsers/ASTExplainQuery.h> +#include <Parsers/CommonParsers.h> #include <Parsers/ParserCreateQuery.h> -#include <Parsers/ParserSelectWithUnionQuery.h> -#include <Parsers/ParserSetQuery.h> +#include <Parsers/ParserSelectWithUnionQuery.h> +#include <Parsers/ParserSetQuery.h> #include <Parsers/ParserQuery.h> - -namespace DB -{ - -bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - 
ASTExplainQuery::ExplainKind kind; - - ParserKeyword s_ast("AST"); - ParserKeyword s_explain("EXPLAIN"); - ParserKeyword s_syntax("SYNTAX"); - ParserKeyword s_pipeline("PIPELINE"); - ParserKeyword s_plan("PLAN"); + +namespace DB +{ + +bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTExplainQuery::ExplainKind kind; + + ParserKeyword s_ast("AST"); + ParserKeyword s_explain("EXPLAIN"); + ParserKeyword s_syntax("SYNTAX"); + ParserKeyword s_pipeline("PIPELINE"); + ParserKeyword s_plan("PLAN"); ParserKeyword s_estimates("ESTIMATE"); - + if (s_explain.ignore(pos, expected)) - { - kind = ASTExplainQuery::QueryPlan; - - if (s_ast.ignore(pos, expected)) - kind = ASTExplainQuery::ExplainKind::ParsedAST; - else if (s_syntax.ignore(pos, expected)) - kind = ASTExplainQuery::ExplainKind::AnalyzedSyntax; - else if (s_pipeline.ignore(pos, expected)) - kind = ASTExplainQuery::ExplainKind::QueryPipeline; - else if (s_plan.ignore(pos, expected)) + { + kind = ASTExplainQuery::QueryPlan; + + if (s_ast.ignore(pos, expected)) + kind = ASTExplainQuery::ExplainKind::ParsedAST; + else if (s_syntax.ignore(pos, expected)) + kind = ASTExplainQuery::ExplainKind::AnalyzedSyntax; + else if (s_pipeline.ignore(pos, expected)) + kind = ASTExplainQuery::ExplainKind::QueryPipeline; + else if (s_plan.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::QueryPlan; //-V1048 else if (s_estimates.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::QueryEstimates; //-V1048 - } - else - return false; - + } + else + return false; + auto explain_query = std::make_shared<ASTExplainQuery>(kind); - - { - ASTPtr settings; - ParserSetQuery parser_settings(true); - - auto begin = pos; - if (parser_settings.parse(pos, settings, expected)) - explain_query->setSettings(std::move(settings)); - else - pos = begin; - } - + + { + ASTPtr settings; + ParserSetQuery parser_settings(true); + + auto begin = pos; + if (parser_settings.parse(pos, settings, expected)) + explain_query->setSettings(std::move(settings)); + else + pos = begin; + } + ParserCreateTableQuery create_p; - ParserSelectWithUnionQuery select_p; - ASTPtr query; + ParserSelectWithUnionQuery select_p; + ASTPtr query; if (kind == ASTExplainQuery::ExplainKind::ParsedAST) { ParserQuery p(end); @@ -67,10 +67,10 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected create_p.parse(pos, query, expected)) explain_query->setExplainedQuery(std::move(query)); else - return false; - - node = std::move(explain_query); - return true; -} - -} + return false; + + node = std::move(explain_query); + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h index 77e8ab90ac..a1865e3023 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h @@ -1,20 +1,20 @@ -#pragma once - -#include <Parsers/IParserBase.h> - -namespace DB -{ - - -class ParserExplainQuery : public IParserBase -{ -protected: +#pragma once + +#include <Parsers/IParserBase.h> + +namespace DB +{ + + +class ParserExplainQuery : public IParserBase +{ +protected: const char * end; - const char * getName() const override { return "EXPLAIN"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + const char * getName() const override { return "EXPLAIN"; } + bool parseImpl(Pos & pos, ASTPtr 
& node, Expected & expected) override; public: ParserExplainQuery(const char* end_) : end(end_) {} -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp index ac25d0b1e3..19457f027b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp @@ -1,107 +1,107 @@ -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTSelectWithUnionQuery.h> -#include <Parsers/ASTInsertQuery.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ExpressionListParsers.h> -#include <Parsers/ParserSelectWithUnionQuery.h> -#include <Parsers/ParserWatchQuery.h> -#include <Parsers/ParserInsertQuery.h> -#include <Parsers/ParserSetQuery.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTSelectWithUnionQuery.h> +#include <Parsers/ASTInsertQuery.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ExpressionListParsers.h> +#include <Parsers/ParserSelectWithUnionQuery.h> +#include <Parsers/ParserWatchQuery.h> +#include <Parsers/ParserInsertQuery.h> +#include <Parsers/ParserSetQuery.h> #include <Parsers/InsertQuerySettingsPushDownVisitor.h> -#include <Common/typeid_cast.h> +#include <Common/typeid_cast.h> #include "Parsers/IAST_fwd.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int SYNTAX_ERROR; -} - - -bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_insert_into("INSERT INTO"); + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + + +bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_insert_into("INSERT INTO"); ParserKeyword s_from_infile("FROM INFILE"); - ParserKeyword s_table("TABLE"); - ParserKeyword s_function("FUNCTION"); - ParserToken s_dot(TokenType::Dot); - ParserKeyword s_values("VALUES"); - ParserKeyword s_format("FORMAT"); - ParserKeyword s_settings("SETTINGS"); - ParserKeyword s_select("SELECT"); - ParserKeyword s_watch("WATCH"); + ParserKeyword s_table("TABLE"); + ParserKeyword s_function("FUNCTION"); + ParserToken s_dot(TokenType::Dot); + ParserKeyword s_values("VALUES"); + ParserKeyword s_format("FORMAT"); + ParserKeyword s_settings("SETTINGS"); + ParserKeyword s_select("SELECT"); + ParserKeyword s_watch("WATCH"); ParserKeyword s_partition_by("PARTITION BY"); - ParserKeyword s_with("WITH"); - ParserToken s_lparen(TokenType::OpeningRoundBracket); - ParserToken s_rparen(TokenType::ClosingRoundBracket); - ParserIdentifier name_p; + ParserKeyword s_with("WITH"); + ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken s_rparen(TokenType::ClosingRoundBracket); + ParserIdentifier name_p; ParserList columns_p(std::make_unique<ParserInsertElement>(), std::make_unique<ParserToken>(TokenType::Comma), false); - ParserFunction table_function_p{false}; + ParserFunction table_function_p{false}; ParserStringLiteral infile_name_p; ParserExpressionWithOptionalAlias exp_elem_p(false); - - ASTPtr database; - ASTPtr table; + + ASTPtr database; + ASTPtr table; ASTPtr infile; - ASTPtr columns; - ASTPtr format; - ASTPtr select; - ASTPtr watch; - ASTPtr table_function; - ASTPtr settings_ast; + ASTPtr columns; + ASTPtr format; + ASTPtr select; + ASTPtr watch; + ASTPtr table_function; + ASTPtr 
settings_ast; ASTPtr partition_by_expr; - /// Insertion data - const char * data = nullptr; - - if (!s_insert_into.ignore(pos, expected)) - return false; - - s_table.ignore(pos, expected); - - if (s_function.ignore(pos, expected)) - { - if (!table_function_p.parse(pos, table_function, expected)) - return false; + /// Insertion data + const char * data = nullptr; + + if (!s_insert_into.ignore(pos, expected)) + return false; + + s_table.ignore(pos, expected); + + if (s_function.ignore(pos, expected)) + { + if (!table_function_p.parse(pos, table_function, expected)) + return false; if (s_partition_by.ignore(pos, expected)) { if (!exp_elem_p.parse(pos, partition_by_expr, expected)) return false; } - } - else - { - if (!name_p.parse(pos, table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } - } - - /// Is there a list of columns - if (s_lparen.ignore(pos, expected)) - { - if (!columns_p.parse(pos, columns, expected)) - return false; - - if (!s_rparen.ignore(pos, expected)) - return false; - } - - Pos before_values = pos; - + } + else + { + if (!name_p.parse(pos, table, expected)) + return false; + + if (s_dot.ignore(pos, expected)) + { + database = table; + if (!name_p.parse(pos, table, expected)) + return false; + } + } + + /// Is there a list of columns + if (s_lparen.ignore(pos, expected)) + { + if (!columns_p.parse(pos, columns, expected)) + return false; + + if (!s_rparen.ignore(pos, expected)) + return false; + } + + Pos before_values = pos; + if (s_from_infile.ignore(pos, expected)) - { + { if (!infile_name_p.parse(pos, infile, expected)) return false; } @@ -109,45 +109,45 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// VALUES or FROM INFILE or FORMAT or SELECT if (!infile && s_values.ignore(pos, expected)) { - data = pos->begin; - } - else if (s_format.ignore(pos, expected)) - { - if (!name_p.parse(pos, format, expected)) - return false; - } - else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected)) - { - pos = before_values; - ParserSelectWithUnionQuery select_p; - select_p.parse(pos, select, expected); - - /// FORMAT section is expected if we have input() in SELECT part - if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected)) - return false; - } - else if (s_watch.ignore(pos, expected)) - { - pos = before_values; - ParserWatchQuery watch_p; - watch_p.parse(pos, watch, expected); - - /// FORMAT section is expected if we have input() in SELECT part - if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected)) - return false; - } - else - { - return false; - } - - if (s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (!parser_settings.parse(pos, settings_ast, expected)) - return false; - } - + data = pos->begin; + } + else if (s_format.ignore(pos, expected)) + { + if (!name_p.parse(pos, format, expected)) + return false; + } + else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected)) + { + pos = before_values; + ParserSelectWithUnionQuery select_p; + select_p.parse(pos, select, expected); + + /// FORMAT section is expected if we have input() in SELECT part + if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected)) + return false; + } + else if (s_watch.ignore(pos, expected)) + { + pos = before_values; + ParserWatchQuery watch_p; + watch_p.parse(pos, watch, expected); + + /// FORMAT section is expected if we have input() in SELECT 
part + if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected)) + return false; + } + else + { + return false; + } + + if (s_settings.ignore(pos, expected)) + { + ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, settings_ast, expected)) + return false; + } + if (select) { /// Copy SETTINGS from the INSERT ... SELECT ... SETTINGS @@ -157,71 +157,71 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (format && !infile) - { - Pos last_token = pos; - --last_token; - data = last_token->end; - - if (data < end && *data == ';') - throw Exception("You have excessive ';' symbol before data for INSERT.\n" - "Example:\n\n" - "INSERT INTO t (x, y) FORMAT TabSeparated\n" - ";\tHello\n" - "2\tWorld\n" - "\n" - "Note that there is no ';' just after format name, " - "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR); - - while (data < end && (*data == ' ' || *data == '\t' || *data == '\f')) - ++data; - - /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise. - - if (data < end && *data == '\r') - ++data; - - if (data < end && *data == '\n') - ++data; - } - - auto query = std::make_shared<ASTInsertQuery>(); - node = query; - + { + Pos last_token = pos; + --last_token; + data = last_token->end; + + if (data < end && *data == ';') + throw Exception("You have excessive ';' symbol before data for INSERT.\n" + "Example:\n\n" + "INSERT INTO t (x, y) FORMAT TabSeparated\n" + ";\tHello\n" + "2\tWorld\n" + "\n" + "Note that there is no ';' just after format name, " + "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR); + + while (data < end && (*data == ' ' || *data == '\t' || *data == '\f')) + ++data; + + /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise. + + if (data < end && *data == '\r') + ++data; + + if (data < end && *data == '\n') + ++data; + } + + auto query = std::make_shared<ASTInsertQuery>(); + node = query; + if (infile) query->infile = infile; - if (table_function) - { - query->table_function = table_function; + if (table_function) + { + query->table_function = table_function; query->partition_by = partition_by_expr; - } - else - { - tryGetIdentifierNameInto(database, query->table_id.database_name); - tryGetIdentifierNameInto(table, query->table_id.table_name); - } - - tryGetIdentifierNameInto(format, query->format); - - query->columns = columns; - query->select = select; - query->watch = watch; - query->settings_ast = settings_ast; - query->data = data != end ? data : nullptr; - query->end = end; - - if (columns) - query->children.push_back(columns); - if (select) - query->children.push_back(select); - if (watch) - query->children.push_back(watch); - if (settings_ast) - query->children.push_back(settings_ast); - - return true; -} - + } + else + { + tryGetIdentifierNameInto(database, query->table_id.database_name); + tryGetIdentifierNameInto(table, query->table_id.table_name); + } + + tryGetIdentifierNameInto(format, query->format); + + query->columns = columns; + query->select = select; + query->watch = watch; + query->settings_ast = settings_ast; + query->data = data != end ? 
data : nullptr; + query->end = end; + + if (columns) + query->children.push_back(columns); + if (select) + query->children.push_back(select); + if (watch) + query->children.push_back(watch); + if (settings_ast) + query->children.push_back(settings_ast); + + return true; +} + bool ParserInsertElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserColumnsMatcher().parse(pos, node, expected) @@ -229,5 +229,5 @@ bool ParserInsertElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte || ParserAsterisk().parse(pos, node, expected) || ParserCompoundIdentifier().parse(pos, node, expected); } - -} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h index 1f24400c4c..f98e433551 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h @@ -1,38 +1,38 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ - - -/** Cases: - * - * Normal case: - * INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ... - * INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ... - * - * Insert of data in an arbitrary format. - * The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise. - * INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ... - * INSERT INTO [db.]table FORMAT format \n ... - * - * Insert the result of the SELECT or WATCH query. - * INSERT INTO [db.]table (c1, c2, c3) SELECT | WATCH ... - * INSERT INTO [db.]table SELECT | WATCH ... - */ -class ParserInsertQuery : public IParserBase -{ -private: - const char * end; - - const char * getName() const override { return "INSERT query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -public: +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ + + +/** Cases: + * + * Normal case: + * INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ... + * INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ... + * + * Insert of data in an arbitrary format. + * The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise. + * INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ... + * INSERT INTO [db.]table FORMAT format \n ... + * + * Insert the result of the SELECT or WATCH query. + * INSERT INTO [db.]table (c1, c2, c3) SELECT | WATCH ... + * INSERT INTO [db.]table SELECT | WATCH ... + */ +class ParserInsertQuery : public IParserBase +{ +private: + const char * end; + + const char * getName() const override { return "INSERT query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: explicit ParserInsertQuery(const char * end_) : end(end_) {} -}; - +}; + /** Insert accepts an identifier and an asterisk with variants. 
*/ class ParserInsertElement : public IParserBase @@ -42,4 +42,4 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp index 485a3acf83..a195a778ed 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp @@ -1,56 +1,56 @@ -#include <Parsers/ParserKillQueryQuery.h> -#include <Parsers/ASTKillQueryQuery.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionListParsers.h> - - -namespace DB -{ - - -bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - String cluster_str; - auto query = std::make_shared<ASTKillQueryQuery>(); - - ParserKeyword p_kill{"KILL"}; - ParserKeyword p_query{"QUERY"}; - ParserKeyword p_mutation{"MUTATION"}; - ParserKeyword p_on{"ON"}; - ParserKeyword p_test{"TEST"}; - ParserKeyword p_sync{"SYNC"}; - ParserKeyword p_async{"ASYNC"}; - ParserKeyword p_where{"WHERE"}; - ParserExpression p_where_expression; - - if (!p_kill.ignore(pos, expected)) - return false; - - if (p_query.ignore(pos, expected)) - query->type = ASTKillQueryQuery::Type::Query; - else if (p_mutation.ignore(pos, expected)) - query->type = ASTKillQueryQuery::Type::Mutation; - else - return false; - - if (p_on.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) - return false; - - if (!p_where.ignore(pos, expected) || !p_where_expression.parse(pos, query->where_expression, expected)) - return false; - - if (p_sync.ignore(pos, expected)) - query->sync = true; - else if (p_async.ignore(pos, expected)) - query->sync = false; - else if (p_test.ignore(pos, expected)) - query->test = true; - - query->cluster = cluster_str; - query->children.emplace_back(query->where_expression); - node = std::move(query); - return true; -} - -} +#include <Parsers/ParserKillQueryQuery.h> +#include <Parsers/ASTKillQueryQuery.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionListParsers.h> + + +namespace DB +{ + + +bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + String cluster_str; + auto query = std::make_shared<ASTKillQueryQuery>(); + + ParserKeyword p_kill{"KILL"}; + ParserKeyword p_query{"QUERY"}; + ParserKeyword p_mutation{"MUTATION"}; + ParserKeyword p_on{"ON"}; + ParserKeyword p_test{"TEST"}; + ParserKeyword p_sync{"SYNC"}; + ParserKeyword p_async{"ASYNC"}; + ParserKeyword p_where{"WHERE"}; + ParserExpression p_where_expression; + + if (!p_kill.ignore(pos, expected)) + return false; + + if (p_query.ignore(pos, expected)) + query->type = ASTKillQueryQuery::Type::Query; + else if (p_mutation.ignore(pos, expected)) + query->type = ASTKillQueryQuery::Type::Mutation; + else + return false; + + if (p_on.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + + if (!p_where.ignore(pos, expected) || !p_where_expression.parse(pos, query->where_expression, expected)) + return false; + + if (p_sync.ignore(pos, expected)) + query->sync = true; + else if (p_async.ignore(pos, expected)) + query->sync = false; + else if (p_test.ignore(pos, expected)) + query->test = true; + + query->cluster = cluster_str; + query->children.emplace_back(query->where_expression); + node = std::move(query); + return true; +} + +} 
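For orientation, a minimal sketch of driving this parser standalone. It assumes the standard ClickHouse entry point parseQuery() from <Parsers/parseQuery.h> is available in this vendored copy, and it only touches ASTKillQueryQuery fields that appear in the diff above; treat it as illustrative, not part of this change.

    #include <Parsers/ParserKillQueryQuery.h>
    #include <Parsers/ASTKillQueryQuery.h>
    #include <Parsers/parseQuery.h>
    #include <iostream>

    int main()
    {
        using namespace DB;
        // WHERE is mandatory; SYNC/ASYNC/TEST are optional trailing modes.
        std::string q = "KILL MUTATION WHERE database = 'default' AND table = 't' TEST";
        ParserKillQueryQuery parser;
        ASTPtr ast = parseQuery(parser, q.data(), q.data() + q.size(),
                                /*description=*/"", /*max_query_size=*/0,
                                /*max_parser_depth=*/0);
        const auto & kill = ast->as<ASTKillQueryQuery &>();
        std::cout << (kill.test ? "dry run" : "kill") << '\n';  // prints "dry run"
    }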
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h index 7019e8a9dd..da75317c52 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h @@ -1,19 +1,19 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ - -/** KILL QUERY WHERE <logical expression upon system.processes fields> [SYNC|ASYNC|TEST] - */ -class ParserKillQueryQuery : public IParserBase -{ -protected: - const char * getName() const override { return "KILL QUERY query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} - +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ + +/** KILL QUERY WHERE <logical expression upon system.processes fields> [SYNC|ASYNC|TEST] + */ +class ParserKillQueryQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KILL QUERY query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp index ccff3d0282..441cec1465 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp @@ -1,74 +1,74 @@ -#include <Parsers/ParserOptimizeQuery.h> -#include <Parsers/ParserPartition.h> -#include <Parsers/CommonParsers.h> - -#include <Parsers/ASTOptimizeQuery.h> -#include <Parsers/ASTIdentifier.h> +#include <Parsers/ParserOptimizeQuery.h> +#include <Parsers/ParserPartition.h> +#include <Parsers/CommonParsers.h> + +#include <Parsers/ASTOptimizeQuery.h> +#include <Parsers/ASTIdentifier.h> #include <Parsers/ExpressionListParsers.h> - - -namespace DB -{ - + + +namespace DB +{ + bool ParserOptimizeQueryColumnsSpecification::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // Do not allow APPLY and REPLACE transformers. // Since we use Columns Transformers only to get list of columns, // we can't actually modify content of the columns for deduplication. 
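    // In practice this admits e.g. DEDUPLICATE BY a, b, DEDUPLICATE BY COLUMNS('regex'),
    // or DEDUPLICATE BY * EXCEPT (c); APPLY and REPLACE transformers are rejected here.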
const auto allowed_transformers = ParserColumnsTransformers::ColumnTransformers{ParserColumnsTransformers::ColumnTransformer::EXCEPT}; - + return ParserColumnsMatcher(allowed_transformers).parse(pos, node, expected) || ParserAsterisk(allowed_transformers).parse(pos, node, expected) || ParserIdentifier(false).parse(pos, node, expected); } -bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_optimize_table("OPTIMIZE TABLE"); - ParserKeyword s_partition("PARTITION"); - ParserKeyword s_final("FINAL"); - ParserKeyword s_deduplicate("DEDUPLICATE"); +bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_optimize_table("OPTIMIZE TABLE"); + ParserKeyword s_partition("PARTITION"); + ParserKeyword s_final("FINAL"); + ParserKeyword s_deduplicate("DEDUPLICATE"); ParserKeyword s_by("BY"); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; - ParserPartition partition_p; - - ASTPtr database; - ASTPtr table; - ASTPtr partition; - bool final = false; - bool deduplicate = false; - String cluster_str; - - if (!s_optimize_table.ignore(pos, expected)) - return false; - - if (!name_p.parse(pos, table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } - - if (ParserKeyword{"ON"}.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) - return false; - - if (s_partition.ignore(pos, expected)) - { - if (!partition_p.parse(pos, partition, expected)) - return false; - } - - if (s_final.ignore(pos, expected)) - final = true; - - if (s_deduplicate.ignore(pos, expected)) - deduplicate = true; - + ParserToken s_dot(TokenType::Dot); + ParserIdentifier name_p; + ParserPartition partition_p; + + ASTPtr database; + ASTPtr table; + ASTPtr partition; + bool final = false; + bool deduplicate = false; + String cluster_str; + + if (!s_optimize_table.ignore(pos, expected)) + return false; + + if (!name_p.parse(pos, table, expected)) + return false; + + if (s_dot.ignore(pos, expected)) + { + database = table; + if (!name_p.parse(pos, table, expected)) + return false; + } + + if (ParserKeyword{"ON"}.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + + if (s_partition.ignore(pos, expected)) + { + if (!partition_p.parse(pos, partition, expected)) + return false; + } + + if (s_final.ignore(pos, expected)) + final = true; + + if (s_deduplicate.ignore(pos, expected)) + deduplicate = true; + ASTPtr deduplicate_by_columns; if (deduplicate && s_by.ignore(pos, expected)) { @@ -77,21 +77,21 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return false; } - auto query = std::make_shared<ASTOptimizeQuery>(); - node = query; - - tryGetIdentifierNameInto(database, query->database); - tryGetIdentifierNameInto(table, query->table); - - query->cluster = cluster_str; + auto query = std::make_shared<ASTOptimizeQuery>(); + node = query; + + tryGetIdentifierNameInto(database, query->database); + tryGetIdentifierNameInto(table, query->table); + + query->cluster = cluster_str; if ((query->partition = partition)) query->children.push_back(partition); - query->final = final; - query->deduplicate = deduplicate; + query->final = final; + query->deduplicate = deduplicate; query->deduplicate_by_columns = deduplicate_by_columns; - - return true; -} - - -} + + return true; +} + + +} diff --git 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h index 631ff8cbb4..c8294d9ff6 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h @@ -1,12 +1,12 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/ExpressionElementParsers.h> - - -namespace DB -{ - +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/ExpressionElementParsers.h> + + +namespace DB +{ + class ParserOptimizeQueryColumnsSpecification : public IParserBase { protected: @@ -14,13 +14,13 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE] - */ -class ParserOptimizeQuery : public IParserBase -{ -protected: - const char * getName() const override { return "OPTIMIZE query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE] + */ +class ParserOptimizeQuery : public IParserBase +{ +protected: + const char * getName() const override { return "OPTIMIZE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp index 6ffaf9f2dd..a3ec4943e1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp @@ -1,95 +1,95 @@ -#include <Parsers/ParserPartition.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ExpressionListParsers.h> -#include <Parsers/ASTPartition.h> -#include <Parsers/ASTLiteral.h> -#include <Parsers/ASTFunction.h> -#include <Common/typeid_cast.h> - -namespace DB -{ - -bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_id("ID"); - ParserStringLiteral parser_string_literal; - ParserExpression parser_expr; - - Pos begin = pos; - - auto partition = std::make_shared<ASTPartition>(); - - if (s_id.ignore(pos, expected)) - { - ASTPtr partition_id; - if (!parser_string_literal.parse(pos, partition_id, expected)) - return false; - - partition->id = partition_id->as<ASTLiteral &>().value.get<String>(); - } - else - { - ASTPtr value; - if (!parser_expr.parse(pos, value, expected)) - return false; - - size_t fields_count; - String fields_str; - - const auto * tuple_ast = value->as<ASTFunction>(); - bool surrounded_by_parens = false; - if (tuple_ast && tuple_ast->name == "tuple") - { - surrounded_by_parens = true; - const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>(); - if (arguments_ast) - fields_count = arguments_ast->children.size(); - else - fields_count = 0; - } - else if (const auto * literal = value->as<ASTLiteral>()) - { - if (literal->value.getType() == Field::Types::Tuple) - { - surrounded_by_parens = true; - fields_count = literal->value.get<const Tuple &>().size(); - } - else - { - fields_count = 1; - fields_str = String(begin->begin, pos->begin - begin->begin); - } - } - else - return false; - - if (surrounded_by_parens) - { - Pos left_paren = begin; - Pos right_paren = pos; - - while 
(left_paren != right_paren && left_paren->type != TokenType::OpeningRoundBracket) - ++left_paren; - if (left_paren->type != TokenType::OpeningRoundBracket) - return false; - - while (right_paren != left_paren && right_paren->type != TokenType::ClosingRoundBracket) - --right_paren; - if (right_paren->type != TokenType::ClosingRoundBracket) - return false; - - fields_str = String(left_paren->end, right_paren->begin - left_paren->end); - } - - partition->value = value; - partition->children.push_back(value); - partition->fields_str = std::move(fields_str); - partition->fields_count = fields_count; - } - - node = partition; - return true; -} - -} +#include <Parsers/ParserPartition.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ExpressionListParsers.h> +#include <Parsers/ASTPartition.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/ASTFunction.h> +#include <Common/typeid_cast.h> + +namespace DB +{ + +bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_id("ID"); + ParserStringLiteral parser_string_literal; + ParserExpression parser_expr; + + Pos begin = pos; + + auto partition = std::make_shared<ASTPartition>(); + + if (s_id.ignore(pos, expected)) + { + ASTPtr partition_id; + if (!parser_string_literal.parse(pos, partition_id, expected)) + return false; + + partition->id = partition_id->as<ASTLiteral &>().value.get<String>(); + } + else + { + ASTPtr value; + if (!parser_expr.parse(pos, value, expected)) + return false; + + size_t fields_count; + String fields_str; + + const auto * tuple_ast = value->as<ASTFunction>(); + bool surrounded_by_parens = false; + if (tuple_ast && tuple_ast->name == "tuple") + { + surrounded_by_parens = true; + const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>(); + if (arguments_ast) + fields_count = arguments_ast->children.size(); + else + fields_count = 0; + } + else if (const auto * literal = value->as<ASTLiteral>()) + { + if (literal->value.getType() == Field::Types::Tuple) + { + surrounded_by_parens = true; + fields_count = literal->value.get<const Tuple &>().size(); + } + else + { + fields_count = 1; + fields_str = String(begin->begin, pos->begin - begin->begin); + } + } + else + return false; + + if (surrounded_by_parens) + { + Pos left_paren = begin; + Pos right_paren = pos; + + while (left_paren != right_paren && left_paren->type != TokenType::OpeningRoundBracket) + ++left_paren; + if (left_paren->type != TokenType::OpeningRoundBracket) + return false; + + while (right_paren != left_paren && right_paren->type != TokenType::ClosingRoundBracket) + --right_paren; + if (right_paren->type != TokenType::ClosingRoundBracket) + return false; + + fields_str = String(left_paren->end, right_paren->begin - left_paren->end); + } + + partition->value = value; + partition->children.push_back(value); + partition->fields_str = std::move(fields_str); + partition->fields_count = fields_count; + } + + node = partition; + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h index 5f454438bf..2bb7048fd8 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h @@ -1,17 +1,17 @@ -#pragma once - -#include <Parsers/IParserBase.h> - -namespace DB -{ - -/// Parse either a partition value as a (possibly compound) 
literal or a partition ID. -/// Produce ASTPartition. -class ParserPartition : public IParserBase -{ -protected: - const char * getName() const override { return "partition"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + +namespace DB +{ + +/// Parse either a partition value as a (possibly compound) literal or a partition ID. +/// Produce ASTPartition. +class ParserPartition : public IParserBase +{ +protected: + const char * getName() const override { return "partition"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp index 2893611fb0..c42a0af88b 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp @@ -1,56 +1,56 @@ -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTRenameQuery.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ParserRenameQuery.h> - - -namespace DB -{ - - -/// Parse database.table or table. -static bool parseDatabaseAndTable( - ASTRenameQuery::Table & db_and_table, IParser::Pos & pos, Expected & expected) -{ - ParserIdentifier name_p; - ParserToken s_dot(TokenType::Dot); - - ASTPtr database; - ASTPtr table; - - if (!name_p.parse(pos, table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } - - db_and_table.database.clear(); - tryGetIdentifierNameInto(database, db_and_table.database); - tryGetIdentifierNameInto(table, db_and_table.table); - - return true; -} - - -bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_rename_table("RENAME TABLE"); +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTRenameQuery.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ParserRenameQuery.h> + + +namespace DB +{ + + +/// Parse database.table or table. 
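/// "db.t" fills both fields, while a bare "t" leaves the database empty; the
/// database field is cleared first so a value left over from a previous rename
/// element cannot leak through.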
+static bool parseDatabaseAndTable( + ASTRenameQuery::Table & db_and_table, IParser::Pos & pos, Expected & expected) +{ + ParserIdentifier name_p; + ParserToken s_dot(TokenType::Dot); + + ASTPtr database; + ASTPtr table; + + if (!name_p.parse(pos, table, expected)) + return false; + + if (s_dot.ignore(pos, expected)) + { + database = table; + if (!name_p.parse(pos, table, expected)) + return false; + } + + db_and_table.database.clear(); + tryGetIdentifierNameInto(database, db_and_table.database); + tryGetIdentifierNameInto(table, db_and_table.table); + + return true; +} + + +bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_rename_table("RENAME TABLE"); ParserKeyword s_exchange_tables("EXCHANGE TABLES"); ParserKeyword s_rename_dictionary("RENAME DICTIONARY"); ParserKeyword s_exchange_dictionaries("EXCHANGE DICTIONARIES"); ParserKeyword s_rename_database("RENAME DATABASE"); - ParserKeyword s_to("TO"); - ParserKeyword s_and("AND"); - ParserToken s_comma(TokenType::Comma); - - bool exchange = false; + ParserKeyword s_to("TO"); + ParserKeyword s_and("AND"); + ParserToken s_comma(TokenType::Comma); + + bool exchange = false; bool dictionary = false; - + if (s_rename_table.ignore(pos, expected)) ; else if (s_exchange_tables.ignore(pos, expected)) @@ -63,12 +63,12 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) dictionary = true; } else if (s_rename_database.ignore(pos, expected)) - { + { ASTPtr from_db; ASTPtr to_db; ParserIdentifier db_name_p; if (!db_name_p.parse(pos, from_db, expected)) - return false; + return false; if (!s_to.ignore(pos, expected)) return false; if (!db_name_p.parse(pos, to_db, expected)) @@ -89,43 +89,43 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->cluster = cluster_str; node = query; return true; - } + } else return false; - - ASTRenameQuery::Elements elements; - + + ASTRenameQuery::Elements elements; + const auto ignore_delim = [&] { return exchange ? 
s_and.ignore(pos) : s_to.ignore(pos); }; - - while (true) - { - if (!elements.empty() && !s_comma.ignore(pos)) - break; - + + while (true) + { + if (!elements.empty() && !s_comma.ignore(pos)) + break; + ASTRenameQuery::Element& ref = elements.emplace_back(); - + if (!parseDatabaseAndTable(ref.from, pos, expected) - || !ignore_delim() + || !ignore_delim() || !parseDatabaseAndTable(ref.to, pos, expected)) - return false; - } - - String cluster_str; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) - return false; - } - - auto query = std::make_shared<ASTRenameQuery>(); - query->cluster = cluster_str; - node = query; - - query->elements = elements; - query->exchange = exchange; + return false; + } + + String cluster_str; + if (ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + + auto query = std::make_shared<ASTRenameQuery>(); + query->cluster = cluster_str; + node = query; + + query->elements = elements; + query->exchange = exchange; query->dictionary = dictionary; - return true; -} - - -} + return true; +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h index b79b7de6a5..c95bc893b5 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h @@ -1,21 +1,21 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/ExpressionElementParsers.h> - - -namespace DB -{ - -/** Query like this: - * RENAME TABLE [db.]name TO [db.]name, [db.]name TO [db.]name, ... - * (An arbitrary number of tables can be renamed.) - */ -class ParserRenameQuery : public IParserBase -{ -protected: - const char * getName() const override{ return "RENAME query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/ExpressionElementParsers.h> + + +namespace DB +{ + +/** Query like this: + * RENAME TABLE [db.]name TO [db.]name, [db.]name TO [db.]name, ... + * (An arbitrary number of tables can be renamed.) 
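 * The same parser also accepts EXCHANGE TABLES a AND b, RENAME DICTIONARY,
 * EXCHANGE DICTIONARIES, and RENAME DATABASE a TO b (see ParserRenameQuery.cpp).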
+ */ +class ParserRenameQuery : public IParserBase +{ +protected: + const char * getName() const override{ return "RENAME query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp index 86195accc9..41e9ee6501 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp @@ -1,145 +1,145 @@ -#include <Parsers/ParserRolesOrUsersSet.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTLiteral.h> -#include <Parsers/ASTRolesOrUsersSet.h> -#include <Parsers/parseUserName.h> -#include <Parsers/ExpressionListParsers.h> -#include <boost/range/algorithm/find.hpp> - - -namespace DB -{ -namespace -{ +#include <Parsers/ParserRolesOrUsersSet.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/ASTRolesOrUsersSet.h> +#include <Parsers/parseUserName.h> +#include <Parsers/ExpressionListParsers.h> +#include <boost/range/algorithm/find.hpp> + + +namespace DB +{ +namespace +{ bool parseNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res) - { - return IParserBase::wrapParseImpl(pos, [&] - { - if (!id_mode) - return parseRoleName(pos, expected, res); - - if (!ParserKeyword{"ID"}.ignore(pos, expected)) - return false; - if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) - return false; - ASTPtr ast; - if (!ParserStringLiteral{}.parse(pos, ast, expected)) - return false; - String id = ast->as<ASTLiteral &>().value.safeGet<String>(); - if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - return false; - - res = std::move(id); - return true; - }); - } - - bool parseBeforeExcept( - IParserBase::Pos & pos, - Expected & expected, - bool id_mode, - bool allow_all, + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!id_mode) + return parseRoleName(pos, expected, res); + + if (!ParserKeyword{"ID"}.ignore(pos, expected)) + return false; + if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + return false; + ASTPtr ast; + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + String id = ast->as<ASTLiteral &>().value.safeGet<String>(); + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + res = std::move(id); + return true; + }); + } + + bool parseBeforeExcept( + IParserBase::Pos & pos, + Expected & expected, + bool id_mode, + bool allow_all, bool allow_any, - bool allow_current_user, + bool allow_current_user, bool & all, - Strings & names, - bool & current_user) - { - bool res_all = false; + Strings & names, + bool & current_user) + { + bool res_all = false; Strings res_names; - bool res_current_user = false; + bool res_current_user = false; Strings res_with_roles_names; - - auto parse_element = [&] - { - if (ParserKeyword{"NONE"}.ignore(pos, expected)) - return true; - - if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected)) - { - res_all = true; - return true; - } - + + auto parse_element = [&] + { + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + return true; + + if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected)) + { + res_all = true; + return true; + } + if (allow_any && 
ParserKeyword{"ANY"}.ignore(pos, expected)) { res_all = true; return true; } - if (allow_current_user && parseCurrentUserTag(pos, expected)) - { - res_current_user = true; - return true; - } - - String name; + if (allow_current_user && parseCurrentUserTag(pos, expected)) + { + res_current_user = true; + return true; + } + + String name; if (parseNameOrID(pos, expected, id_mode, name)) - { - res_names.emplace_back(std::move(name)); - return true; - } - - return false; - }; - - if (!ParserList::parseUtil(pos, expected, parse_element, false)) - return false; - - names = std::move(res_names); + { + res_names.emplace_back(std::move(name)); + return true; + } + + return false; + }; + + if (!ParserList::parseUtil(pos, expected, parse_element, false)) + return false; + + names = std::move(res_names); current_user = res_current_user; - all = res_all; - return true; - } - - bool parseExceptAndAfterExcept( - IParserBase::Pos & pos, - Expected & expected, - bool id_mode, - bool allow_current_user, - Strings & except_names, - bool & except_current_user) - { + all = res_all; + return true; + } + + bool parseExceptAndAfterExcept( + IParserBase::Pos & pos, + Expected & expected, + bool id_mode, + bool allow_current_user, + Strings & except_names, + bool & except_current_user) + { return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserKeyword{"EXCEPT"}.ignore(pos, expected)) - return false; - - bool unused; + if (!ParserKeyword{"EXCEPT"}.ignore(pos, expected)) + return false; + + bool unused; return parseBeforeExcept(pos, expected, id_mode, false, false, allow_current_user, unused, except_names, except_current_user); - }); - } -} - - -bool ParserRolesOrUsersSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ + }); + } +} + + +bool ParserRolesOrUsersSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ bool all = false; - Strings names; - bool current_user = false; - Strings except_names; - bool except_current_user = false; - + Strings names; + bool current_user = false; + Strings except_names; + bool except_current_user = false; + if (!parseBeforeExcept(pos, expected, id_mode, allow_all, allow_any, allow_current_user, all, names, current_user)) - return false; - - parseExceptAndAfterExcept(pos, expected, id_mode, allow_current_user, except_names, except_current_user); - - if (all) - names.clear(); - - auto result = std::make_shared<ASTRolesOrUsersSet>(); - result->names = std::move(names); - result->current_user = current_user; - result->all = all; - result->except_names = std::move(except_names); - result->except_current_user = except_current_user; + return false; + + parseExceptAndAfterExcept(pos, expected, id_mode, allow_current_user, except_names, except_current_user); + + if (all) + names.clear(); + + auto result = std::make_shared<ASTRolesOrUsersSet>(); + result->names = std::move(names); + result->current_user = current_user; + result->all = all; + result->except_names = std::move(except_names); + result->except_current_user = except_current_user; result->allow_users = allow_users; result->allow_roles = allow_roles; - result->id_mode = id_mode; + result->id_mode = id_mode; result->use_keyword_any = all && allow_any && !allow_all; - node = result; - return true; -} - -} + node = result; + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h index 46cb0bbd43..9ae9937e78 100644 --- 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h @@ -1,35 +1,35 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ -/** Parses a string like this: +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** Parses a string like this: * {user_name | role_name | CURRENT_USER | ALL | NONE} [,...] * [EXCEPT {user_name | role_name | CURRENT_USER | ALL | NONE} [,...]] - */ -class ParserRolesOrUsersSet : public IParserBase -{ -public: - ParserRolesOrUsersSet & allowAll(bool allow_all_ = true) { allow_all = allow_all_; return *this; } + */ +class ParserRolesOrUsersSet : public IParserBase +{ +public: + ParserRolesOrUsersSet & allowAll(bool allow_all_ = true) { allow_all = allow_all_; return *this; } ParserRolesOrUsersSet & allowAny(bool allow_any_ = true) { allow_any = allow_any_; return *this; } ParserRolesOrUsersSet & allowUsers(bool allow_users_ = true) { allow_users = allow_users_; return *this; } - ParserRolesOrUsersSet & allowCurrentUser(bool allow_current_user_ = true) { allow_current_user = allow_current_user_; return *this; } + ParserRolesOrUsersSet & allowCurrentUser(bool allow_current_user_ = true) { allow_current_user = allow_current_user_; return *this; } ParserRolesOrUsersSet & allowRoles(bool allow_roles_ = true) { allow_roles = allow_roles_; return *this; } - ParserRolesOrUsersSet & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } - -protected: - const char * getName() const override { return "RolesOrUsersSet"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - bool allow_all = false; + ParserRolesOrUsersSet & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } + +protected: + const char * getName() const override { return "RolesOrUsersSet"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool allow_all = false; bool allow_any = false; bool allow_users = false; - bool allow_current_user = false; + bool allow_current_user = false; bool allow_roles = false; - bool id_mode = false; -}; - -} + bool id_mode = false; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp index 2ddb27cc83..678474af04 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp @@ -1,86 +1,86 @@ -#include <Parsers/ParserSetRoleQuery.h> -#include <Parsers/ASTSetRoleQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ASTRolesOrUsersSet.h> -#include <Parsers/ParserRolesOrUsersSet.h> - - -namespace DB -{ -namespace -{ - bool parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & roles) - { - return IParserBase::wrapParseImpl(pos, [&] - { - ASTPtr ast; - ParserRolesOrUsersSet roles_p; +#include <Parsers/ParserSetRoleQuery.h> +#include <Parsers/ASTSetRoleQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ASTRolesOrUsersSet.h> +#include <Parsers/ParserRolesOrUsersSet.h> + + +namespace DB +{ +namespace +{ + bool parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & roles) + { + return IParserBase::wrapParseImpl(pos, [&] + { + ASTPtr ast; + ParserRolesOrUsersSet roles_p; 
roles_p.allowRoles().allowAll(); - if (!roles_p.parse(pos, ast, expected)) - return false; - - roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast); + if (!roles_p.parse(pos, ast, expected)) + return false; + + roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast); roles->allow_users = false; - return true; - }); - } - - bool parseToUsers(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & to_users) - { - return IParserBase::wrapParseImpl(pos, [&] - { - if (!ParserKeyword{"TO"}.ignore(pos, expected)) - return false; - - ASTPtr ast; - ParserRolesOrUsersSet users_p; + return true; + }); + } + + bool parseToUsers(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & to_users) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserKeyword{"TO"}.ignore(pos, expected)) + return false; + + ASTPtr ast; + ParserRolesOrUsersSet users_p; users_p.allowUsers().allowCurrentUser(); - if (!users_p.parse(pos, ast, expected)) - return false; - - to_users = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast); + if (!users_p.parse(pos, ast, expected)) + return false; + + to_users = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast); to_users->allow_roles = false; - return true; - }); - } -} - - -bool ParserSetRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - using Kind = ASTSetRoleQuery::Kind; - Kind kind; - if (ParserKeyword{"SET ROLE DEFAULT"}.ignore(pos, expected)) - kind = Kind::SET_ROLE_DEFAULT; - else if (ParserKeyword{"SET ROLE"}.ignore(pos, expected)) - kind = Kind::SET_ROLE; - else if (ParserKeyword{"SET DEFAULT ROLE"}.ignore(pos, expected)) - kind = Kind::SET_DEFAULT_ROLE; - else - return false; - - std::shared_ptr<ASTRolesOrUsersSet> roles; - std::shared_ptr<ASTRolesOrUsersSet> to_users; - - if ((kind == Kind::SET_ROLE) || (kind == Kind::SET_DEFAULT_ROLE)) - { - if (!parseRoles(pos, expected, roles)) - return false; - - if (kind == Kind::SET_DEFAULT_ROLE) - { - if (!parseToUsers(pos, expected, to_users)) - return false; - } - } - - auto query = std::make_shared<ASTSetRoleQuery>(); - node = query; - - query->kind = kind; - query->roles = std::move(roles); - query->to_users = std::move(to_users); - - return true; -} -} + return true; + }); + } +} + + +bool ParserSetRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + using Kind = ASTSetRoleQuery::Kind; + Kind kind; + if (ParserKeyword{"SET ROLE DEFAULT"}.ignore(pos, expected)) + kind = Kind::SET_ROLE_DEFAULT; + else if (ParserKeyword{"SET ROLE"}.ignore(pos, expected)) + kind = Kind::SET_ROLE; + else if (ParserKeyword{"SET DEFAULT ROLE"}.ignore(pos, expected)) + kind = Kind::SET_DEFAULT_ROLE; + else + return false; + + std::shared_ptr<ASTRolesOrUsersSet> roles; + std::shared_ptr<ASTRolesOrUsersSet> to_users; + + if ((kind == Kind::SET_ROLE) || (kind == Kind::SET_DEFAULT_ROLE)) + { + if (!parseRoles(pos, expected, roles)) + return false; + + if (kind == Kind::SET_DEFAULT_ROLE) + { + if (!parseToUsers(pos, expected, to_users)) + return false; + } + } + + auto query = std::make_shared<ASTSetRoleQuery>(); + node = query; + + query->kind = kind; + query->roles = std::move(roles); + query->to_users = std::move(to_users); + + return true; +} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h index 54ced410d3..7e59f08e7b 100644 --- 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h @@ -1,18 +1,18 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ -/** Parses queries like - * SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]} - * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...] - */ -class ParserSetRoleQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SET ROLE or SET DEFAULT ROLE query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** Parses queries like + * SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]} + * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...] + */ +class ParserSetRoleQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SET ROLE or SET DEFAULT ROLE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp index a30092ae71..d7d982efe2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp @@ -1,20 +1,20 @@ -#include <Parsers/ParserSettingsProfileElement.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionListParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTSettingsProfileElement.h> -#include <Parsers/ASTLiteral.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/parseIdentifierOrStringLiteral.h> +#include <Parsers/ParserSettingsProfileElement.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionListParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTSettingsProfileElement.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/parseIdentifierOrStringLiteral.h> #include <boost/algorithm/string/predicate.hpp> - - -namespace DB -{ -namespace -{ - bool parseProfileKeyword(IParserBase::Pos & pos, Expected & expected, bool use_inherit_keyword) - { + + +namespace DB +{ +namespace +{ + bool parseProfileKeyword(IParserBase::Pos & pos, Expected & expected, bool use_inherit_keyword) + { if (ParserKeyword{"PROFILE"}.ignore(pos, expected)) return true; @@ -25,127 +25,127 @@ namespace } return false; - } - - - bool parseProfileNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res) - { - return IParserBase::wrapParseImpl(pos, [&] - { - ASTPtr ast; - if (!id_mode) - return parseIdentifierOrStringLiteral(pos, expected, res); - - if (!ParserKeyword{"ID"}.ignore(pos, expected)) - return false; - if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) - return false; - if (!ParserStringLiteral{}.parse(pos, ast, expected)) - return false; - String id = ast->as<ASTLiteral &>().value.safeGet<String>(); - if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - return false; - - res = std::move(id); - return true; - }); - } - - - bool parseValue(IParserBase::Pos & pos, Expected & expected, Field & res) - { - 
return IParserBase::wrapParseImpl(pos, [&] - { - if (!ParserToken{TokenType::Equals}.ignore(pos, expected)) - return false; - - ASTPtr ast; - if (!ParserLiteral{}.parse(pos, ast, expected)) - return false; - - res = ast->as<ASTLiteral &>().value; - return true; - }); - } - - - bool parseMinMaxValue(IParserBase::Pos & pos, Expected & expected, Field & min_value, Field & max_value) - { - return IParserBase::wrapParseImpl(pos, [&] - { - bool is_min_value = ParserKeyword{"MIN"}.ignore(pos, expected); - bool is_max_value = !is_min_value && ParserKeyword{"MAX"}.ignore(pos, expected); - if (!is_min_value && !is_max_value) - return false; - - ParserToken{TokenType::Equals}.ignore(pos, expected); - - ASTPtr ast; - if (!ParserLiteral{}.parse(pos, ast, expected)) - return false; - - auto min_or_max_value = ast->as<ASTLiteral &>().value; - - if (is_min_value) - min_value = min_or_max_value; - else - max_value = min_or_max_value; - return true; - }); - } - - - bool parseReadonlyOrWritableKeyword(IParserBase::Pos & pos, Expected & expected, std::optional<bool> & readonly) - { - return IParserBase::wrapParseImpl(pos, [&] - { - if (ParserKeyword{"READONLY"}.ignore(pos, expected)) - { - readonly = true; - return true; - } - else if (ParserKeyword{"WRITABLE"}.ignore(pos, expected)) - { - readonly = false; - return true; - } - else - return false; - }); - } - - - bool parseSettingNameWithValueOrConstraints( - IParserBase::Pos & pos, - Expected & expected, - String & setting_name, - Field & value, - Field & min_value, - Field & max_value, - std::optional<bool> & readonly) - { - return IParserBase::wrapParseImpl(pos, [&] - { - ASTPtr name_ast; + } + + + bool parseProfileNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res) + { + return IParserBase::wrapParseImpl(pos, [&] + { + ASTPtr ast; + if (!id_mode) + return parseIdentifierOrStringLiteral(pos, expected, res); + + if (!ParserKeyword{"ID"}.ignore(pos, expected)) + return false; + if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + return false; + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + String id = ast->as<ASTLiteral &>().value.safeGet<String>(); + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + res = std::move(id); + return true; + }); + } + + + bool parseValue(IParserBase::Pos & pos, Expected & expected, Field & res) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserToken{TokenType::Equals}.ignore(pos, expected)) + return false; + + ASTPtr ast; + if (!ParserLiteral{}.parse(pos, ast, expected)) + return false; + + res = ast->as<ASTLiteral &>().value; + return true; + }); + } + + + bool parseMinMaxValue(IParserBase::Pos & pos, Expected & expected, Field & min_value, Field & max_value) + { + return IParserBase::wrapParseImpl(pos, [&] + { + bool is_min_value = ParserKeyword{"MIN"}.ignore(pos, expected); + bool is_max_value = !is_min_value && ParserKeyword{"MAX"}.ignore(pos, expected); + if (!is_min_value && !is_max_value) + return false; + + ParserToken{TokenType::Equals}.ignore(pos, expected); + + ASTPtr ast; + if (!ParserLiteral{}.parse(pos, ast, expected)) + return false; + + auto min_or_max_value = ast->as<ASTLiteral &>().value; + + if (is_min_value) + min_value = min_or_max_value; + else + max_value = min_or_max_value; + return true; + }); + } + + + bool parseReadonlyOrWritableKeyword(IParserBase::Pos & pos, Expected & expected, std::optional<bool> & readonly) + { + return IParserBase::wrapParseImpl(pos, [&] + { + 
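            // Three-state outcome: READONLY sets readonly = true, WRITABLE sets
            // readonly = false; if neither keyword matches, the optional is left
            // unset and the wrapped parse fails (position rolls back).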
if (ParserKeyword{"READONLY"}.ignore(pos, expected)) + { + readonly = true; + return true; + } + else if (ParserKeyword{"WRITABLE"}.ignore(pos, expected)) + { + readonly = false; + return true; + } + else + return false; + }); + } + + + bool parseSettingNameWithValueOrConstraints( + IParserBase::Pos & pos, + Expected & expected, + String & setting_name, + Field & value, + Field & min_value, + Field & max_value, + std::optional<bool> & readonly) + { + return IParserBase::wrapParseImpl(pos, [&] + { + ASTPtr name_ast; if (!ParserCompoundIdentifier{}.parse(pos, name_ast, expected)) - return false; - - String res_setting_name = getIdentifierName(name_ast); - Field res_value; - Field res_min_value; - Field res_max_value; - std::optional<bool> res_readonly; - - bool has_value_or_constraint = false; - while (parseValue(pos, expected, res_value) || parseMinMaxValue(pos, expected, res_min_value, res_max_value) - || parseReadonlyOrWritableKeyword(pos, expected, res_readonly)) - { - has_value_or_constraint = true; - } - - if (!has_value_or_constraint) - return false; - + return false; + + String res_setting_name = getIdentifierName(name_ast); + Field res_value; + Field res_min_value; + Field res_max_value; + std::optional<bool> res_readonly; + + bool has_value_or_constraint = false; + while (parseValue(pos, expected, res_value) || parseMinMaxValue(pos, expected, res_min_value, res_max_value) + || parseReadonlyOrWritableKeyword(pos, expected, res_readonly)) + { + has_value_or_constraint = true; + } + + if (!has_value_or_constraint) + return false; + if (boost::iequals(res_setting_name, "PROFILE") && res_value.isNull() && res_min_value.isNull() && res_max_value.isNull() && res_readonly) { @@ -155,96 +155,96 @@ namespace return false; } - setting_name = std::move(res_setting_name); - value = std::move(res_value); - min_value = std::move(res_min_value); - max_value = std::move(res_max_value); - readonly = res_readonly; - return true; - }); - } - - - bool parseSettingsProfileElement(IParserBase::Pos & pos, - Expected & expected, - bool id_mode, - bool use_inherit_keyword, - bool previous_element_was_parent_profile, - std::shared_ptr<ASTSettingsProfileElement> & result) - { - return IParserBase::wrapParseImpl(pos, [&] - { - String parent_profile; - String setting_name; - Field value; - Field min_value; - Field max_value; - std::optional<bool> readonly; - + setting_name = std::move(res_setting_name); + value = std::move(res_value); + min_value = std::move(res_min_value); + max_value = std::move(res_max_value); + readonly = res_readonly; + return true; + }); + } + + + bool parseSettingsProfileElement(IParserBase::Pos & pos, + Expected & expected, + bool id_mode, + bool use_inherit_keyword, + bool previous_element_was_parent_profile, + std::shared_ptr<ASTSettingsProfileElement> & result) + { + return IParserBase::wrapParseImpl(pos, [&] + { + String parent_profile; + String setting_name; + Field value; + Field min_value; + Field max_value; + std::optional<bool> readonly; + bool ok = parseSettingNameWithValueOrConstraints(pos, expected, setting_name, value, min_value, max_value, readonly); if (!ok && (parseProfileKeyword(pos, expected, use_inherit_keyword) || previous_element_was_parent_profile)) ok = parseProfileNameOrID(pos, expected, id_mode, parent_profile); if (!ok) - return false; - - result = std::make_shared<ASTSettingsProfileElement>(); - result->parent_profile = std::move(parent_profile); - result->setting_name = std::move(setting_name); - result->value = std::move(value); - result->min_value = 
std::move(min_value); - result->max_value = std::move(max_value); - result->readonly = readonly; - result->id_mode = id_mode; - result->use_inherit_keyword = use_inherit_keyword; - return true; - }); - } -} - - -bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - std::shared_ptr<ASTSettingsProfileElement> res; - if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, false, res)) - return false; - - node = res; - return true; -} - - -bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements; - - if (ParserKeyword{"NONE"}.ignore(pos, expected)) - { - } - else - { - bool previous_element_was_parent_profile = false; - - auto parse_element = [&] - { - std::shared_ptr<ASTSettingsProfileElement> element; - if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, previous_element_was_parent_profile, element)) - return false; - - elements.push_back(element); - previous_element_was_parent_profile = !element->parent_profile.empty(); - return true; - }; - - if (!ParserList::parseUtil(pos, expected, parse_element, false)) - return false; - } - - auto result = std::make_shared<ASTSettingsProfileElements>(); - result->elements = std::move(elements); - node = result; - return true; -} - -} + return false; + + result = std::make_shared<ASTSettingsProfileElement>(); + result->parent_profile = std::move(parent_profile); + result->setting_name = std::move(setting_name); + result->value = std::move(value); + result->min_value = std::move(min_value); + result->max_value = std::move(max_value); + result->readonly = readonly; + result->id_mode = id_mode; + result->use_inherit_keyword = use_inherit_keyword; + return true; + }); + } +} + + +bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::shared_ptr<ASTSettingsProfileElement> res; + if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, false, res)) + return false; + + node = res; + return true; +} + + +bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements; + + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + { + } + else + { + bool previous_element_was_parent_profile = false; + + auto parse_element = [&] + { + std::shared_ptr<ASTSettingsProfileElement> element; + if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, previous_element_was_parent_profile, element)) + return false; + + elements.push_back(element); + previous_element_was_parent_profile = !element->parent_profile.empty(); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_element, false)) + return false; + } + + auto result = std::make_shared<ASTSettingsProfileElements>(); + result->elements = std::move(elements); + node = result; + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h index a54adc4f88..8843591a56 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h @@ -1,42 +1,42 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ -/** Parses a string like this: - 
* {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name' - */ -class ParserSettingsProfileElement : public IParserBase -{ -public: - ParserSettingsProfileElement & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } - ParserSettingsProfileElement & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; } - -protected: - const char * getName() const override { return "SettingsProfileElement"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - bool id_mode = false; - bool use_inherit_keyword = false; -}; - - -class ParserSettingsProfileElements : public IParserBase -{ -public: - ParserSettingsProfileElements & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } - ParserSettingsProfileElements & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; } - -protected: - const char * getName() const override { return "SettingsProfileElements"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - bool id_mode = false; - bool use_inherit_keyword = false; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** Parses a string like this: + * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name' + */ +class ParserSettingsProfileElement : public IParserBase +{ +public: + ParserSettingsProfileElement & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } + ParserSettingsProfileElement & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; } + +protected: + const char * getName() const override { return "SettingsProfileElement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool id_mode = false; + bool use_inherit_keyword = false; +}; + + +class ParserSettingsProfileElements : public IParserBase +{ +public: + ParserSettingsProfileElements & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } + ParserSettingsProfileElements & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; } + +protected: + const char * getName() const override { return "SettingsProfileElements"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool id_mode = false; + bool use_inherit_keyword = false; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h index fe257168dc..b6483aa3d4 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h @@ -1,32 +1,32 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTShowAccessQuery.h> - - -namespace DB -{ - -/** Query SHOW ACCESS - */ -class ParserShowAccessQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SHOW ACCESS query"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - auto query = std::make_shared<ASTShowAccessQuery>(); - - if (!ParserKeyword("SHOW ACCESS").ignore(pos, 
expected)) - return false; - - node = query; - - return true; - } -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTShowAccessQuery.h> + + +namespace DB +{ + +/** Query SHOW ACCESS + */ +class ParserShowAccessQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW ACCESS query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + auto query = std::make_shared<ASTShowAccessQuery>(); + + if (!ParserKeyword("SHOW ACCESS").ignore(pos, expected)) + return false; + + node = query; + + return true; + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp index b5ef45122d..bd9e401277 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp @@ -1,40 +1,40 @@ -#include <Parsers/ParserShowGrantsQuery.h> -#include <Parsers/ParserRolesOrUsersSet.h> -#include <Parsers/ASTRolesOrUsersSet.h> -#include <Parsers/ASTShowGrantsQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/parseUserName.h> - - -namespace DB -{ -bool ParserShowGrantsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - if (!ParserKeyword{"SHOW GRANTS"}.ignore(pos, expected)) - return false; - - std::shared_ptr<ASTRolesOrUsersSet> for_roles; - - if (ParserKeyword{"FOR"}.ignore(pos, expected)) - { - ASTPtr for_roles_ast; - ParserRolesOrUsersSet for_roles_p; +#include <Parsers/ParserShowGrantsQuery.h> +#include <Parsers/ParserRolesOrUsersSet.h> +#include <Parsers/ASTRolesOrUsersSet.h> +#include <Parsers/ASTShowGrantsQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/parseUserName.h> + + +namespace DB +{ +bool ParserShowGrantsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"SHOW GRANTS"}.ignore(pos, expected)) + return false; + + std::shared_ptr<ASTRolesOrUsersSet> for_roles; + + if (ParserKeyword{"FOR"}.ignore(pos, expected)) + { + ASTPtr for_roles_ast; + ParserRolesOrUsersSet for_roles_p; for_roles_p.allowUsers().allowRoles().allowAll().allowCurrentUser(); - if (!for_roles_p.parse(pos, for_roles_ast, expected)) - return false; - - for_roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(for_roles_ast); - } - else - { - for_roles = std::make_shared<ASTRolesOrUsersSet>(); - for_roles->current_user = true; - } - - auto query = std::make_shared<ASTShowGrantsQuery>(); - query->for_roles = std::move(for_roles); - node = query; - - return true; -} -} + if (!for_roles_p.parse(pos, for_roles_ast, expected)) + return false; + + for_roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(for_roles_ast); + } + else + { + for_roles = std::make_shared<ASTRolesOrUsersSet>(); + for_roles->current_user = true; + } + + auto query = std::make_shared<ASTShowGrantsQuery>(); + query->for_roles = std::move(for_roles); + node = query; + + return true; +} +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h index bfb1afb8ca..88409b5b7e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h +++ 
b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h @@ -1,17 +1,17 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ -/** Parses queries like - * SHOW GRANTS [FOR user_name] - */ -class ParserShowGrantsQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SHOW GRANTS query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** Parses queries like + * SHOW GRANTS [FOR user_name] + */ +class ParserShowGrantsQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW GRANTS query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp index de110f617f..56b4327dcc 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp @@ -1,21 +1,21 @@ -#include <Parsers/ParserShowPrivilegesQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ASTShowPrivilegesQuery.h> - - -namespace DB -{ - -bool ParserShowPrivilegesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto query = std::make_shared<ASTShowPrivilegesQuery>(); - - if (!ParserKeyword("SHOW PRIVILEGES").ignore(pos, expected)) - return false; - - node = query; - - return true; -} - -} +#include <Parsers/ParserShowPrivilegesQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ASTShowPrivilegesQuery.h> + + +namespace DB +{ + +bool ParserShowPrivilegesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto query = std::make_shared<ASTShowPrivilegesQuery>(); + + if (!ParserKeyword("SHOW PRIVILEGES").ignore(pos, expected)) + return false; + + node = query; + + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h index 38aa76e7ea..2604e7f28c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h @@ -1,18 +1,18 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ - -/** Query SHOW PRIVILEGES - */ -class ParserShowPrivilegesQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SHOW PRIVILEGES query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ + +/** Query SHOW PRIVILEGES + */ +class ParserShowPrivilegesQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW PRIVILEGES query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h index 0eec1d4c90..de08894e05 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h +++ 
b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h @@ -1,32 +1,32 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTShowProcesslistQuery.h> - - -namespace DB -{ - -/** Query SHOW PROCESSLIST - */ -class ParserShowProcesslistQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SHOW PROCESSLIST query"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - auto query = std::make_shared<ASTShowProcesslistQuery>(); - - if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected)) - return false; - - node = query; - - return true; - } -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTShowProcesslistQuery.h> + + +namespace DB +{ + +/** Query SHOW PROCESSLIST + */ +class ParserShowProcesslistQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW PROCESSLIST query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + auto query = std::make_shared<ASTShowProcesslistQuery>(); + + if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected)) + return false; + + node = query; + + return true; + } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp index 763c60f78a..e8cf732d09 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp @@ -1,53 +1,53 @@ -#include <Parsers/ASTLiteral.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTShowTablesQuery.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ParserShowTablesQuery.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ExpressionListParsers.h> -#include <Parsers/parseIdentifierOrStringLiteral.h> - -#include <Common/typeid_cast.h> - - -namespace DB -{ - - -bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_show("SHOW"); - ParserKeyword s_temporary("TEMPORARY"); - ParserKeyword s_tables("TABLES"); - ParserKeyword s_databases("DATABASES"); - ParserKeyword s_clusters("CLUSTERS"); - ParserKeyword s_cluster("CLUSTER"); - ParserKeyword s_dictionaries("DICTIONARIES"); +#include <Parsers/ASTLiteral.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTShowTablesQuery.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ParserShowTablesQuery.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ExpressionListParsers.h> +#include <Parsers/parseIdentifierOrStringLiteral.h> + +#include <Common/typeid_cast.h> + + +namespace DB +{ + + +bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_show("SHOW"); + ParserKeyword s_temporary("TEMPORARY"); + ParserKeyword s_tables("TABLES"); + ParserKeyword s_databases("DATABASES"); + ParserKeyword s_clusters("CLUSTERS"); + ParserKeyword s_cluster("CLUSTER"); + ParserKeyword s_dictionaries("DICTIONARIES"); ParserKeyword s_settings("SETTINGS"); ParserKeyword s_changed("CHANGED"); - ParserKeyword s_from("FROM"); - ParserKeyword s_in("IN"); - ParserKeyword s_not("NOT"); - ParserKeyword s_like("LIKE"); - 
ParserKeyword s_ilike("ILIKE"); - ParserKeyword s_where("WHERE"); - ParserKeyword s_limit("LIMIT"); - ParserStringLiteral like_p; - ParserIdentifier name_p; - ParserExpressionWithOptionalAlias exp_elem(false); - - ASTPtr like; - ASTPtr database; - - auto query = std::make_shared<ASTShowTablesQuery>(); - - if (!s_show.ignore(pos, expected)) - return false; - + ParserKeyword s_from("FROM"); + ParserKeyword s_in("IN"); + ParserKeyword s_not("NOT"); + ParserKeyword s_like("LIKE"); + ParserKeyword s_ilike("ILIKE"); + ParserKeyword s_where("WHERE"); + ParserKeyword s_limit("LIMIT"); + ParserStringLiteral like_p; + ParserIdentifier name_p; + ParserExpressionWithOptionalAlias exp_elem(false); + + ASTPtr like; + ASTPtr database; + + auto query = std::make_shared<ASTShowTablesQuery>(); + + if (!s_show.ignore(pos, expected)) + return false; + if (s_databases.ignore(pos, expected)) - { - query->databases = true; + { + query->databases = true; if (s_not.ignore(pos, expected)) query->not_like = true; @@ -67,40 +67,40 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!exp_elem.parse(pos, query->limit_length, expected)) return false; } - } + } else if (s_clusters.ignore(pos, expected)) - { - query->clusters = true; - - if (s_not.ignore(pos, expected)) - query->not_like = true; - - if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) - { - if (insensitive) - query->case_insensitive_like = true; - - if (!like_p.parse(pos, like, expected)) - return false; - } - else if (query->not_like) - return false; - if (s_limit.ignore(pos, expected)) - { - if (!exp_elem.parse(pos, query->limit_length, expected)) - return false; - } - } + { + query->clusters = true; + + if (s_not.ignore(pos, expected)) + query->not_like = true; + + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) + { + if (insensitive) + query->case_insensitive_like = true; + + if (!like_p.parse(pos, like, expected)) + return false; + } + else if (query->not_like) + return false; + if (s_limit.ignore(pos, expected)) + { + if (!exp_elem.parse(pos, query->limit_length, expected)) + return false; + } + } else if (s_cluster.ignore(pos, expected)) - { - query->cluster = true; - - String cluster_str; - if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str)) - return false; - - query->cluster_str = std::move(cluster_str); - } + { + query->cluster = true; + + String cluster_str; + if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str)) + return false; + + query->cluster_str = std::move(cluster_str); + } else if (bool changed = s_changed.ignore(pos, expected); changed || s_settings.ignore(pos, expected)) { query->m_settings = true; @@ -124,60 +124,60 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec else return false; } - else - { - if (s_temporary.ignore(pos)) - query->temporary = true; - - if (!s_tables.ignore(pos, expected)) - { - if (s_dictionaries.ignore(pos, expected)) - query->dictionaries = true; - else - return false; - } - - if (s_from.ignore(pos, expected) || s_in.ignore(pos, expected)) - { - if (!name_p.parse(pos, database, expected)) - return false; - } - - if (s_not.ignore(pos, expected)) - query->not_like = true; - - if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) - { - if (insensitive) - query->case_insensitive_like = true; - - if (!like_p.parse(pos, like, expected)) - return false; - } - else if (query->not_like) 
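Note on the condition used just above: `if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected))` relies on a C++17 if-statement initializer to try the case-insensitive keyword first, fall back to `LIKE`, and remember which alternative matched, while each failed `ignore()` leaves the position untouched. Below is a minimal self-contained sketch of that technique; the `Cursor` type and `ignoreKeyword` helper are hypothetical stand-ins for `IParser::Pos` and `ParserKeyword::ignore`, not part of this codebase.

    #include <cctype>
    #include <iostream>
    #include <string>

    /// Hypothetical stand-in for IParser::Pos: a cursor over an input string.
    struct Cursor
    {
        const std::string & text;
        size_t offset = 0;
    };

    /// Keyword matcher in the spirit of ParserKeyword::ignore(): advances the
    /// cursor on success, restores it untouched on failure.
    static bool ignoreKeyword(Cursor & pos, const std::string & keyword)
    {
        size_t saved = pos.offset;
        for (char expected_ch : keyword)
        {
            if (pos.offset >= pos.text.size()
                || std::toupper(static_cast<unsigned char>(pos.text[pos.offset])) != expected_ch)
            {
                pos.offset = saved;   /// roll back, like a failed parser branch
                return false;
            }
            ++pos.offset;
        }
        return true;
    }

    int main()
    {
        std::string input = "ilike '%tmp%'";
        Cursor pos{input};

        bool case_insensitive_like = false;
        /// C++17 if-with-initializer: try ILIKE first, fall back to LIKE,
        /// and keep a flag recording which alternative matched.
        if (bool insensitive = ignoreKeyword(pos, "ILIKE"); insensitive || ignoreKeyword(pos, "LIKE"))
            case_insensitive_like = insensitive;

        std::cout << (case_insensitive_like ? "matched ILIKE" : "matched LIKE") << '\n';
    }

The rollback-on-failure contract is what makes the `else if` chains in this parser safe: an alternative that does not match consumes nothing, so the next branch sees the original position.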
- return false; - else if (s_where.ignore(pos, expected)) - { - if (!exp_elem.parse(pos, query->where_expression, expected)) - return false; - } - - if (s_limit.ignore(pos, expected)) - { - if (!exp_elem.parse(pos, query->limit_length, expected)) - return false; - } - } - - tryGetIdentifierNameInto(database, query->from); - - if (like) - query->like = safeGet<const String &>(like->as<ASTLiteral &>().value); - - node = query; - - return true; -} - - -} + else + { + if (s_temporary.ignore(pos)) + query->temporary = true; + + if (!s_tables.ignore(pos, expected)) + { + if (s_dictionaries.ignore(pos, expected)) + query->dictionaries = true; + else + return false; + } + + if (s_from.ignore(pos, expected) || s_in.ignore(pos, expected)) + { + if (!name_p.parse(pos, database, expected)) + return false; + } + + if (s_not.ignore(pos, expected)) + query->not_like = true; + + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) + { + if (insensitive) + query->case_insensitive_like = true; + + if (!like_p.parse(pos, like, expected)) + return false; + } + else if (query->not_like) + return false; + else if (s_where.ignore(pos, expected)) + { + if (!exp_elem.parse(pos, query->where_expression, expected)) + return false; + } + + if (s_limit.ignore(pos, expected)) + { + if (!exp_elem.parse(pos, query->limit_length, expected)) + return false; + } + } + + tryGetIdentifierNameInto(database, query->from); + + if (like) + query->like = safeGet<const String &>(like->as<ASTLiteral &>().value); + + node = query; + + return true; +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h index 70aa18333f..3b8bb03327 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h @@ -1,21 +1,21 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ - -/** Query like this: - * SHOW TABLES [FROM db] [[NOT] [I]LIKE 'str'] [LIMIT expr] - * or - * SHOW DATABASES. - */ -class ParserShowTablesQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ + +/** Query like this: + * SHOW TABLES [FROM db] [[NOT] [I]LIKE 'str'] [LIMIT expr] + * or + * SHOW DATABASES. 
+ */ +class ParserShowTablesQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp index e52323ac3f..66bd39e020 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp @@ -1,20 +1,20 @@ -#include <Parsers/ParserSystemQuery.h> -#include <Parsers/ASTSystemQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTLiteral.h> -#include <Parsers/parseDatabaseAndTableName.h> - - -namespace ErrorCodes -{ -} - - -namespace DB -{ - +#include <Parsers/ParserSystemQuery.h> +#include <Parsers/ASTSystemQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/parseDatabaseAndTableName.h> + + +namespace ErrorCodes +{ +} + + +namespace DB +{ + static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery> & res, IParser::Pos & pos, Expected & expected, bool require_table, bool allow_string_literal) { @@ -23,7 +23,7 @@ static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery> /// Need to support both String cluster; bool parsed_on_cluster = false; - + if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) @@ -57,37 +57,37 @@ static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery> return true; } -bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) -{ - if (!ParserKeyword{"SYSTEM"}.ignore(pos, expected)) - return false; - - using Type = ASTSystemQuery::Type; - - auto res = std::make_shared<ASTSystemQuery>(); - - bool found = false; - for (int i = static_cast<int>(Type::UNKNOWN) + 1; i < static_cast<int>(Type::END); ++i) - { - Type t = static_cast<Type>(i); - if (ParserKeyword{ASTSystemQuery::typeToString(t)}.ignore(pos, expected)) - { - res->type = t; - found = true; - } - } - - if (!found) - return false; - - switch (res->type) - { - case Type::RELOAD_DICTIONARY: - { +bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"SYSTEM"}.ignore(pos, expected)) + return false; + + using Type = ASTSystemQuery::Type; + + auto res = std::make_shared<ASTSystemQuery>(); + + bool found = false; + for (int i = static_cast<int>(Type::UNKNOWN) + 1; i < static_cast<int>(Type::END); ++i) + { + Type t = static_cast<Type>(i); + if (ParserKeyword{ASTSystemQuery::typeToString(t)}.ignore(pos, expected)) + { + res->type = t; + found = true; + } + } + + if (!found) + return false; + + switch (res->type) + { + case Type::RELOAD_DICTIONARY: + { if (!parseQueryWithOnClusterAndMaybeTable(res, pos, expected, /* require table = */ true, /* allow_string_literal = */ true)) - return false; - break; - } + return false; + break; + } case Type::RELOAD_MODEL: { String cluster_str; @@ -107,7 +107,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserIdentifier model_parser; ASTPtr 
model; String target_model; - + if (!model_parser.parse(pos, model, expected)) return false; @@ -117,54 +117,54 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } - case Type::DROP_REPLICA: - { - ASTPtr ast; - if (!ParserStringLiteral{}.parse(pos, ast, expected)) - return false; - res->replica = ast->as<ASTLiteral &>().value.safeGet<String>(); - if (ParserKeyword{"FROM"}.ignore(pos, expected)) - { - // way 1. parse replica database - // way 2. parse replica tables - // way 3. parse replica zkpath - if (ParserKeyword{"DATABASE"}.ignore(pos, expected)) - { - ParserIdentifier database_parser; - ASTPtr database; - if (!database_parser.parse(pos, database, expected)) - return false; - tryGetIdentifierNameInto(database, res->database); - } - else if (ParserKeyword{"TABLE"}.ignore(pos, expected)) - { - parseDatabaseAndTableName(pos, expected, res->database, res->table); - } - else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected)) - { - ASTPtr path_ast; - if (!ParserStringLiteral{}.parse(pos, path_ast, expected)) - return false; - String zk_path = path_ast->as<ASTLiteral &>().value.safeGet<String>(); - if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/') - zk_path.pop_back(); - res->replica_zk_path = zk_path; - } - else - return false; - } - else - res->is_drop_whole_replica = true; - - break; - } - - case Type::RESTART_REPLICA: - case Type::SYNC_REPLICA: - if (!parseDatabaseAndTableName(pos, expected, res->database, res->table)) - return false; - break; - + case Type::DROP_REPLICA: + { + ASTPtr ast; + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + res->replica = ast->as<ASTLiteral &>().value.safeGet<String>(); + if (ParserKeyword{"FROM"}.ignore(pos, expected)) + { + // way 1. parse replica database + // way 2. parse replica tables + // way 3. 
parse replica zkpath + if (ParserKeyword{"DATABASE"}.ignore(pos, expected)) + { + ParserIdentifier database_parser; + ASTPtr database; + if (!database_parser.parse(pos, database, expected)) + return false; + tryGetIdentifierNameInto(database, res->database); + } + else if (ParserKeyword{"TABLE"}.ignore(pos, expected)) + { + parseDatabaseAndTableName(pos, expected, res->database, res->table); + } + else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected)) + { + ASTPtr path_ast; + if (!ParserStringLiteral{}.parse(pos, path_ast, expected)) + return false; + String zk_path = path_ast->as<ASTLiteral &>().value.safeGet<String>(); + if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/') + zk_path.pop_back(); + res->replica_zk_path = zk_path; + } + else + return false; + } + else + res->is_drop_whole_replica = true; + + break; + } + + case Type::RESTART_REPLICA: + case Type::SYNC_REPLICA: + if (!parseDatabaseAndTableName(pos, expected, res->database, res->table)) + return false; + break; + case Type::RESTART_DISK: { ASTPtr ast; @@ -178,24 +178,24 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & /// FLUSH DISTRIBUTED requires table /// START/STOP DISTRIBUTED SENDS does not require table - case Type::STOP_DISTRIBUTED_SENDS: - case Type::START_DISTRIBUTED_SENDS: + case Type::STOP_DISTRIBUTED_SENDS: + case Type::START_DISTRIBUTED_SENDS: { if (!parseQueryWithOnClusterAndMaybeTable(res, pos, expected, /* require table = */ false, /* allow_string_literal = */ false)) return false; break; } - case Type::FLUSH_DISTRIBUTED: + case Type::FLUSH_DISTRIBUTED: case Type::RESTORE_REPLICA: - { + { if (!parseQueryWithOnClusterAndMaybeTable(res, pos, expected, /* require table = */ true, /* allow_string_literal = */ false)) return false; - break; - } - - case Type::STOP_MERGES: - case Type::START_MERGES: + break; + } + + case Type::STOP_MERGES: + case Type::START_MERGES: { String storage_policy_str; String volume_str; @@ -223,19 +223,19 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } - case Type::STOP_TTL_MERGES: - case Type::START_TTL_MERGES: - case Type::STOP_MOVES: - case Type::START_MOVES: - case Type::STOP_FETCHES: - case Type::START_FETCHES: - case Type::STOP_REPLICATED_SENDS: - case Type::START_REPLICATED_SENDS: - case Type::STOP_REPLICATION_QUEUES: - case Type::START_REPLICATION_QUEUES: - parseDatabaseAndTableName(pos, expected, res->database, res->table); - break; - + case Type::STOP_TTL_MERGES: + case Type::START_TTL_MERGES: + case Type::STOP_MOVES: + case Type::START_MOVES: + case Type::STOP_FETCHES: + case Type::START_FETCHES: + case Type::STOP_REPLICATED_SENDS: + case Type::START_REPLICATED_SENDS: + case Type::STOP_REPLICATION_QUEUES: + case Type::START_REPLICATION_QUEUES: + parseDatabaseAndTableName(pos, expected, res->database, res->table); + break; + case Type::SUSPEND: { ASTPtr seconds; @@ -250,13 +250,13 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } - default: - /// There are no [db.table] after COMMAND NAME - break; - } - - node = std::move(res); - return true; -} - -} + default: + /// There are no [db.table] after COMMAND NAME + break; + } + + node = std::move(res); + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h index 3c4c5e1387..5f947e63b9 100644 --- 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h @@ -1,16 +1,16 @@ -#pragma once -#include <Parsers/IParserBase.h> - - -namespace DB -{ - - -class ParserSystemQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SYSTEM query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once +#include <Parsers/IParserBase.h> + + +namespace DB +{ + + +class ParserSystemQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SYSTEM query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp index f2281e8f9c..30be37bc4a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp @@ -1,42 +1,42 @@ -#include <Parsers/ASTIdentifier.h> -#include <Parsers/TablePropertiesQueriesASTs.h> - -#include <Parsers/CommonParsers.h> -#include <Parsers/ParserTablePropertiesQuery.h> - -#include <Common/typeid_cast.h> - - -namespace DB -{ - - -bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_exists("EXISTS"); - ParserKeyword s_temporary("TEMPORARY"); - ParserKeyword s_describe("DESCRIBE"); - ParserKeyword s_desc("DESC"); - ParserKeyword s_show("SHOW"); - ParserKeyword s_create("CREATE"); - ParserKeyword s_database("DATABASE"); - ParserKeyword s_table("TABLE"); +#include <Parsers/ASTIdentifier.h> +#include <Parsers/TablePropertiesQueriesASTs.h> + +#include <Parsers/CommonParsers.h> +#include <Parsers/ParserTablePropertiesQuery.h> + +#include <Common/typeid_cast.h> + + +namespace DB +{ + + +bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_exists("EXISTS"); + ParserKeyword s_temporary("TEMPORARY"); + ParserKeyword s_describe("DESCRIBE"); + ParserKeyword s_desc("DESC"); + ParserKeyword s_show("SHOW"); + ParserKeyword s_create("CREATE"); + ParserKeyword s_database("DATABASE"); + ParserKeyword s_table("TABLE"); ParserKeyword s_view("VIEW"); - ParserKeyword s_dictionary("DICTIONARY"); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; - - ASTPtr database; - ASTPtr table; - std::shared_ptr<ASTQueryWithTableAndOutput> query; - - bool parse_only_database_name = false; + ParserKeyword s_dictionary("DICTIONARY"); + ParserToken s_dot(TokenType::Dot); + ParserIdentifier name_p; + + ASTPtr database; + ASTPtr table; + std::shared_ptr<ASTQueryWithTableAndOutput> query; + + bool parse_only_database_name = false; bool parse_show_create_view = false; bool exists_view = false; - - bool temporary = false; - if (s_exists.ignore(pos, expected)) - { + + bool temporary = false; + if (s_exists.ignore(pos, expected)) + { if (s_database.ignore(pos, expected)) { query = std::make_shared<ASTExistsDatabaseQuery>(); @@ -51,7 +51,7 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & { if (s_temporary.ignore(pos, expected)) temporary = true; - + if (s_table.checkWithoutMoving(pos, expected)) query = std::make_shared<ASTExistsTableQuery>(); else if (s_dictionary.checkWithoutMoving(pos, expected)) @@ -59,64 
+59,64 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & else query = std::make_shared<ASTExistsTableQuery>(); } - } - else if (s_show.ignore(pos, expected)) - { - if (!s_create.ignore(pos, expected)) - return false; - - if (s_database.ignore(pos, expected)) - { - parse_only_database_name = true; - query = std::make_shared<ASTShowCreateDatabaseQuery>(); - } - else if (s_dictionary.checkWithoutMoving(pos, expected)) - query = std::make_shared<ASTShowCreateDictionaryQuery>(); + } + else if (s_show.ignore(pos, expected)) + { + if (!s_create.ignore(pos, expected)) + return false; + + if (s_database.ignore(pos, expected)) + { + parse_only_database_name = true; + query = std::make_shared<ASTShowCreateDatabaseQuery>(); + } + else if (s_dictionary.checkWithoutMoving(pos, expected)) + query = std::make_shared<ASTShowCreateDictionaryQuery>(); else if (s_view.ignore(pos, expected)) { query = std::make_shared<ASTShowCreateViewQuery>(); parse_show_create_view = true; } - else - query = std::make_shared<ASTShowCreateTableQuery>(); - } - else - { - return false; - } - - if (parse_only_database_name) - { - if (!name_p.parse(pos, database, expected)) - return false; - } - else - { + else + query = std::make_shared<ASTShowCreateTableQuery>(); + } + else + { + return false; + } + + if (parse_only_database_name) + { + if (!name_p.parse(pos, database, expected)) + return false; + } + else + { if (!(exists_view || parse_show_create_view)) { if (temporary || s_temporary.ignore(pos, expected)) query->temporary = true; - + if (!s_table.ignore(pos, expected)) s_dictionary.ignore(pos, expected); } - if (!name_p.parse(pos, table, expected)) - return false; - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } - } - - tryGetIdentifierNameInto(database, query->database); - tryGetIdentifierNameInto(table, query->table); - - node = query; - - return true; -} - - -} + if (!name_p.parse(pos, table, expected)) + return false; + if (s_dot.ignore(pos, expected)) + { + database = table; + if (!name_p.parse(pos, table, expected)) + return false; + } + } + + tryGetIdentifierNameInto(database, query->database); + tryGetIdentifierNameInto(table, query->table); + + node = query; + + return true; +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h index 8d9513755a..8d2c26d34a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h @@ -1,19 +1,19 @@ -#pragma once - -#include <Parsers/IParserBase.h> -#include <Parsers/ExpressionElementParsers.h> - - -namespace DB -{ - +#pragma once + +#include <Parsers/IParserBase.h> +#include <Parsers/ExpressionElementParsers.h> + + +namespace DB +{ + /** Query (EXISTS | SHOW CREATE) [DATABASE|TABLE|DICTIONARY] [db.]name [FORMAT format] - */ -class ParserTablePropertiesQuery : public IParserBase -{ -protected: - const char * getName() const override { return "EXISTS or SHOW CREATE query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} + */ +class ParserTablePropertiesQuery : public IParserBase +{ +protected: + const char * getName() const override { return "EXISTS or SHOW CREATE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp index e24de9942c..a71fa17ab7 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp @@ -1,30 +1,30 @@ -#include <Parsers/ParserUseQuery.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ASTUseQuery.h> - - -namespace DB -{ - -bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_use("USE"); - ParserIdentifier name_p; - - if (!s_use.ignore(pos, expected)) - return false; - - ASTPtr database; - if (!name_p.parse(pos, database, expected)) - return false; - - auto query = std::make_shared<ASTUseQuery>(); - tryGetIdentifierNameInto(database, query->database); - node = query; - - return true; -} - -} +#include <Parsers/ParserUseQuery.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ASTUseQuery.h> + + +namespace DB +{ + +bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_use("USE"); + ParserIdentifier name_p; + + if (!s_use.ignore(pos, expected)) + return false; + + ASTPtr database; + if (!name_p.parse(pos, database, expected)) + return false; + + auto query = std::make_shared<ASTUseQuery>(); + tryGetIdentifierNameInto(database, query->database); + node = query; + + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h index 197a4e828b..f5b0be7a86 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h @@ -1,18 +1,18 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ - -/** Query USE db - */ -class ParserUseQuery : public IParserBase -{ -protected: - const char * getName() const override{ return "USE query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ + +/** Query USE db + */ +class ParserUseQuery : public IParserBase +{ +protected: + const char * getName() const override{ return "USE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp index 1e678f69da..9cb4bb6fc9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp @@ -1,77 +1,77 @@ -#include <Parsers/ParserUserNameWithHost.h> -#include <Parsers/ASTUserNameWithHost.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ExpressionListParsers.h> -#include <Parsers/parseIdentifierOrStringLiteral.h> -#include <boost/algorithm/string.hpp> - - -namespace DB -{ -namespace -{ - bool parseUserNameWithHost(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTUserNameWithHost> & ast) - { - return IParserBase::wrapParseImpl(pos, [&] - { - String base_name; - if 
(!parseIdentifierOrStringLiteral(pos, expected, base_name)) - return false; - - boost::algorithm::trim(base_name); - - String host_pattern; - if (ParserToken{TokenType::At}.ignore(pos, expected)) - { - if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern)) - return false; - - boost::algorithm::trim(host_pattern); - if (host_pattern == "%") - host_pattern.clear(); - } - - ast = std::make_shared<ASTUserNameWithHost>(); - ast->base_name = std::move(base_name); - ast->host_pattern = std::move(host_pattern); - return true; - }); - } -} - - -bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - std::shared_ptr<ASTUserNameWithHost> res; - if (!parseUserNameWithHost(pos, expected, res)) - return false; - - node = res; - return true; -} - - -bool ParserUserNamesWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - std::vector<std::shared_ptr<ASTUserNameWithHost>> names; - - auto parse_single_name = [&] - { - std::shared_ptr<ASTUserNameWithHost> ast; - if (!parseUserNameWithHost(pos, expected, ast)) - return false; - - names.emplace_back(std::move(ast)); - return true; - }; - - if (!ParserList::parseUtil(pos, expected, parse_single_name, false)) - return false; - - auto result = std::make_shared<ASTUserNamesWithHost>(); - result->names = std::move(names); - node = result; - return true; -} - -} +#include <Parsers/ParserUserNameWithHost.h> +#include <Parsers/ASTUserNameWithHost.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ExpressionListParsers.h> +#include <Parsers/parseIdentifierOrStringLiteral.h> +#include <boost/algorithm/string.hpp> + + +namespace DB +{ +namespace +{ + bool parseUserNameWithHost(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTUserNameWithHost> & ast) + { + return IParserBase::wrapParseImpl(pos, [&] + { + String base_name; + if (!parseIdentifierOrStringLiteral(pos, expected, base_name)) + return false; + + boost::algorithm::trim(base_name); + + String host_pattern; + if (ParserToken{TokenType::At}.ignore(pos, expected)) + { + if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern)) + return false; + + boost::algorithm::trim(host_pattern); + if (host_pattern == "%") + host_pattern.clear(); + } + + ast = std::make_shared<ASTUserNameWithHost>(); + ast->base_name = std::move(base_name); + ast->host_pattern = std::move(host_pattern); + return true; + }); + } +} + + +bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::shared_ptr<ASTUserNameWithHost> res; + if (!parseUserNameWithHost(pos, expected, res)) + return false; + + node = res; + return true; +} + + +bool ParserUserNamesWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::vector<std::shared_ptr<ASTUserNameWithHost>> names; + + auto parse_single_name = [&] + { + std::shared_ptr<ASTUserNameWithHost> ast; + if (!parseUserNameWithHost(pos, expected, ast)) + return false; + + names.emplace_back(std::move(ast)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_single_name, false)) + return false; + + auto result = std::make_shared<ASTUserNamesWithHost>(); + result->names = std::move(names); + node = result; + return true; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h index 9c4f591742..453b816a98 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h +++ 
b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h @@ -1,26 +1,26 @@ -#pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ -/** Parses a user name. - * It can be a simple string or identifier or something like `name@host`. - */ -class ParserUserNameWithHost : public IParserBase -{ -protected: - const char * getName() const override { return "UserNameWithHost"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserUserNamesWithHost : public IParserBase -{ -protected: - const char * getName() const override { return "UserNamesWithHost"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** Parses a user name. + * It can be a simple string or identifier or something like `name@host`. + */ +class ParserUserNameWithHost : public IParserBase +{ +protected: + const char * getName() const override { return "UserNameWithHost"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserUserNamesWithHost : public IParserBase +{ +protected: + const char * getName() const override { return "UserNamesWithHost"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp index 2c9b37226e..046a840e87 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp @@ -1,66 +1,66 @@ -#include <Parsers/ASTLiteral.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/ASTWatchQuery.h> -#include <Parsers/CommonParsers.h> -#include <Parsers/ParserWatchQuery.h> -#include <Parsers/ExpressionElementParsers.h> - - -namespace DB -{ - -bool ParserWatchQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_watch("WATCH"); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; - ParserKeyword s_events("EVENTS"); - ParserKeyword s_limit("LIMIT"); - - ASTPtr database; - ASTPtr table; - auto query = std::make_shared<ASTWatchQuery>(); - - if (!s_watch.ignore(pos, expected)) - { - return false; - } - - if (!name_p.parse(pos, table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } - - /// EVENTS - if (s_events.ignore(pos, expected)) - { - query->is_watch_events = true; - } - - /// LIMIT length - if (s_limit.ignore(pos, expected)) - { - ParserNumber num; - - if (!num.parse(pos, query->limit_length, expected)) - return false; - } - - if (database) - query->database = getIdentifierName(database); - - if (table) - query->table = getIdentifierName(table); - - node = query; - - return true; -} - - -} +#include <Parsers/ASTLiteral.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTWatchQuery.h> +#include <Parsers/CommonParsers.h> +#include <Parsers/ParserWatchQuery.h> +#include <Parsers/ExpressionElementParsers.h> + + +namespace DB +{ + +bool ParserWatchQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_watch("WATCH"); + ParserToken s_dot(TokenType::Dot); + ParserIdentifier name_p; + ParserKeyword s_events("EVENTS"); + ParserKeyword s_limit("LIMIT"); + + ASTPtr database; + ASTPtr table; + auto query = 
std::make_shared<ASTWatchQuery>(); + + if (!s_watch.ignore(pos, expected)) + { + return false; + } + + if (!name_p.parse(pos, table, expected)) + return false; + + if (s_dot.ignore(pos, expected)) + { + database = table; + if (!name_p.parse(pos, table, expected)) + return false; + } + + /// EVENTS + if (s_events.ignore(pos, expected)) + { + query->is_watch_events = true; + } + + /// LIMIT length + if (s_limit.ignore(pos, expected)) + { + ParserNumber num; + + if (!num.parse(pos, query->limit_length, expected)) + return false; + } + + if (database) + query->database = getIdentifierName(database); + + if (table) + query->table = getIdentifierName(table); + + node = query; + + return true; +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h index 0918b5c65c..63097eba67 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h @@ -1,19 +1,19 @@ #pragma once - -#include <Parsers/IParserBase.h> - - -namespace DB -{ - -/** Query like this: - * WATCH [db.]table EVENTS - */ -class ParserWatchQuery : public IParserBase -{ -protected: - const char * getName() const override { return "WATCH query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} + +#include <Parsers/IParserBase.h> + + +namespace DB +{ + +/** Query like this: + * WATCH [db.]table EVENTS + */ +class ParserWatchQuery : public IParserBase +{ +protected: + const char * getName() const override { return "WATCH query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h index b7cc68940f..1fc285a562 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h @@ -1,71 +1,71 @@ -#pragma once - +#pragma once + #include <common/types.h> -#include <Parsers/TokenIterator.h> -#include <map> -#include <memory> -#include <Common/SipHash.h> - - -namespace DB -{ - -struct StringRange -{ - const char * first = nullptr; - const char * second = nullptr; - - StringRange() = default; - StringRange(const char * begin, const char * end) : first(begin), second(end) {} - explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {} - - StringRange(TokenIterator token_begin, TokenIterator token_end) - { - /// Empty range. - if (token_begin == token_end) - { - first = token_begin->begin; - second = token_begin->begin; - return; - } - - TokenIterator token_last = token_end; - --token_last; - - first = token_begin->begin; - second = token_last->end; - } -}; - -using StringPtr = std::shared_ptr<String>; - - -inline String toString(const StringRange & range) -{ - return range.first ? String(range.first, range.second) : String(); -} - -/// Hashes only the values of pointers in StringRange. Is used with StringRangePointersEqualTo comparator. -struct StringRangePointersHash -{ - UInt64 operator()(const StringRange & range) const - { - SipHash hash; - hash.update(range.first); - hash.update(range.second); - return hash.get64(); - } -}; - -/// Ranges are equal only when they point to the same memory region. 
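Aside on the `StringRangePointersHash` / `StringRangePointersEqualTo` pair in the hunk above: they hash and compare only the two pointer values, so a hash map keyed by `StringRange` deduplicates by position in the buffer rather than by substring content. A rough standalone sketch of the idea follows; `std::hash` with a boost-style combine is an assumption standing in for the SipHash state the real code uses.

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    struct Range
    {
        const char * first = nullptr;
        const char * second = nullptr;
    };

    /// Identity semantics: two ranges are equal only if they denote
    /// the same region of the same buffer.
    struct RangeIdentityEqual
    {
        bool operator()(const Range & a, const Range & b) const
        {
            return a.first == b.first && a.second == b.second;
        }
    };

    /// Hash the pointer values themselves; std::hash plus a boost-style
    /// combine stands in for SipHash here.
    struct RangeIdentityHash
    {
        size_t operator()(const Range & r) const
        {
            size_t h1 = std::hash<const void *>{}(r.first);
            size_t h2 = std::hash<const void *>{}(r.second);
            return h1 ^ (h2 + 0x9e3779b9U + (h1 << 6) + (h1 >> 2));
        }
    };

    int main()
    {
        std::string buf = "SELECT 1";
        Range token{buf.data(), buf.data() + 6};        /// "SELECT"
        Range same_region{buf.data(), buf.data() + 6};  /// same pointers -> same key

        std::unordered_map<Range, int, RangeIdentityHash, RangeIdentityEqual> seen;
        ++seen[token];
        ++seen[same_region];
        std::cout << seen.size() << " key, count " << seen[token] << '\n';  /// prints: 1 key, count 2
    }

Identical substrings at different offsets hash to different keys, which is exactly the point: it is cheap and sufficient when all ranges come from one query string.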
-/// It may be used when it's enough to compare substrings by their position in the same string. -struct StringRangePointersEqualTo -{ - constexpr bool operator()(const StringRange &lhs, const StringRange &rhs) const - { - return std::tie(lhs.first, lhs.second) == std::tie(rhs.first, rhs.second); - } -}; - -} - +#include <Parsers/TokenIterator.h> +#include <map> +#include <memory> +#include <Common/SipHash.h> + + +namespace DB +{ + +struct StringRange +{ + const char * first = nullptr; + const char * second = nullptr; + + StringRange() = default; + StringRange(const char * begin, const char * end) : first(begin), second(end) {} + explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {} + + StringRange(TokenIterator token_begin, TokenIterator token_end) + { + /// Empty range. + if (token_begin == token_end) + { + first = token_begin->begin; + second = token_begin->begin; + return; + } + + TokenIterator token_last = token_end; + --token_last; + + first = token_begin->begin; + second = token_last->end; + } +}; + +using StringPtr = std::shared_ptr<String>; + + +inline String toString(const StringRange & range) +{ + return range.first ? String(range.first, range.second) : String(); +} + +/// Hashes only the values of pointers in StringRange. Is used with StringRangePointersEqualTo comparator. +struct StringRangePointersHash +{ + UInt64 operator()(const StringRange & range) const + { + SipHash hash; + hash.update(range.first); + hash.update(range.second); + return hash.get64(); + } +}; + +/// Ranges are equal only when they point to the same memory region. +/// It may be used when it's enough to compare substrings by their position in the same string. +struct StringRangePointersEqualTo +{ + constexpr bool operator()(const StringRange &lhs, const StringRange &rhs) const + { + return std::tie(lhs.first, lhs.second) == std::tie(rhs.first, rhs.second); + } +}; + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h index 33be3042b3..edb040d72d 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h @@ -1,12 +1,12 @@ -#pragma once - -#include <Parsers/ASTQueryWithTableAndOutput.h> -#include <Common/quoteString.h> - - -namespace DB -{ - +#pragma once + +#include <Parsers/ASTQueryWithTableAndOutput.h> +#include <Common/quoteString.h> + + +namespace DB +{ + struct ASTExistsDatabaseQueryIDAndQueryNames { static constexpr auto ID = "ExistsDatabaseQuery"; @@ -15,13 +15,13 @@ struct ASTExistsDatabaseQueryIDAndQueryNames static constexpr auto QueryTemporary = ""; }; -struct ASTExistsTableQueryIDAndQueryNames -{ - static constexpr auto ID = "ExistsTableQuery"; - static constexpr auto Query = "EXISTS TABLE"; - static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE"; -}; - +struct ASTExistsTableQueryIDAndQueryNames +{ + static constexpr auto ID = "ExistsTableQuery"; + static constexpr auto Query = "EXISTS TABLE"; + static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE"; +}; + struct ASTExistsViewQueryIDAndQueryNames { static constexpr auto ID = "ExistsViewQuery"; @@ -31,21 +31,21 @@ struct ASTExistsViewQueryIDAndQueryNames }; -struct ASTExistsDictionaryQueryIDAndQueryNames -{ - static constexpr auto ID = "ExistsDictionaryQuery"; - static constexpr auto Query = "EXISTS DICTIONARY"; - /// No 
temporary dictionaries are supported, just for parsing - static constexpr auto QueryTemporary = "EXISTS TEMPORARY DICTIONARY"; -}; - -struct ASTShowCreateTableQueryIDAndQueryNames -{ - static constexpr auto ID = "ShowCreateTableQuery"; - static constexpr auto Query = "SHOW CREATE TABLE"; - static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE"; -}; - +struct ASTExistsDictionaryQueryIDAndQueryNames +{ + static constexpr auto ID = "ExistsDictionaryQuery"; + static constexpr auto Query = "EXISTS DICTIONARY"; + /// No temporary dictionaries are supported, just for parsing + static constexpr auto QueryTemporary = "EXISTS TEMPORARY DICTIONARY"; +}; + +struct ASTShowCreateTableQueryIDAndQueryNames +{ + static constexpr auto ID = "ShowCreateTableQuery"; + static constexpr auto Query = "SHOW CREATE TABLE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE"; +}; + struct ASTShowCreateViewQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateViewQuery"; @@ -54,35 +54,35 @@ struct ASTShowCreateViewQueryIDAndQueryNames static constexpr auto QueryTemporary = ""; }; -struct ASTShowCreateDatabaseQueryIDAndQueryNames -{ - static constexpr auto ID = "ShowCreateDatabaseQuery"; - static constexpr auto Query = "SHOW CREATE DATABASE"; - static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE"; -}; - -struct ASTShowCreateDictionaryQueryIDAndQueryNames -{ - static constexpr auto ID = "ShowCreateDictionaryQuery"; - static constexpr auto Query = "SHOW CREATE DICTIONARY"; - /// No temporary dictionaries are supported, just for parsing - static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DICTIONARY"; -}; - -struct ASTDescribeQueryExistsQueryIDAndQueryNames -{ - static constexpr auto ID = "DescribeQuery"; - static constexpr auto Query = "DESCRIBE TABLE"; - static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE"; -}; - -using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl<ASTExistsTableQueryIDAndQueryNames>; +struct ASTShowCreateDatabaseQueryIDAndQueryNames +{ + static constexpr auto ID = "ShowCreateDatabaseQuery"; + static constexpr auto Query = "SHOW CREATE DATABASE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE"; +}; + +struct ASTShowCreateDictionaryQueryIDAndQueryNames +{ + static constexpr auto ID = "ShowCreateDictionaryQuery"; + static constexpr auto Query = "SHOW CREATE DICTIONARY"; + /// No temporary dictionaries are supported, just for parsing + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DICTIONARY"; +}; + +struct ASTDescribeQueryExistsQueryIDAndQueryNames +{ + static constexpr auto ID = "DescribeQuery"; + static constexpr auto Query = "DESCRIBE TABLE"; + static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE"; +}; + +using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl<ASTExistsTableQueryIDAndQueryNames>; using ASTExistsViewQuery = ASTQueryWithTableAndOutputImpl<ASTExistsViewQueryIDAndQueryNames>; -using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDictionaryQueryIDAndQueryNames>; -using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateTableQueryIDAndQueryNames>; +using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDictionaryQueryIDAndQueryNames>; +using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateTableQueryIDAndQueryNames>; using ASTShowCreateViewQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateViewQueryIDAndQueryNames>; -using ASTShowCreateDictionaryQuery = 
ASTQueryWithTableAndOutputImpl<ASTShowCreateDictionaryQueryIDAndQueryNames>; - +using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateDictionaryQueryIDAndQueryNames>; + class ASTExistsDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTExistsDatabaseQueryIDAndQueryNames> { protected: @@ -93,44 +93,44 @@ protected: } }; -class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTShowCreateDatabaseQueryIDAndQueryNames> -{ -protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << ASTShowCreateDatabaseQueryIDAndQueryNames::Query - << " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); - } -}; - -class ASTDescribeQuery : public ASTQueryWithOutput -{ -public: - ASTPtr table_expression; - - String getID(char) const override { return "DescribeQuery"; } - - ASTPtr clone() const override - { - auto res = std::make_shared<ASTDescribeQuery>(*this); - res->children.clear(); - if (table_expression) - { - res->table_expression = table_expression->clone(); - res->children.push_back(res->table_expression); - } - cloneOutputOptions(*res); - return res; - } - -protected: - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override - { - settings.ostr << (settings.hilite ? hilite_keyword : "") - << "DESCRIBE TABLE " << (settings.hilite ? hilite_none : ""); - table_expression->formatImpl(settings, state, frame); - } - -}; - -} +class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTShowCreateDatabaseQueryIDAndQueryNames> +{ +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << ASTShowCreateDatabaseQueryIDAndQueryNames::Query + << " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); + } +}; + +class ASTDescribeQuery : public ASTQueryWithOutput +{ +public: + ASTPtr table_expression; + + String getID(char) const override { return "DescribeQuery"; } + + ASTPtr clone() const override + { + auto res = std::make_shared<ASTDescribeQuery>(*this); + res->children.clear(); + if (table_expression) + { + res->table_expression = table_expression->clone(); + res->children.push_back(res->table_expression); + } + cloneOutputOptions(*res); + return res; + } + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "DESCRIBE TABLE " << (settings.hilite ? hilite_none : ""); + table_expression->formatImpl(settings, state, frame); + } + +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp index f6dc405728..08877e0b2f 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp @@ -1,47 +1,47 @@ -#include <Parsers/TokenIterator.h> - - -namespace DB -{ - +#include <Parsers/TokenIterator.h> + + +namespace DB +{ + UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) -{ - /// We have just two kind of parentheses: () and []. - UnmatchedParentheses stack; - +{ + /// We have just two kind of parentheses: () and []. 
+ UnmatchedParentheses stack; + /// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error /// when parser failed in the middle of the query. for (TokenIterator it = begin; it.isValid(); ++it) - { - if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket) - { - stack.push_back(*it); - } - else if (it->type == TokenType::ClosingRoundBracket || it->type == TokenType::ClosingSquareBracket) - { - if (stack.empty()) - { - /// Excessive closing bracket. - stack.push_back(*it); - return stack; - } - else if ((stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket) - || (stack.back().type == TokenType::OpeningSquareBracket && it->type == TokenType::ClosingSquareBracket)) - { - /// Valid match. - stack.pop_back(); - } - else - { - /// Closing bracket type doesn't match opening bracket type. - stack.push_back(*it); - return stack; - } - } - } - - /// If stack is not empty, we have unclosed brackets. - return stack; -} - -} + { + if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket) + { + stack.push_back(*it); + } + else if (it->type == TokenType::ClosingRoundBracket || it->type == TokenType::ClosingSquareBracket) + { + if (stack.empty()) + { + /// Excessive closing bracket. + stack.push_back(*it); + return stack; + } + else if ((stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket) + || (stack.back().type == TokenType::OpeningSquareBracket && it->type == TokenType::ClosingSquareBracket)) + { + /// Valid match. + stack.pop_back(); + } + else + { + /// Closing bracket type doesn't match opening bracket type. + stack.push_back(*it); + return stack; + } + } + } + + /// If stack is not empty, we have unclosed brackets. 
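`checkUnmatchedParentheses` above is the classic stack-based bracket matcher, with one twist: instead of a yes/no answer it returns the offending tokens (the unexpected closer, or every opener still on the stack) so the caller can point at them in an error message. A self-contained sketch of the same algorithm over a plain string; the real version walks a `TokenIterator` and inspects token types rather than characters.

    #include <iostream>
    #include <string>
    #include <vector>

    /// Returns positions of unmatched brackets: either the unexpected closer
    /// (plus everything still open, for context) or the openers left on the stack.
    static std::vector<size_t> findUnmatchedBrackets(const std::string & s)
    {
        std::vector<size_t> stack;  /// positions of currently open ( and [
        for (size_t i = 0; i < s.size(); ++i)
        {
            char c = s[i];
            if (c == '(' || c == '[')
                stack.push_back(i);
            else if (c == ')' || c == ']')
            {
                /// Excessive closer, or closer of the wrong kind: report and stop,
                /// mirroring the early `return stack;` in checkUnmatchedParentheses.
                if (stack.empty() || s[stack.back()] != (c == ')' ? '(' : '['))
                {
                    stack.push_back(i);
                    return stack;
                }
                stack.pop_back();  /// valid match
            }
        }
        return stack;  /// whatever is left is unclosed
    }

    int main()
    {
        for (const std::string & q : {"f(a[1])", "f(a[1)]", "f(a[1]"})
            std::cout << q << " -> " << findUnmatchedBrackets(q).size() << " unmatched\n";
    }

Scanning the whole token stream rather than stopping where the parser failed is deliberate: it avoids false "unmatched parentheses" reports when the parse error lies in the middle of an otherwise balanced query.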
+ return stack; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp index 951900e9c0..13429df5b4 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp @@ -1,114 +1,114 @@ -#include "parseDatabaseAndTableName.h" -#include <Parsers/ExpressionElementParsers.h> -#include <Parsers/ASTIdentifier.h> -#include <Parsers/CommonParsers.h> - - -namespace DB -{ - -bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str) -{ - ParserToken s_dot(TokenType::Dot); - ParserIdentifier table_parser; - - ASTPtr database; - ASTPtr table; - - database_str = ""; - table_str = ""; - - if (!table_parser.parse(pos, database, expected)) - return false; - - if (s_dot.ignore(pos)) - { - if (!table_parser.parse(pos, table, expected)) - { - database_str = ""; - return false; - } - - tryGetIdentifierNameInto(database, database_str); - tryGetIdentifierNameInto(table, table_str); - } - else - { - database_str = ""; - tryGetIdentifierNameInto(database, table_str); - } - - return true; -} - - -bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table) -{ - return IParserBase::wrapParseImpl(pos, [&] - { - if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) - { - auto pos_before_dot = pos; - if (ParserToken{TokenType::Dot}.ignore(pos, expected) - && ParserToken{TokenType::Asterisk}.ignore(pos, expected)) - { - /// *.* - any_database = true; - database.clear(); - any_table = true; - table.clear(); - return true; - } - - /// * - pos = pos_before_dot; - any_database = false; - database.clear(); - any_table = true; - table.clear(); - return true; - } - - ASTPtr ast; - ParserIdentifier identifier_parser; - if (identifier_parser.parse(pos, ast, expected)) - { - String first_identifier = getIdentifierName(ast); - auto pos_before_dot = pos; - - if (ParserToken{TokenType::Dot}.ignore(pos, expected)) - { - if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) - { - /// db.* - any_database = false; - database = std::move(first_identifier); - any_table = true; - table.clear(); - return true; - } - else if (identifier_parser.parse(pos, ast, expected)) - { - /// db.table - any_database = false; - database = std::move(first_identifier); - any_table = false; - table = getIdentifierName(ast); - return true; - } - } - - /// table - pos = pos_before_dot; - any_database = false; - database.clear(); - any_table = false; - table = std::move(first_identifier); - return true; - } - - return false; - }); -} - -} +#include "parseDatabaseAndTableName.h" +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/CommonParsers.h> + + +namespace DB +{ + +bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str) +{ + ParserToken s_dot(TokenType::Dot); + ParserIdentifier table_parser; + + ASTPtr database; + ASTPtr table; + + database_str = ""; + table_str = ""; + + if (!table_parser.parse(pos, database, expected)) + return false; + + if (s_dot.ignore(pos)) + { + if (!table_parser.parse(pos, table, expected)) + { + database_str = ""; + return false; + } + + tryGetIdentifierNameInto(database, database_str); + 
tryGetIdentifierNameInto(table, table_str); + } + else + { + database_str = ""; + tryGetIdentifierNameInto(database, table_str); + } + + return true; +} + + +bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table) +{ + return IParserBase::wrapParseImpl(pos, [&] + { + if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) + { + auto pos_before_dot = pos; + if (ParserToken{TokenType::Dot}.ignore(pos, expected) + && ParserToken{TokenType::Asterisk}.ignore(pos, expected)) + { + /// *.* + any_database = true; + database.clear(); + any_table = true; + table.clear(); + return true; + } + + /// * + pos = pos_before_dot; + any_database = false; + database.clear(); + any_table = true; + table.clear(); + return true; + } + + ASTPtr ast; + ParserIdentifier identifier_parser; + if (identifier_parser.parse(pos, ast, expected)) + { + String first_identifier = getIdentifierName(ast); + auto pos_before_dot = pos; + + if (ParserToken{TokenType::Dot}.ignore(pos, expected)) + { + if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) + { + /// db.* + any_database = false; + database = std::move(first_identifier); + any_table = true; + table.clear(); + return true; + } + else if (identifier_parser.parse(pos, ast, expected)) + { + /// db.table + any_database = false; + database = std::move(first_identifier); + any_table = false; + table = getIdentifierName(ast); + return true; + } + } + + /// table + pos = pos_before_dot; + any_database = false; + database.clear(); + any_table = false; + table = std::move(first_identifier); + return true; + } + + return false; + }); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h index e12e4f07c2..e4699c8ad9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h @@ -1,13 +1,13 @@ -#pragma once -#include <Parsers/IParser.h> - -namespace DB -{ - -/// Parses [db.]name -bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str); - -/// Parses [db.]name or [db.]* or [*.]* -bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table); - -} +#pragma once +#include <Parsers/IParser.h> + +namespace DB +{ + +/// Parses [db.]name +bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str); + +/// Parses [db.]name or [db.]* or [*.]* +bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table); + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp index b6c6ff6466..1f25f51ef2 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp @@ -1,46 +1,46 @@ -#include <Parsers/parseUserName.h> -#include <Parsers/ParserUserNameWithHost.h> -#include <Parsers/ASTUserNameWithHost.h> -#include <Parsers/CommonParsers.h> - - -namespace DB -{ - -bool parseUserName(IParser::Pos & pos, Expected & 
expected, String & user_name) -{ - ASTPtr ast; - if (!ParserUserNameWithHost{}.parse(pos, ast, expected)) - return false; - user_name = ast->as<const ASTUserNameWithHost &>().toString(); - return true; -} - - -bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names) -{ - ASTPtr ast; - if (!ParserUserNamesWithHost{}.parse(pos, ast, expected)) - return false; - user_names = ast->as<const ASTUserNamesWithHost &>().toStrings(); - return true; -} - - -bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected) -{ - return IParserBase::wrapParseImpl(pos, [&] - { - if (!ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) && !ParserKeyword{"currentUser"}.ignore(pos, expected)) - return false; - - if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected)) - { - if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected)) - return false; - } - return true; - }); -} - -} +#include <Parsers/parseUserName.h> +#include <Parsers/ParserUserNameWithHost.h> +#include <Parsers/ASTUserNameWithHost.h> +#include <Parsers/CommonParsers.h> + + +namespace DB +{ + +bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name) +{ + ASTPtr ast; + if (!ParserUserNameWithHost{}.parse(pos, ast, expected)) + return false; + user_name = ast->as<const ASTUserNameWithHost &>().toString(); + return true; +} + + +bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names) +{ + ASTPtr ast; + if (!ParserUserNamesWithHost{}.parse(pos, ast, expected)) + return false; + user_names = ast->as<const ASTUserNamesWithHost &>().toStrings(); + return true; +} + + +bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected) +{ + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) && !ParserKeyword{"currentUser"}.ignore(pos, expected)) + return false; + + if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected)) + { + if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected)) + return false; + } + return true; + }); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h index 678e73daec..c1ad36c936 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h @@ -1,36 +1,36 @@ -#pragma once - -#include <Parsers/IParser.h> - - -namespace DB -{ -/// Parses a user name. It can be a simple string or identifier or something like `name@host`. -/// In the last case `host` specifies the hosts user is allowed to connect from. -/// The `host` can be an ip address, ip subnet, or a host name. -/// The % and _ wildcard characters are permitted in `host`. -/// These have the same meaning as for pattern-matching operations performed with the LIKE operator. -bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name); - -/// Parses a comma-separated list of user names. -bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names); - - -/// Parses either the 'CURRENT_USER' keyword (or some of its aliases). -bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected); - - -/// Parses a role name. It follows the same rules as a user name, but allowed hosts are never checked -/// (because roles are not used to connect to server). 
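/// As a standalone illustration of the name@host convention documented above
/// (hypothetical helper, not part of this header; assumes <string> and
/// <utility>; treating a missing '@' as "any host" is an assumption here):
///
///     /// "alice@10.0.0.%" -> {"alice", "10.0.0.%"}; "alice" -> {"alice", "%"}.
///     std::pair<std::string, std::string> splitUserAndHost(const std::string & s)
///     {
///         auto at = s.rfind('@');
///         if (at == std::string::npos)
///             return {s, "%"};
///         return {s.substr(0, at), s.substr(at + 1)};
///     }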
-inline bool parseRoleName(IParser::Pos & pos, Expected & expected, String & role_name) -{ - return parseUserName(pos, expected, role_name); -} - -/// Parses a comma-separated list of role names. -inline bool parseRoleNames(IParser::Pos & pos, Expected & expected, Strings & role_names) -{ - return parseUserNames(pos, expected, role_names); -} - -} +#pragma once + +#include <Parsers/IParser.h> + + +namespace DB +{ +/// Parses a user name. It can be a simple string or identifier or something like `name@host`. +/// In the last case `host` specifies the hosts user is allowed to connect from. +/// The `host` can be an ip address, ip subnet, or a host name. +/// The % and _ wildcard characters are permitted in `host`. +/// These have the same meaning as for pattern-matching operations performed with the LIKE operator. +bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name); + +/// Parses a comma-separated list of user names. +bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names); + + +/// Parses either the 'CURRENT_USER' keyword (or some of its aliases). +bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected); + + +/// Parses a role name. It follows the same rules as a user name, but allowed hosts are never checked +/// (because roles are not used to connect to server). +inline bool parseRoleName(IParser::Pos & pos, Expected & expected, String & role_name) +{ + return parseUserName(pos, expected, role_name); +} + +/// Parses a comma-separated list of role names. +inline bool parseRoleNames(IParser::Pos & pos, Expected & expected, Strings & role_names) +{ + return parseUserNames(pos, expected, role_names); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp index 65e3998168..4800bfca2c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp @@ -1,170 +1,170 @@ -#include <Processors/Chunk.h> -#include <IO/WriteHelpers.h> -#include <IO/Operators.h> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int POSITION_OUT_OF_BOUND; -} - -Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_) -{ - checkNumRowsIsConsistent(); -} - -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - -static Columns unmuteColumns(MutableColumns && mut_columns) -{ - Columns columns; - columns.reserve(mut_columns.size()); - for (auto & col : mut_columns) - columns.emplace_back(std::move(col)); - - return columns; -} - -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) - : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_) -{ - checkNumRowsIsConsistent(); -} - -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - -Chunk Chunk::clone() const -{ - return Chunk(getColumns(), getNumRows(), chunk_info); -} - -void Chunk::setColumns(Columns columns_, UInt64 num_rows_) -{ - columns = std::move(columns_); - num_rows = num_rows_; - checkNumRowsIsConsistent(); -} - -void Chunk::setColumns(MutableColumns columns_, UInt64 num_rows_) 
-{ - columns = unmuteColumns(std::move(columns_)); - num_rows = num_rows_; - checkNumRowsIsConsistent(); -} - -void Chunk::checkNumRowsIsConsistent() -{ +#include <Processors/Chunk.h> +#include <IO/WriteHelpers.h> +#include <IO/Operators.h> + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int POSITION_OUT_OF_BOUND; +} + +Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_) +{ + checkNumRowsIsConsistent(); +} + +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + +static Columns unmuteColumns(MutableColumns && mut_columns) +{ + Columns columns; + columns.reserve(mut_columns.size()); + for (auto & col : mut_columns) + columns.emplace_back(std::move(col)); + + return columns; +} + +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) + : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_) +{ + checkNumRowsIsConsistent(); +} + +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + +Chunk Chunk::clone() const +{ + return Chunk(getColumns(), getNumRows(), chunk_info); +} + +void Chunk::setColumns(Columns columns_, UInt64 num_rows_) +{ + columns = std::move(columns_); + num_rows = num_rows_; + checkNumRowsIsConsistent(); +} + +void Chunk::setColumns(MutableColumns columns_, UInt64 num_rows_) +{ + columns = unmuteColumns(std::move(columns_)); + num_rows = num_rows_; + checkNumRowsIsConsistent(); +} + +void Chunk::checkNumRowsIsConsistent() +{ for (size_t i = 0; i < columns.size(); ++i) { auto & column = columns[i]; - if (column->size() != num_rows) + if (column->size() != num_rows) throw Exception("Invalid number of rows in Chunk column " + column->getName()+ " position " + toString(i) + ": expected " + - toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); } -} - -MutableColumns Chunk::mutateColumns() -{ - size_t num_columns = columns.size(); - MutableColumns mut_columns(num_columns); - for (size_t i = 0; i < num_columns; ++i) - mut_columns[i] = IColumn::mutate(std::move(columns[i])); - - columns.clear(); - num_rows = 0; - - return mut_columns; -} - -MutableColumns Chunk::cloneEmptyColumns() const -{ - size_t num_columns = columns.size(); - MutableColumns mut_columns(num_columns); - for (size_t i = 0; i < num_columns; ++i) - mut_columns[i] = columns[i]->cloneEmpty(); - return mut_columns; -} - -Columns Chunk::detachColumns() -{ - num_rows = 0; - return std::move(columns); -} - -void Chunk::addColumn(ColumnPtr column) -{ - if (column->size() != num_rows) - throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + - toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); - - columns.emplace_back(std::move(column)); -} - -void Chunk::erase(size_t position) -{ - if (columns.empty()) - throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND); - - if (position >= columns.size()) - throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = " - + toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); - - 
columns.erase(columns.begin() + position); -} - -UInt64 Chunk::bytes() const -{ - UInt64 res = 0; - for (const auto & column : columns) - res += column->byteSize(); - - return res; -} - -UInt64 Chunk::allocatedBytes() const -{ - UInt64 res = 0; - for (const auto & column : columns) - res += column->allocatedBytes(); - - return res; -} - -std::string Chunk::dumpStructure() const -{ - WriteBufferFromOwnString out; - for (const auto & column : columns) - out << ' ' << column->dumpStructure(); - - return out.str(); -} - - -void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx) -{ - RowsBitMask & mask = rows_mask_by_column_id[column_idx]; - mask.resize(row_idx + 1); - mask[row_idx] = true; -} - -const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(size_t column_idx) const -{ - static RowsBitMask none; - auto it = rows_mask_by_column_id.find(column_idx); - if (it != rows_mask_by_column_id.end()) - return it->second; - return none; -} - -} +} + +MutableColumns Chunk::mutateColumns() +{ + size_t num_columns = columns.size(); + MutableColumns mut_columns(num_columns); + for (size_t i = 0; i < num_columns; ++i) + mut_columns[i] = IColumn::mutate(std::move(columns[i])); + + columns.clear(); + num_rows = 0; + + return mut_columns; +} + +MutableColumns Chunk::cloneEmptyColumns() const +{ + size_t num_columns = columns.size(); + MutableColumns mut_columns(num_columns); + for (size_t i = 0; i < num_columns; ++i) + mut_columns[i] = columns[i]->cloneEmpty(); + return mut_columns; +} + +Columns Chunk::detachColumns() +{ + num_rows = 0; + return std::move(columns); +} + +void Chunk::addColumn(ColumnPtr column) +{ + if (column->size() != num_rows) + throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + + columns.emplace_back(std::move(column)); +} + +void Chunk::erase(size_t position) +{ + if (columns.empty()) + throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND); + + if (position >= columns.size()) + throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = " + + toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); + + columns.erase(columns.begin() + position); +} + +UInt64 Chunk::bytes() const +{ + UInt64 res = 0; + for (const auto & column : columns) + res += column->byteSize(); + + return res; +} + +UInt64 Chunk::allocatedBytes() const +{ + UInt64 res = 0; + for (const auto & column : columns) + res += column->allocatedBytes(); + + return res; +} + +std::string Chunk::dumpStructure() const +{ + WriteBufferFromOwnString out; + for (const auto & column : columns) + out << ' ' << column->dumpStructure(); + + return out.str(); +} + + +void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx) +{ + RowsBitMask & mask = rows_mask_by_column_id[column_idx]; + mask.resize(row_idx + 1); + mask[row_idx] = true; +} + +const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(size_t column_idx) const +{ + static RowsBitMask none; + auto it = rows_mask_by_column_id.find(column_idx); + if (it != rows_mask_by_column_id.end()) + return it->second; + return none; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp index 5eb4504027..f4648caf0f 100644 --- 
a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp @@ -1,64 +1,64 @@ -#include <Processors/ConcatProcessor.h> - - -namespace DB -{ - -ConcatProcessor::ConcatProcessor(const Block & header, size_t num_inputs) - : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin()) -{ -} - -ConcatProcessor::Status ConcatProcessor::prepare() -{ - auto & output = outputs.front(); - - /// Check can output. - - if (output.isFinished()) - { - for (; current_input != inputs.end(); ++current_input) - current_input->close(); - - return Status::Finished; - } - - if (!output.isNeeded()) - { - if (current_input != inputs.end()) - current_input->setNotNeeded(); - - return Status::PortFull; - } - - if (!output.canPush()) - return Status::PortFull; - - /// Check can input. - - while (current_input != inputs.end() && current_input->isFinished()) - ++current_input; - - if (current_input == inputs.end()) - { - output.finish(); - return Status::Finished; - } - - auto & input = *current_input; - - input.setNeeded(); - - if (!input.hasData()) - return Status::NeedData; - - /// Move data. - output.push(input.pull()); - - /// Now, we pushed to output, and it must be full. - return Status::PortFull; -} - -} - - +#include <Processors/ConcatProcessor.h> + + +namespace DB +{ + +ConcatProcessor::ConcatProcessor(const Block & header, size_t num_inputs) + : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin()) +{ +} + +ConcatProcessor::Status ConcatProcessor::prepare() +{ + auto & output = outputs.front(); + + /// Check can output. + + if (output.isFinished()) + { + for (; current_input != inputs.end(); ++current_input) + current_input->close(); + + return Status::Finished; + } + + if (!output.isNeeded()) + { + if (current_input != inputs.end()) + current_input->setNotNeeded(); + + return Status::PortFull; + } + + if (!output.canPush()) + return Status::PortFull; + + /// Check can input. + + while (current_input != inputs.end() && current_input->isFinished()) + ++current_input; + + if (current_input == inputs.end()) + { + output.finish(); + return Status::Finished; + } + + auto & input = *current_input; + + input.setNeeded(); + + if (!input.hasData()) + return Status::NeedData; + + /// Move data. + output.push(input.pull()); + + /// Now, we pushed to output, and it must be full. + return Status::PortFull; +} + +} + + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h index b60d07e516..4a1fc58041 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h @@ -1,32 +1,32 @@ -#pragma once - -#include <Processors/IProcessor.h> - - -namespace DB -{ - +#pragma once + +#include <Processors/IProcessor.h> + + +namespace DB +{ + /** Has arbitrary non zero number of inputs and one output. - * All of them have the same structure. - * - * Pulls all data from first input, then all data from second input, etc... - * Doesn't do any heavy calculations. - * Preserves an order of data. 
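 * A rough wiring sketch (hypothetical pipeline code with invented source/sink
 * names; connect() and the port accessors are used as in the IProcessor
 * framework):
 *
 *     ConcatProcessor concat(header, 2);
 *     auto input = concat.getInputs().begin();
 *     connect(source_a.getOutputs().front(), *input++);
 *     connect(source_b.getOutputs().front(), *input);
 *     connect(concat.getOutputPort(), sink.getInputs().front());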
- */ -class ConcatProcessor : public IProcessor -{ -public: - ConcatProcessor(const Block & header, size_t num_inputs); - - String getName() const override { return "Concat"; } - - Status prepare() override; - - OutputPort & getOutputPort() { return outputs.front(); } - -private: - InputPorts::iterator current_input; -}; - -} - + * All of them have the same structure. + * + * Pulls all data from first input, then all data from second input, etc... + * Doesn't do any heavy calculations. + * Preserves an order of data. + */ +class ConcatProcessor : public IProcessor +{ +public: + ConcatProcessor(const Block & header, size_t num_inputs); + + String getName() const override { return "Concat"; } + + Status prepare() override; + + OutputPort & getOutputPort() { return outputs.front(); } + +private: + InputPorts::iterator current_input; +}; + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 19eb0f030b..d62cc112d1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,508 +1,508 @@ -#include <IO/ReadHelpers.h> -#include <IO/Operators.h> - -#include <Formats/verbosePrintString.h> -#include <Processors/Formats/Impl/CSVRowInputFormat.h> -#include <Formats/FormatFactory.h> +#include <IO/ReadHelpers.h> +#include <IO/Operators.h> + +#include <Formats/verbosePrintString.h> +#include <Processors/Formats/Impl/CSVRowInputFormat.h> +#include <Formats/FormatFactory.h> #include <DataTypes/Serializations/SerializationNullable.h> -#include <DataTypes/DataTypeNothing.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int INCORRECT_DATA; +#include <DataTypes/DataTypeNothing.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int INCORRECT_DATA; extern const int LOGICAL_ERROR; -} - - -CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, const FormatSettings & format_settings_) - : RowInputFormatWithDiagnosticInfo(header_, in_, params_) - , with_names(with_names_) - , format_settings(format_settings_) -{ - - const String bad_delimiters = " \t\"'.UL"; - if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) - throw Exception(String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter + - "'. Try use CustomSeparated format instead.", ErrorCodes::BAD_ARGUMENTS); - - const auto & sample = getPort().getHeader(); - size_t num_columns = sample.columns(); - - data_types.resize(num_columns); - column_indexes_by_names.reserve(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_info = sample.getByPosition(i); - - data_types[i] = column_info.type; - column_indexes_by_names.emplace(column_info.name, i); - } -} - - -/// Map an input file column to a table column, based on its name. 
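/// The lookup behind this mapping is a plain name -> position hash map built
/// from the header block. A minimal sketch of the same pattern (hypothetical
/// standalone code; assumes <optional>, <string>, <unordered_map>):
///
///     std::optional<size_t> findColumn(
///         const std::unordered_map<std::string, size_t> & by_name,
///         const std::string & name)
///     {
///         auto it = by_name.find(name);
///         if (it == by_name.end())
///             return std::nullopt; /// unknown field: skip or throw, per settings
///         return it->second;
///     }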
-void CSVRowInputFormat::addInputColumn(const String & column_name) -{ - const auto column_it = column_indexes_by_names.find(column_name); - if (column_it == column_indexes_by_names.end()) - { - if (format_settings.skip_unknown_fields) - { +} + + +CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, + bool with_names_, const FormatSettings & format_settings_) + : RowInputFormatWithDiagnosticInfo(header_, in_, params_) + , with_names(with_names_) + , format_settings(format_settings_) +{ + + const String bad_delimiters = " \t\"'.UL"; + if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) + throw Exception(String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter + + "'. Try use CustomSeparated format instead.", ErrorCodes::BAD_ARGUMENTS); + + const auto & sample = getPort().getHeader(); + size_t num_columns = sample.columns(); + + data_types.resize(num_columns); + column_indexes_by_names.reserve(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & column_info = sample.getByPosition(i); + + data_types[i] = column_info.type; + column_indexes_by_names.emplace(column_info.name, i); + } +} + + +/// Map an input file column to a table column, based on its name. +void CSVRowInputFormat::addInputColumn(const String & column_name) +{ + const auto column_it = column_indexes_by_names.find(column_name); + if (column_it == column_indexes_by_names.end()) + { + if (format_settings.skip_unknown_fields) + { column_mapping->column_indexes_for_input_fields.push_back(std::nullopt); - return; - } - - throw Exception( - "Unknown field found in CSV header: '" + column_name + "' " + + return; + } + + throw Exception( + "Unknown field found in CSV header: '" + column_name + "' " + "at position " + std::to_string(column_mapping->column_indexes_for_input_fields.size()) + - "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", - ErrorCodes::INCORRECT_DATA - ); - } - - const auto column_index = column_it->second; - + "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", + ErrorCodes::INCORRECT_DATA + ); + } + + const auto column_index = column_it->second; + if (column_mapping->read_columns[column_index]) - throw Exception("Duplicate field found while parsing CSV header: " + column_name, ErrorCodes::INCORRECT_DATA); - + throw Exception("Duplicate field found while parsing CSV header: " + column_name, ErrorCodes::INCORRECT_DATA); + column_mapping->read_columns[column_index] = true; column_mapping->column_indexes_for_input_fields.emplace_back(column_index); -} - -static void skipEndOfLine(ReadBuffer & in) -{ - /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic) - - if (*in.position() == '\n') - { - ++in.position(); - if (!in.eof() && *in.position() == '\r') - ++in.position(); - } - else if (*in.position() == '\r') - { - ++in.position(); - if (!in.eof() && *in.position() == '\n') - ++in.position(); - else - throw Exception("Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)." 
- " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", ErrorCodes::INCORRECT_DATA); - } - else if (!in.eof()) - throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA); -} - - -static void skipDelimiter(ReadBuffer & in, const char delimiter, bool is_last_column) -{ - if (is_last_column) - { - if (in.eof()) - return; - - /// we support the extra delimiter at the end of the line - if (*in.position() == delimiter) - { - ++in.position(); - if (in.eof()) - return; - } - - skipEndOfLine(in); - } - else - assertChar(delimiter, in); -} - - -/// Skip `whitespace` symbols allowed in CSV. -static inline void skipWhitespacesAndTabs(ReadBuffer & in) -{ - while (!in.eof() - && (*in.position() == ' ' - || *in.position() == '\t')) - ++in.position(); -} - - -static void skipRow(ReadBuffer & in, const FormatSettings::CSV & settings, size_t num_columns) -{ - String tmp; - for (size_t i = 0; i < num_columns; ++i) - { - skipWhitespacesAndTabs(in); - readCSVString(tmp, in, settings); - skipWhitespacesAndTabs(in); - - skipDelimiter(in, settings.delimiter, i + 1 == num_columns); - } -} - +} + +static void skipEndOfLine(ReadBuffer & in) +{ + /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic) + + if (*in.position() == '\n') + { + ++in.position(); + if (!in.eof() && *in.position() == '\r') + ++in.position(); + } + else if (*in.position() == '\r') + { + ++in.position(); + if (!in.eof() && *in.position() == '\n') + ++in.position(); + else + throw Exception("Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)." + " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", ErrorCodes::INCORRECT_DATA); + } + else if (!in.eof()) + throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA); +} + + +static void skipDelimiter(ReadBuffer & in, const char delimiter, bool is_last_column) +{ + if (is_last_column) + { + if (in.eof()) + return; + + /// we support the extra delimiter at the end of the line + if (*in.position() == delimiter) + { + ++in.position(); + if (in.eof()) + return; + } + + skipEndOfLine(in); + } + else + assertChar(delimiter, in); +} + + +/// Skip `whitespace` symbols allowed in CSV. +static inline void skipWhitespacesAndTabs(ReadBuffer & in) +{ + while (!in.eof() + && (*in.position() == ' ' + || *in.position() == '\t')) + ++in.position(); +} + + +static void skipRow(ReadBuffer & in, const FormatSettings::CSV & settings, size_t num_columns) +{ + String tmp; + for (size_t i = 0; i < num_columns; ++i) + { + skipWhitespacesAndTabs(in); + readCSVString(tmp, in, settings); + skipWhitespacesAndTabs(in); + + skipDelimiter(in, settings.delimiter, i + 1 == num_columns); + } +} + void CSVRowInputFormat::setupAllColumnsByTableSchema() { const auto & header = getPort().getHeader(); column_mapping->read_columns.assign(header.columns(), true); column_mapping->column_indexes_for_input_fields.resize(header.columns()); - + for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i) column_mapping->column_indexes_for_input_fields[i] = i; } -void CSVRowInputFormat::readPrefix() -{ - /// In this format, we assume, that if first string field contain BOM as value, it will be written in quotes, - /// so BOM at beginning of stream cannot be confused with BOM in first string value, and it is safe to skip it. 
- skipBOMIfExists(in); - - size_t num_columns = data_types.size(); - const auto & header = getPort().getHeader(); - +void CSVRowInputFormat::readPrefix() +{ + /// In this format, we assume, that if first string field contain BOM as value, it will be written in quotes, + /// so BOM at beginning of stream cannot be confused with BOM in first string value, and it is safe to skip it. + skipBOMIfExists(in); + + size_t num_columns = data_types.size(); + const auto & header = getPort().getHeader(); + /// This is a bit of abstraction leakage, but we have almost the same code in other places. /// Thus, we check if this InputFormat is working with the "real" beginning of the data in case of parallel parsing. if (with_names && getCurrentUnitNumber() == 0) - { - /// This CSV file has a header row with column names. Depending on the - /// settings, use it or skip it. - if (format_settings.with_names_use_header) - { - /// Look at the file header to see which columns we have there. - /// The missing columns are filled with defaults. + { + /// This CSV file has a header row with column names. Depending on the + /// settings, use it or skip it. + if (format_settings.with_names_use_header) + { + /// Look at the file header to see which columns we have there. + /// The missing columns are filled with defaults. column_mapping->read_columns.assign(header.columns(), false); - do - { - String column_name; - skipWhitespacesAndTabs(in); - readCSVString(column_name, in, format_settings.csv); - skipWhitespacesAndTabs(in); - - addInputColumn(column_name); - } - while (checkChar(format_settings.csv.delimiter, in)); - - skipDelimiter(in, format_settings.csv.delimiter, true); - + do + { + String column_name; + skipWhitespacesAndTabs(in); + readCSVString(column_name, in, format_settings.csv); + skipWhitespacesAndTabs(in); + + addInputColumn(column_name); + } + while (checkChar(format_settings.csv.delimiter, in)); + + skipDelimiter(in, format_settings.csv.delimiter, true); + for (auto read_column : column_mapping->read_columns) - { - if (!read_column) - { + { + if (!read_column) + { column_mapping->have_always_default_columns = true; - break; - } - } - - return; - } - else + break; + } + } + + return; + } + else { - skipRow(in, format_settings.csv, num_columns); + skipRow(in, format_settings.csv, num_columns); setupAllColumnsByTableSchema(); } - } + } else if (!column_mapping->is_set) setupAllColumnsByTableSchema(); -} - - -bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) -{ - if (in.eof()) - return false; - - updateDiagnosticInfo(); - - /// Track whether we have to fill any columns in this row with default - /// values. If not, we return an empty column mask to the caller, so that - /// it doesn't have to check it. +} + + +bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +{ + if (in.eof()) + return false; + + updateDiagnosticInfo(); + + /// Track whether we have to fill any columns in this row with default + /// values. If not, we return an empty column mask to the caller, so that + /// it doesn't have to check it. 
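/// Put differently, readRow() reports a per-column mask so the caller can
/// fill absent values itself; a simplified consumer loop (fillWithDefault is
/// a hypothetical stand-in for the caller's default-filling step):
///
///     for (size_t i = 0; i < ext.read_columns.size(); ++i)
///         if (!ext.read_columns[i])
///             fillWithDefault(columns[i]); /// value was absent in this row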
bool have_default_columns = column_mapping->have_always_default_columns; - + ext.read_columns.assign(column_mapping->read_columns.size(), true); - const auto delimiter = format_settings.csv.delimiter; + const auto delimiter = format_settings.csv.delimiter; for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) - { + { const auto & table_column = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); - - if (table_column) - { - skipWhitespacesAndTabs(in); + + if (table_column) + { + skipWhitespacesAndTabs(in); ext.read_columns[*table_column] = readField(*columns[*table_column], data_types[*table_column], serializations[*table_column], is_last_file_column); - if (!ext.read_columns[*table_column]) - have_default_columns = true; - skipWhitespacesAndTabs(in); - } - else - { - /// We never read this column from the file, just skip it. - String tmp; - readCSVString(tmp, in, format_settings.csv); - } - - skipDelimiter(in, delimiter, is_last_file_column); - } - - if (have_default_columns) - { + if (!ext.read_columns[*table_column]) + have_default_columns = true; + skipWhitespacesAndTabs(in); + } + else + { + /// We never read this column from the file, just skip it. + String tmp; + readCSVString(tmp, in, format_settings.csv); + } + + skipDelimiter(in, delimiter, is_last_file_column); + } + + if (have_default_columns) + { for (size_t i = 0; i < column_mapping->read_columns.size(); i++) - { + { if (!column_mapping->read_columns[i]) - { - /// The column value for this row is going to be overwritten - /// with default by the caller, but the general assumption is - /// that the column size increases for each row, so we have - /// to insert something. Since we do not care about the exact - /// value, we do not have to use the default value specified by - /// the data type, and can just use IColumn::insertDefault(). - columns[i]->insertDefault(); - ext.read_columns[i] = false; - } - } - } - - return true; -} - -bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) -{ - const char delimiter = format_settings.csv.delimiter; - + { + /// The column value for this row is going to be overwritten + /// with default by the caller, but the general assumption is + /// that the column size increases for each row, so we have + /// to insert something. Since we do not care about the exact + /// value, we do not have to use the default value specified by + /// the data type, and can just use IColumn::insertDefault(). 
+ columns[i]->insertDefault(); + ext.read_columns[i] = false; + } + } + } + + return true; +} + +bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +{ + const char delimiter = format_settings.csv.delimiter; + for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) - { - if (file_column == 0 && in.eof()) - { - out << "<End of stream>\n"; - return false; - } - - skipWhitespacesAndTabs(in); + { + if (file_column == 0 && in.eof()) + { + out << "<End of stream>\n"; + return false; + } + + skipWhitespacesAndTabs(in); if (column_mapping->column_indexes_for_input_fields[file_column].has_value()) - { - const auto & header = getPort().getHeader(); + { + const auto & header = getPort().getHeader(); size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value(); - if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], - out, file_column)) - return false; - } - else - { - static const String skipped_column_str = "<SKIPPED COLUMN>"; - static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>(); - static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); - if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) - return false; - } - skipWhitespacesAndTabs(in); - - /// Delimiters + if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], + out, file_column)) + return false; + } + else + { + static const String skipped_column_str = "<SKIPPED COLUMN>"; + static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>(); + static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); + if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) + return false; + } + skipWhitespacesAndTabs(in); + + /// Delimiters if (file_column + 1 == column_mapping->column_indexes_for_input_fields.size()) - { - if (in.eof()) - return false; - - /// we support the extra delimiter at the end of the line - if (*in.position() == delimiter) - { - ++in.position(); - if (in.eof()) - break; - } - - if (!in.eof() && *in.position() != '\n' && *in.position() != '\r') - { - out << "ERROR: There is no line feed. "; - verbosePrintString(in.position(), in.position() + 1, out); - out << " found instead.\n" - " It's like your file has more columns than expected.\n" + { + if (in.eof()) + return false; + + /// we support the extra delimiter at the end of the line + if (*in.position() == delimiter) + { + ++in.position(); + if (in.eof()) + break; + } + + if (!in.eof() && *in.position() != '\n' && *in.position() != '\r') + { + out << "ERROR: There is no line feed. "; + verbosePrintString(in.position(), in.position() + 1, out); + out << " found instead.\n" + " It's like your file has more columns than expected.\n" "And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n"; - - return false; - } - - skipEndOfLine(in); - } - else - { - try - { - assertChar(delimiter, in); - } - catch (const DB::Exception &) - { - if (*in.position() == '\n' || *in.position() == '\r') - { - out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." 
- " It's like your file has less columns than expected.\n" + + return false; + } + + skipEndOfLine(in); + } + else + { + try + { + assertChar(delimiter, in); + } + catch (const DB::Exception &) + { + if (*in.position() == '\n' || *in.position() == '\r') + { + out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." + " It's like your file has less columns than expected.\n" "And if your file has the right number of columns, maybe it has unescaped quotes in values.\n"; - } - else - { - out << "ERROR: There is no delimiter (" << delimiter << "). "; - verbosePrintString(in.position(), in.position() + 1, out); - out << " found instead.\n"; - } - return false; - } - } - } - - return true; -} - - -void CSVRowInputFormat::syncAfterError() -{ - skipToNextLineOrEOF(in); -} - -void CSVRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) -{ + } + else + { + out << "ERROR: There is no delimiter (" << delimiter << "). "; + verbosePrintString(in.position(), in.position() + 1, out); + out << " found instead.\n"; + } + return false; + } + } + } + + return true; +} + + +void CSVRowInputFormat::syncAfterError() +{ + skipToNextLineOrEOF(in); +} + +void CSVRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +{ const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; if (index) - { + { const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); readField(column, type, serializations[*index], is_last_file_column); - } - else - { - String tmp; - readCSVString(tmp, in, format_settings.csv); - } -} - + } + else + { + String tmp; + readCSVString(tmp, in, format_settings.csv); + } +} + bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column) -{ - const bool at_delimiter = !in.eof() && *in.position() == format_settings.csv.delimiter; - const bool at_last_column_line_end = is_last_file_column - && (in.eof() || *in.position() == '\n' || *in.position() == '\r'); - - /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default - /// only one empty or NULL column will be expected - if (format_settings.csv.empty_as_default - && (at_delimiter || at_last_column_line_end)) - { - /// Treat empty unquoted column value as default value, if - /// specified in the settings. Tuple columns might seem - /// problematic, because they are never quoted but still contain - /// commas, which might be also used as delimiters. However, - /// they do not contain empty unquoted fields, so this check - /// works for tuples as well. - column.insertDefault(); - return false; - } - else if (format_settings.null_as_default && !type->isNullable()) - { - /// If value is null but type is not nullable then use default value instead. +{ + const bool at_delimiter = !in.eof() && *in.position() == format_settings.csv.delimiter; + const bool at_last_column_line_end = is_last_file_column + && (in.eof() || *in.position() == '\n' || *in.position() == '\r'); + + /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default + /// only one empty or NULL column will be expected + if (format_settings.csv.empty_as_default + && (at_delimiter || at_last_column_line_end)) + { + /// Treat empty unquoted column value as default value, if + /// specified in the settings. 
Tuple columns might seem + /// problematic, because they are never quoted but still contain + /// commas, which might be also used as delimiters. However, + /// they do not contain empty unquoted fields, so this check + /// works for tuples as well. + column.insertDefault(); + return false; + } + else if (format_settings.null_as_default && !type->isNullable()) + { + /// If value is null but type is not nullable then use default value instead. return SerializationNullable::deserializeTextCSVImpl(column, in, format_settings, serialization); - } - else - { - /// Read the column normally. + } + else + { + /// Read the column normally. serialization->deserializeTextCSV(column, in, format_settings); - return true; - } -} - -void CSVRowInputFormat::resetParser() -{ - RowInputFormatWithDiagnosticInfo::resetParser(); + return true; + } +} + +void CSVRowInputFormat::resetParser() +{ + RowInputFormatWithDiagnosticInfo::resetParser(); column_mapping->column_indexes_for_input_fields.clear(); column_mapping->read_columns.clear(); column_mapping->have_always_default_columns = false; -} - - -void registerInputFormatProcessorCSV(FormatFactory & factory) -{ - for (bool with_names : {false, true}) - { - factory.registerInputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=]( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared<CSVRowInputFormat>(sample, buf, params, with_names, settings); - }); - } -} - +} + + +void registerInputFormatProcessorCSV(FormatFactory & factory) +{ + for (bool with_names : {false, true}) + { + factory.registerInputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=]( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared<CSVRowInputFormat>(sample, buf, params, with_names, settings); + }); + } +} + static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -{ - char * pos = in.position(); - bool quotes = false; - bool need_more_data = true; +{ + char * pos = in.position(); + bool quotes = false; + bool need_more_data = true; size_t number_of_rows = 0; - - while (loadAtPosition(in, memory, pos) && need_more_data) - { - if (quotes) - { - pos = find_first_symbols<'"'>(pos, in.buffer().end()); + + while (loadAtPosition(in, memory, pos) && need_more_data) + { + if (quotes) + { + pos = find_first_symbols<'"'>(pos, in.buffer().end()); if (pos > in.buffer().end()) throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); else if (pos == in.buffer().end()) - continue; + continue; else if (*pos == '"') - { - ++pos; - if (loadAtPosition(in, memory, pos) && *pos == '"') - ++pos; - else - quotes = false; - } - } - else - { - pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); + { + ++pos; + if (loadAtPosition(in, memory, pos) && *pos == '"') + ++pos; + else + quotes = false; + } + } + else + { + pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); if (pos > in.buffer().end()) throw Exception("Position in buffer is out of bounds. 
There must be a bug.", ErrorCodes::LOGICAL_ERROR); else if (pos == in.buffer().end()) - continue; + continue; else if (*pos == '"') - { - quotes = true; - ++pos; - } - else if (*pos == '\n') - { + { + quotes = true; + ++pos; + } + else if (*pos == '\n') + { ++number_of_rows; - if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) - need_more_data = false; - ++pos; - if (loadAtPosition(in, memory, pos) && *pos == '\r') - ++pos; - } - else if (*pos == '\r') - { - if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) - need_more_data = false; - ++pos; - if (loadAtPosition(in, memory, pos) && *pos == '\n') + if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) + need_more_data = false; + ++pos; + if (loadAtPosition(in, memory, pos) && *pos == '\r') + ++pos; + } + else if (*pos == '\r') + { + if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) + need_more_data = false; + ++pos; + if (loadAtPosition(in, memory, pos) && *pos == '\n') { - ++pos; + ++pos; ++number_of_rows; } - } - } - } - - saveUpToPosition(in, memory, pos); + } + } + } + + saveUpToPosition(in, memory, pos); return {loadAtPosition(in, memory, pos), number_of_rows}; -} - -void registerFileSegmentationEngineCSV(FormatFactory & factory) -{ - factory.registerFileSegmentationEngine("CSV", &fileSegmentationEngineCSVImpl); +} + +void registerFileSegmentationEngineCSV(FormatFactory & factory) +{ + factory.registerFileSegmentationEngine("CSV", &fileSegmentationEngineCSVImpl); factory.registerFileSegmentationEngine("CSVWithNames", &fileSegmentationEngineCSVImpl); -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h index 69f8d85559..b6075745b3 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -1,54 +1,54 @@ -#pragma once - -#include <optional> -#include <unordered_map> - -#include <Core/Block.h> -#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> -#include <Formats/FormatSettings.h> - - -namespace DB -{ - -/** A stream for inputting data in csv format. - * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values. - */ -class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo -{ -public: - /** with_names - in the first line the header with column names - */ - CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, const FormatSettings & format_settings_); - - String getName() const override { return "CSVRowInputFormat"; } - - bool readRow(MutableColumns & columns, RowReadExtension & ext) override; - void readPrefix() override; - bool allowSyncAfterError() const override { return true; } - void syncAfterError() override; - void resetParser() override; - -private: +#pragma once + +#include <optional> +#include <unordered_map> + +#include <Core/Block.h> +#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> +#include <Formats/FormatSettings.h> + + +namespace DB +{ + +/** A stream for inputting data in csv format. + * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values. 
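 * A rough illustration of the quote-aware scanning used by the segmentation
 * engine in the .cpp above (hypothetical standalone code; assumes
 * <string_view>):
 *
 *     /// True if `data` ends inside an open CSV quote: '"' toggles the state,
 *     /// and a doubled quote ("") inside a quoted field is an escaped quote.
 *     bool endsInsideQuotes(std::string_view data)
 *     {
 *         bool quotes = false;
 *         for (size_t i = 0; i < data.size(); ++i)
 *         {
 *             if (data[i] != '"')
 *                 continue;
 *             if (quotes && i + 1 < data.size() && data[i + 1] == '"')
 *                 ++i; /// escaped quote, stay inside the field
 *             else
 *                 quotes = !quotes;
 *         }
 *         return quotes;
 *     }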
+ */ +class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo +{ +public: + /** with_names - in the first line the header with column names + */ + CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, + bool with_names_, const FormatSettings & format_settings_); + + String getName() const override { return "CSVRowInputFormat"; } + + bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + void readPrefix() override; + bool allowSyncAfterError() const override { return true; } + void syncAfterError() override; + void resetParser() override; + +private: /// There fields are computed in constructor. - bool with_names; - const FormatSettings format_settings; - DataTypes data_types; - using IndexesMap = std::unordered_map<String, size_t>; - IndexesMap column_indexes_by_names; - - void addInputColumn(const String & column_name); - + bool with_names; + const FormatSettings format_settings; + DataTypes data_types; + using IndexesMap = std::unordered_map<String, size_t>; + IndexesMap column_indexes_by_names; + + void addInputColumn(const String & column_name); + void setupAllColumnsByTableSchema(); - bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; - void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; - bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override - { - return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter && *pos != ' ' && *pos != '\t'; - } - + bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; + void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override + { + return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter && *pos != ' ' && *pos != '\t'; + } + bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column); -}; - -} +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 775a93cbbe..85937935f1 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -1,31 +1,31 @@ -#include <IO/ReadHelpers.h> -#include <Processors/Formats/Impl/TSKVRowInputFormat.h> -#include <Formats/FormatFactory.h> +#include <IO/ReadHelpers.h> +#include <Processors/Formats/Impl/TSKVRowInputFormat.h> +#include <Formats/FormatFactory.h> #include <DataTypes/Serializations/SerializationNullable.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int INCORRECT_DATA; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_READ_ALL_DATA; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; -} - - -TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; + extern const int CANNOT_READ_ALL_DATA; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; +} + + +TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const 
FormatSettings & format_settings_) : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_) -{ - const auto & sample_block = getPort().getHeader(); - size_t num_columns = sample_block.columns(); - for (size_t i = 0; i < num_columns; ++i) - name_map[sample_block.getByPosition(i).name] = i; /// NOTE You could place names more cache-locally. -} - - +{ + const auto & sample_block = getPort().getHeader(); + size_t num_columns = sample_block.columns(); + for (size_t i = 0; i < num_columns; ++i) + name_map[sample_block.getByPosition(i).name] = i; /// NOTE You could place names more cache-locally. +} + + void TSKVRowInputFormat::readPrefix() { /// In this format, we assume that column name cannot contain BOM, @@ -34,193 +34,193 @@ void TSKVRowInputFormat::readPrefix() } -/** Read the field name in the `tskv` format. - * Return true if the field is followed by an equal sign, - * otherwise (field with no value) return false. - * The reference to the field name will be written to `ref`. - * A temporary `tmp` buffer can also be used to copy the field name to it. - * When reading, skips the name and the equal sign after it. - */ -static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) -{ - tmp.clear(); - - while (!buf.eof()) - { - const char * next_pos = find_first_symbols<'\t', '\n', '\\', '='>(buf.position(), buf.buffer().end()); - - if (next_pos == buf.buffer().end()) - { - tmp.append(buf.position(), next_pos - buf.position()); +/** Read the field name in the `tskv` format. + * Return true if the field is followed by an equal sign, + * otherwise (field with no value) return false. + * The reference to the field name will be written to `ref`. + * A temporary `tmp` buffer can also be used to copy the field name to it. + * When reading, skips the name and the equal sign after it. + */ +static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) +{ + tmp.clear(); + + while (!buf.eof()) + { + const char * next_pos = find_first_symbols<'\t', '\n', '\\', '='>(buf.position(), buf.buffer().end()); + + if (next_pos == buf.buffer().end()) + { + tmp.append(buf.position(), next_pos - buf.position()); buf.position() = buf.buffer().end(); - buf.next(); - continue; - } - - /// Came to the end of the name. - if (*next_pos != '\\') - { - bool have_value = *next_pos == '='; - if (tmp.empty()) - { - /// No need to copy data, you can refer directly to the `buf`. - ref = StringRef(buf.position(), next_pos - buf.position()); - buf.position() += next_pos + have_value - buf.position(); - } - else - { - /// Copy the data to a temporary string and return a reference to it. - tmp.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos + have_value - buf.position(); - ref = StringRef(tmp); - } - return have_value; - } - /// The name has an escape sequence. - else - { - tmp.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos + 1 - buf.position(); - if (buf.eof()) - throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); - - tmp.push_back(parseEscapeSequence(*buf.position())); - ++buf.position(); - continue; - } - } - + buf.next(); + continue; + } + + /// Came to the end of the name. + if (*next_pos != '\\') + { + bool have_value = *next_pos == '='; + if (tmp.empty()) + { + /// No need to copy data, you can refer directly to the `buf`. 
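/// This zero-copy fast path is the common case: the key sits contiguously in
/// the buffer, so a (pointer, length) view is enough. Roughly, in standalone
/// terms (hypothetical pointers; std::string_view in place of StringRef):
///
///     std::string_view key(begin_pos, static_cast<size_t>(next_pos - begin_pos)); /// no allocation, no copy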
+ ref = StringRef(buf.position(), next_pos - buf.position()); + buf.position() += next_pos + have_value - buf.position(); + } + else + { + /// Copy the data to a temporary string and return a reference to it. + tmp.append(buf.position(), next_pos - buf.position()); + buf.position() += next_pos + have_value - buf.position(); + ref = StringRef(tmp); + } + return have_value; + } + /// The name has an escape sequence. + else + { + tmp.append(buf.position(), next_pos - buf.position()); + buf.position() += next_pos + 1 - buf.position(); + if (buf.eof()) + throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + + tmp.push_back(parseEscapeSequence(*buf.position())); + ++buf.position(); + continue; + } + } + throw ParsingException("Unexpected end of stream while reading key name from TSKV format", ErrorCodes::CANNOT_READ_ALL_DATA); -} - - -bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) -{ - if (in.eof()) - return false; - - const auto & header = getPort().getHeader(); - size_t num_columns = columns.size(); - - /// Set of columns for which the values were read. The rest will be filled with default values. - read_columns.assign(num_columns, false); - seen_columns.assign(num_columns, false); - - if (unlikely(*in.position() == '\n')) - { - /// An empty string. It is permissible, but it is unclear why. - ++in.position(); - } - else - { - while (true) - { - StringRef name_ref; - bool has_value = readName(in, name_ref, name_buf); - ssize_t index = -1; - - if (has_value) - { - /// NOTE Optimization is possible by caching the order of fields (which is almost always the same) - /// and quickly checking for the next expected field, instead of searching the hash table. - - auto * it = name_map.find(name_ref); - if (!it) - { - if (!format_settings.skip_unknown_fields) - throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); - - /// If the key is not found, skip the value. +} + + +bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +{ + if (in.eof()) + return false; + + const auto & header = getPort().getHeader(); + size_t num_columns = columns.size(); + + /// Set of columns for which the values were read. The rest will be filled with default values. + read_columns.assign(num_columns, false); + seen_columns.assign(num_columns, false); + + if (unlikely(*in.position() == '\n')) + { + /// An empty string. It is permissible, but it is unclear why. + ++in.position(); + } + else + { + while (true) + { + StringRef name_ref; + bool has_value = readName(in, name_ref, name_buf); + ssize_t index = -1; + + if (has_value) + { + /// NOTE Optimization is possible by caching the order of fields (which is almost always the same) + /// and quickly checking for the next expected field, instead of searching the hash table. + + auto * it = name_map.find(name_ref); + if (!it) + { + if (!format_settings.skip_unknown_fields) + throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + + /// If the key is not found, skip the value. 
NullOutput sink; - readEscapedStringInto(sink, in); - } - else - { - index = it->getMapped(); - - if (seen_columns[index]) - throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); - - seen_columns[index] = read_columns[index] = true; - const auto & type = getPort().getHeader().getByPosition(index).type; + readEscapedStringInto(sink, in); + } + else + { + index = it->getMapped(); + + if (seen_columns[index]) + throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + + seen_columns[index] = read_columns[index] = true; + const auto & type = getPort().getHeader().getByPosition(index).type; const auto & serialization = serializations[index]; - if (format_settings.null_as_default && !type->isNullable()) + if (format_settings.null_as_default && !type->isNullable()) read_columns[index] = SerializationNullable::deserializeTextEscapedImpl(*columns[index], in, format_settings, serialization); - else + else serialization->deserializeTextEscaped(*columns[index], in, format_settings); - } - } - else - { - /// The only thing that can go without value is `tskv` fragment that is ignored. - if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4))) - throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); - } - - if (in.eof()) - { + } + } + else + { + /// The only thing that can go without value is `tskv` fragment that is ignored. + if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4))) + throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + } + + if (in.eof()) + { throw ParsingException("Unexpected end of stream after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_READ_ALL_DATA); - } - else if (*in.position() == '\t') - { - ++in.position(); - continue; - } - else if (*in.position() == '\n') - { - ++in.position(); - break; - } - else - { - /// Possibly a garbage was written into column, remove it - if (index >= 0) - { - columns[index]->popBack(1); - seen_columns[index] = read_columns[index] = false; - } - - throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - } - } - } - - /// Fill in the not met columns with default values. 
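// A small worked example, assuming columns {a Int32, b Nullable(String)},
// null_as_default = 1, and the row "a=\N\tb=x\n":
//
//     seen_columns == {1, 1}   // both keys occurred in the row
//     read_columns == {0, 1}   // 'a' was \N on a non-nullable column, so the
//                              // escaped-null path inserted a default instead
//
// Any column with seen_columns[i] == false is defaulted by the loop below.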
- for (size_t i = 0; i < num_columns; ++i) - if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); - - /// return info about defaults set - ext.read_columns = read_columns; - - return true; -} - - -void TSKVRowInputFormat::syncAfterError() -{ - skipToUnescapedNextLineOrEOF(in); -} - - -void TSKVRowInputFormat::resetParser() -{ - IRowInputFormat::resetParser(); - read_columns.clear(); - seen_columns.clear(); - name_buf.clear(); -} - -void registerInputFormatProcessorTSKV(FormatFactory & factory) -{ - factory.registerInputFormatProcessor("TSKV", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared<TSKVRowInputFormat>(buf, sample, std::move(params), settings); - }); -} - -} + } + else if (*in.position() == '\t') + { + ++in.position(); + continue; + } + else if (*in.position() == '\n') + { + ++in.position(); + break; + } + else + { + /// Possibly a garbage was written into column, remove it + if (index >= 0) + { + columns[index]->popBack(1); + seen_columns[index] = read_columns[index] = false; + } + + throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + } + } + } + + /// Fill in the not met columns with default values. + for (size_t i = 0; i < num_columns; ++i) + if (!seen_columns[i]) + header.getByPosition(i).type->insertDefaultInto(*columns[i]); + + /// return info about defaults set + ext.read_columns = read_columns; + + return true; +} + + +void TSKVRowInputFormat::syncAfterError() +{ + skipToUnescapedNextLineOrEOF(in); +} + + +void TSKVRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + read_columns.clear(); + seen_columns.clear(); + name_buf.clear(); +} + +void registerInputFormatProcessorTSKV(FormatFactory & factory) +{ + factory.registerInputFormatProcessor("TSKV", []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared<TSKVRowInputFormat>(buf, sample, std::move(params), settings); + }); +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h index 15fe077e41..bc537158d9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h @@ -1,55 +1,55 @@ -#pragma once - -#include <Core/Block.h> -#include <Processors/Formats/IRowInputFormat.h> -#include <Formats/FormatSettings.h> -#include <Common/HashTable/HashMap.h> - - -namespace DB -{ - -class ReadBuffer; - - -/** Stream for reading data in TSKV format. - * TSKV is a very inefficient data format. - * Similar to TSV, but each field is written as key=value. - * Fields can be listed in any order (including, in different lines there may be different order), - * and some fields may be missing. - * An equal sign can be escaped in the field name. - * Also, as an additional element there may be a useless tskv fragment - it needs to be ignored. 
- */ -class TSKVRowInputFormat : public IRowInputFormat -{ -public: - TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_); - - String getName() const override { return "TSKVRowInputFormat"; } - +#pragma once + +#include <Core/Block.h> +#include <Processors/Formats/IRowInputFormat.h> +#include <Formats/FormatSettings.h> +#include <Common/HashTable/HashMap.h> + + +namespace DB +{ + +class ReadBuffer; + + +/** Stream for reading data in TSKV format. + * TSKV is a very inefficient data format. + * Similar to TSV, but each field is written as key=value. + * Fields can be listed in any order (including, in different lines there may be different order), + * and some fields may be missing. + * An equal sign can be escaped in the field name. + * Also, as an additional element there may be a useless tskv fragment - it needs to be ignored. + */ +class TSKVRowInputFormat : public IRowInputFormat +{ +public: + TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_); + + String getName() const override { return "TSKVRowInputFormat"; } + void readPrefix() override; - bool readRow(MutableColumns & columns, RowReadExtension &) override; - bool allowSyncAfterError() const override { return true; } - void syncAfterError() override; - void resetParser() override; - - -private: - const FormatSettings format_settings; - - /// Buffer for the read from the stream the field name. Used when you have to copy it. - String name_buf; - - /// Hash table matching `field name -> position in the block`. NOTE You can use perfect hash map. - using NameMap = HashMap<StringRef, size_t, StringRefHash>; - NameMap name_map; - - /// Set of columns for which the values were read. The rest will be filled with default values. - std::vector<UInt8> read_columns; - /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. - std::vector<UInt8> seen_columns; - /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true - /// for row like ..., non-nullable column name=\N, ... -}; - -} + bool readRow(MutableColumns & columns, RowReadExtension &) override; + bool allowSyncAfterError() const override { return true; } + void syncAfterError() override; + void resetParser() override; + + +private: + const FormatSettings format_settings; + + /// Buffer for the read from the stream the field name. Used when you have to copy it. + String name_buf; + + /// Hash table matching `field name -> position in the block`. NOTE You can use perfect hash map. + using NameMap = HashMap<StringRef, size_t, StringRefHash>; + NameMap name_map; + + /// Set of columns for which the values were read. The rest will be filled with default values. + std::vector<UInt8> read_columns; + /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. + std::vector<UInt8> seen_columns; + /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true + /// for row like ..., non-nullable column name=\N, ... 
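// A minimal construction sketch, assuming a caller that already owns a
// ReadBuffer, a sample Block, Params and FormatSettings (argument order as
// declared above; the variable names are placeholders):

TSKVRowInputFormat format(read_buffer, sample_block, std::move(params), format_settings);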
+}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index c3e6bcb47b..5d56ed1327 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -1,152 +1,152 @@ -#include <IO/ReadHelpers.h> -#include <IO/WriteBufferFromString.h> -#include <IO/Operators.h> - -#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h> +#include <IO/ReadHelpers.h> +#include <IO/WriteBufferFromString.h> +#include <IO/Operators.h> + +#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h> #include <Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h> -#include <Formats/verbosePrintString.h> -#include <Formats/FormatFactory.h> -#include <DataTypes/DataTypeNothing.h> +#include <Formats/verbosePrintString.h> +#include <Formats/FormatFactory.h> +#include <DataTypes/DataTypeNothing.h> #include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/Serializations/SerializationNullable.h> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int INCORRECT_DATA; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; extern const int LOGICAL_ERROR; -} - - -static void skipTSVRow(ReadBuffer & in, const size_t num_columns) -{ +} + + +static void skipTSVRow(ReadBuffer & in, const size_t num_columns) +{ NullOutput null_sink; - - for (size_t i = 0; i < num_columns; ++i) - { - readEscapedStringInto(null_sink, in); - assertChar(i == num_columns - 1 ? '\n' : '\t', in); - } -} - - -/** Check for a common error case - usage of Windows line feed. - */ -static void checkForCarriageReturn(ReadBuffer & in) -{ - if (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r')) - throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." - "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." - " You must transform your file to Unix format." - "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.", - ErrorCodes::INCORRECT_DATA); -} - - -TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_) - : RowInputFormatWithDiagnosticInfo(header_, in_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) -{ - const auto & sample = getPort().getHeader(); - size_t num_columns = sample.columns(); - - data_types.resize(num_columns); - column_indexes_by_names.reserve(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_info = sample.getByPosition(i); - - data_types[i] = column_info.type; - column_indexes_by_names.emplace(column_info.name, i); - } - + + for (size_t i = 0; i < num_columns; ++i) + { + readEscapedStringInto(null_sink, in); + assertChar(i == num_columns - 1 ? '\n' : '\t', in); + } +} + + +/** Check for a common error case - usage of Windows line feed. 
+ */ +static void checkForCarriageReturn(ReadBuffer & in) +{ + if (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r')) + throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." + "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." + " You must transform your file to Unix format." + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.", + ErrorCodes::INCORRECT_DATA); +} + + +TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_) + : RowInputFormatWithDiagnosticInfo(header_, in_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) +{ + const auto & sample = getPort().getHeader(); + size_t num_columns = sample.columns(); + + data_types.resize(num_columns); + column_indexes_by_names.reserve(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & column_info = sample.getByPosition(i); + + data_types[i] = column_info.type; + column_indexes_by_names.emplace(column_info.name, i); + } + column_mapping->column_indexes_for_input_fields.reserve(num_columns); column_mapping->read_columns.assign(num_columns, false); -} - - -void TabSeparatedRowInputFormat::setupAllColumnsByTableSchema() -{ - const auto & header = getPort().getHeader(); +} + + +void TabSeparatedRowInputFormat::setupAllColumnsByTableSchema() +{ + const auto & header = getPort().getHeader(); column_mapping->read_columns.assign(header.columns(), true); column_mapping->column_indexes_for_input_fields.resize(header.columns()); - + for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i) column_mapping->column_indexes_for_input_fields[i] = i; -} - - -void TabSeparatedRowInputFormat::addInputColumn(const String & column_name) -{ - const auto column_it = column_indexes_by_names.find(column_name); - if (column_it == column_indexes_by_names.end()) - { - if (format_settings.skip_unknown_fields) - { +} + + +void TabSeparatedRowInputFormat::addInputColumn(const String & column_name) +{ + const auto column_it = column_indexes_by_names.find(column_name); + if (column_it == column_indexes_by_names.end()) + { + if (format_settings.skip_unknown_fields) + { column_mapping->column_indexes_for_input_fields.push_back(std::nullopt); - return; - } - - throw Exception( - "Unknown field found in TSV header: '" + column_name + "' " + + return; + } + + throw Exception( + "Unknown field found in TSV header: '" + column_name + "' " + "at position " + std::to_string(column_mapping->column_indexes_for_input_fields.size()) + - "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", - ErrorCodes::INCORRECT_DATA - ); - } - - const auto column_index = column_it->second; - + "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", + ErrorCodes::INCORRECT_DATA + ); + } + + const auto column_index = column_it->second; + if (column_mapping->read_columns[column_index]) - throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA); - + throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA); + column_mapping->read_columns[column_index] = true; 
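// Worked example for addInputColumn(), assuming a table with columns {x, y}
// and a TSVWithNames header line "y\tunknown\tx" parsed left to right with
// skip_unknown_fields enabled:
//
//     column_mapping->column_indexes_for_input_fields == { 1, std::nullopt, 0 }
//
// Without skip_unknown_fields, the name "unknown" raises the INCORRECT_DATA
// exception built above.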
column_mapping->column_indexes_for_input_fields.emplace_back(column_index); -} - - -void TabSeparatedRowInputFormat::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension) -{ - /// It is safe to memorize this on the first run - the format guarantees this does not change - if (unlikely(row_num == 1)) - { - columns_to_fill_with_default_values.clear(); +} + + +void TabSeparatedRowInputFormat::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension) +{ + /// It is safe to memorize this on the first run - the format guarantees this does not change + if (unlikely(row_num == 1)) + { + columns_to_fill_with_default_values.clear(); for (size_t index = 0; index < column_mapping->read_columns.size(); ++index) if (column_mapping->read_columns[index] == 0) - columns_to_fill_with_default_values.push_back(index); - } - - for (const auto column_index : columns_to_fill_with_default_values) - { - data_types[column_index]->insertDefaultInto(*columns[column_index]); - row_read_extension.read_columns[column_index] = false; - } -} - - -void TabSeparatedRowInputFormat::readPrefix() -{ - if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8()) - { - /// In this format, we assume that column name or type cannot contain BOM, - /// so, if format has header, - /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it. - skipBOMIfExists(in); - } - + columns_to_fill_with_default_values.push_back(index); + } + + for (const auto column_index : columns_to_fill_with_default_values) + { + data_types[column_index]->insertDefaultInto(*columns[column_index]); + row_read_extension.read_columns[column_index] = false; + } +} + + +void TabSeparatedRowInputFormat::readPrefix() +{ + if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8()) + { + /// In this format, we assume that column name or type cannot contain BOM, + /// so, if format has header, + /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it. + skipBOMIfExists(in); + } + /// This is a bit of abstraction leakage, but we have almost the same code in other places. /// Thus, we check if this InputFormat is working with the "real" beginning of the data in case of parallel parsing. 
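// Sketch of the row-1 memoization in fillUnreadColumnsWithDefaults() above,
// for an assumed mapping where only columns 0 and 2 appear in the input
// (read_columns == {1, 0, 1}):
//
//     columns_to_fill_with_default_values == { 1 }
//
// Later rows reuse that cached index list instead of rescanning read_columns.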
if (with_names && getCurrentUnitNumber() == 0) - { - if (format_settings.with_names_use_header) - { - String column_name; + { + if (format_settings.with_names_use_header) + { + String column_name; for (;;) - { - readEscapedString(column_name, in); + { + readEscapedString(column_name, in); if (!checkChar('\t', in)) { /// Check last column for \r before adding it, otherwise an error will be: @@ -157,188 +157,188 @@ void TabSeparatedRowInputFormat::readPrefix() } else addInputColumn(column_name); - } - - - if (!in.eof()) - { - assertChar('\n', in); - } - } - else - { - setupAllColumnsByTableSchema(); + } + + + if (!in.eof()) + { + assertChar('\n', in); + } + } + else + { + setupAllColumnsByTableSchema(); skipTSVRow(in, column_mapping->column_indexes_for_input_fields.size()); - } - } + } + } else if (!column_mapping->is_set) - setupAllColumnsByTableSchema(); - - if (with_types) - { + setupAllColumnsByTableSchema(); + + if (with_types) + { skipTSVRow(in, column_mapping->column_indexes_for_input_fields.size()); - } -} - - -bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) -{ - if (in.eof()) - return false; - - updateDiagnosticInfo(); - + } +} + + +bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +{ + if (in.eof()) + return false; + + updateDiagnosticInfo(); + ext.read_columns.assign(column_mapping->read_columns.size(), true); for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) - { + { const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); - if (column_index) - { - const auto & type = data_types[*column_index]; + if (column_index) + { + const auto & type = data_types[*column_index]; ext.read_columns[*column_index] = readField(*columns[*column_index], type, serializations[*column_index], is_last_file_column); - } - else - { + } + else + { NullOutput null_sink; - readEscapedStringInto(null_sink, in); - } - - /// skip separators + readEscapedStringInto(null_sink, in); + } + + /// skip separators if (file_column + 1 < column_mapping->column_indexes_for_input_fields.size()) - { - assertChar('\t', in); - } - else if (!in.eof()) - { - if (unlikely(row_num == 1)) - checkForCarriageReturn(in); - - assertChar('\n', in); - } - } - - fillUnreadColumnsWithDefaults(columns, ext); - - return true; -} - - + { + assertChar('\t', in); + } + else if (!in.eof()) + { + if (unlikely(row_num == 1)) + checkForCarriageReturn(in); + + assertChar('\n', in); + } + } + + fillUnreadColumnsWithDefaults(columns, ext); + + return true; +} + + bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column) -{ - const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n'); - - if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) - { - column.insertDefault(); - return false; - } - else if (format_settings.null_as_default && !type->isNullable()) +{ + const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t'; + const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n'); + + if (format_settings.tsv.empty_as_default && (at_delimiter || 
at_last_column_line_end)) + { + column.insertDefault(); + return false; + } + else if (format_settings.null_as_default && !type->isNullable()) return SerializationNullable::deserializeTextEscapedImpl(column, in, format_settings, serialization); serialization->deserializeTextEscaped(column, in, format_settings); - return true; -} - -bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) -{ + return true; +} + +bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +{ for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) - { - if (file_column == 0 && in.eof()) - { - out << "<End of stream>\n"; - return false; - } - + { + if (file_column == 0 && in.eof()) + { + out << "<End of stream>\n"; + return false; + } + if (column_mapping->column_indexes_for_input_fields[file_column].has_value()) - { - const auto & header = getPort().getHeader(); + { + const auto & header = getPort().getHeader(); size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value(); - if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], - out, file_column)) - return false; - } - else - { - static const String skipped_column_str = "<SKIPPED COLUMN>"; - static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>(); - static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); - if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) - return false; - } - - /// Delimiters + if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], + out, file_column)) + return false; + } + else + { + static const String skipped_column_str = "<SKIPPED COLUMN>"; + static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>(); + static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); + if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) + return false; + } + + /// Delimiters if (file_column + 1 == column_mapping->column_indexes_for_input_fields.size()) - { - if (!in.eof()) - { - try - { - assertChar('\n', in); - } - catch (const DB::Exception &) - { - if (*in.position() == '\t') - { - out << "ERROR: Tab found where line feed is expected." - " It's like your file has more columns than expected.\n" + { + if (!in.eof()) + { + try + { + assertChar('\n', in); + } + catch (const DB::Exception &) + { + if (*in.position() == '\t') + { + out << "ERROR: Tab found where line feed is expected." + " It's like your file has more columns than expected.\n" "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n"; - } - else if (*in.position() == '\r') - { - out << "ERROR: Carriage return found where line feed is expected." - " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; - } - else - { - out << "ERROR: There is no line feed. "; - verbosePrintString(in.position(), in.position() + 1, out); - out << " found instead.\n"; - } - return false; - } - } - } - else - { - try - { - assertChar('\t', in); - } - catch (const DB::Exception &) - { - if (*in.position() == '\n') - { - out << "ERROR: Line feed found where tab is expected." 
- " It's like your file has less columns than expected.\n" + } + else if (*in.position() == '\r') + { + out << "ERROR: Carriage return found where line feed is expected." + " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; + } + else + { + out << "ERROR: There is no line feed. "; + verbosePrintString(in.position(), in.position() + 1, out); + out << " found instead.\n"; + } + return false; + } + } + } + else + { + try + { + assertChar('\t', in); + } + catch (const DB::Exception &) + { + if (*in.position() == '\n') + { + out << "ERROR: Line feed found where tab is expected." + " It's like your file has less columns than expected.\n" "And if your file has the right number of columns, " "maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n"; - } - else if (*in.position() == '\r') - { - out << "ERROR: Carriage return found where tab is expected.\n"; - } - else - { - out << "ERROR: There is no tab. "; - verbosePrintString(in.position(), in.position() + 1, out); - out << " found instead.\n"; - } - return false; - } - } - } - - return true; -} - -void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) -{ + } + else if (*in.position() == '\r') + { + out << "ERROR: Carriage return found where tab is expected.\n"; + } + else + { + out << "ERROR: There is no tab. "; + verbosePrintString(in.position(), in.position() + 1, out); + out << " found instead.\n"; + } + return false; + } + } + } + + return true; +} + +void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +{ const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; if (index) - { + { bool can_be_parsed_as_null = removeLowCardinality(type)->isNullable(); // check null value for type is not nullable. 
don't cross buffer bound for simplicity, so maybe missing some case @@ -361,42 +361,42 @@ void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, I const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); readField(column, type, serializations[*index], is_last_file_column); - } - else - { + } + else + { NullOutput null_sink; - readEscapedStringInto(null_sink, in); - } -} - -void TabSeparatedRowInputFormat::syncAfterError() -{ - skipToUnescapedNextLineOrEOF(in); -} - -void TabSeparatedRowInputFormat::resetParser() -{ - RowInputFormatWithDiagnosticInfo::resetParser(); - const auto & sample = getPort().getHeader(); + readEscapedStringInto(null_sink, in); + } +} + +void TabSeparatedRowInputFormat::syncAfterError() +{ + skipToUnescapedNextLineOrEOF(in); +} + +void TabSeparatedRowInputFormat::resetParser() +{ + RowInputFormatWithDiagnosticInfo::resetParser(); + const auto & sample = getPort().getHeader(); column_mapping->read_columns.assign(sample.columns(), false); column_mapping->column_indexes_for_input_fields.clear(); - columns_to_fill_with_default_values.clear(); -} - -void registerInputFormatProcessorTabSeparated(FormatFactory & factory) -{ - for (const auto * name : {"TabSeparated", "TSV"}) - { - factory.registerInputFormatProcessor(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, false, false, settings); - }); - } - + columns_to_fill_with_default_values.clear(); +} + +void registerInputFormatProcessorTabSeparated(FormatFactory & factory) +{ + for (const auto * name : {"TabSeparated", "TSV"}) + { + factory.registerInputFormatProcessor(name, []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, false, false, settings); + }); + } + for (const auto * name : {"TabSeparatedRaw", "TSVRaw"}) { factory.registerInputFormatProcessor(name, []( @@ -409,74 +409,74 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory) }); } - for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"}) - { - factory.registerInputFormatProcessor(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, false, settings); - }); - } - - for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"}) - { - factory.registerInputFormatProcessor(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, true, settings); - }); - } -} - + for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"}) + { + factory.registerInputFormatProcessor(name, []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, false, settings); + }); + } + + for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"}) + { + factory.registerInputFormatProcessor(name, []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) 
+ { + return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, true, settings); + }); + } +} + static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -{ - bool need_more_data = true; - char * pos = in.position(); +{ + bool need_more_data = true; + char * pos = in.position(); size_t number_of_rows = 0; - - while (loadAtPosition(in, memory, pos) && need_more_data) - { - pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); - + + while (loadAtPosition(in, memory, pos) && need_more_data) + { + pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); + if (pos > in.buffer().end()) throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); else if (pos == in.buffer().end()) - continue; + continue; else if (*pos == '\\') - { - ++pos; - if (loadAtPosition(in, memory, pos)) - ++pos; - } - else if (*pos == '\n' || *pos == '\r') - { + { + ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '\n' || *pos == '\r') + { if (*pos == '\n') ++number_of_rows; - if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) - need_more_data = false; - ++pos; - } - } - - saveUpToPosition(in, memory, pos); - + if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) + need_more_data = false; + ++pos; + } + } + + saveUpToPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; -} - -void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) -{ - // We can use the same segmentation engine for TSKV. +} + +void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) +{ + // We can use the same segmentation engine for TSKV. for (const auto & name : {"TabSeparated", "TSV", "TSKV", "TabSeparatedWithNames", "TSVWithNames"}) - { - factory.registerFileSegmentationEngine(name, &fileSegmentationEngineTabSeparatedImpl); - } -} - -} + { + factory.registerFileSegmentationEngine(name, &fileSegmentationEngineTabSeparatedImpl); + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index f56665da86..8127b5ceba 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -1,56 +1,56 @@ -#pragma once - -#include <Core/Block.h> -#include <Formats/FormatSettings.h> -#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> - - -namespace DB -{ - -/** A stream to input data in tsv format. 
- */ -class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo -{ -public: - /** with_names - the first line is the header with the names of the columns - * with_types - on the next line header with type names - */ - TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_); - - String getName() const override { return "TabSeparatedRowInputFormat"; } - - bool readRow(MutableColumns & columns, RowReadExtension &) override; - void readPrefix() override; - bool allowSyncAfterError() const override { return true; } - void syncAfterError() override; - - void resetParser() override; - +#pragma once + +#include <Core/Block.h> +#include <Formats/FormatSettings.h> +#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> + + +namespace DB +{ + +/** A stream to input data in tsv format. + */ +class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo +{ +public: + /** with_names - the first line is the header with the names of the columns + * with_types - on the next line header with type names + */ + TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_); + + String getName() const override { return "TabSeparatedRowInputFormat"; } + + bool readRow(MutableColumns & columns, RowReadExtension &) override; + void readPrefix() override; + bool allowSyncAfterError() const override { return true; } + void syncAfterError() override; + + void resetParser() override; + protected: - bool with_names; - bool with_types; - const FormatSettings format_settings; + bool with_names; + bool with_types; + const FormatSettings format_settings; virtual bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column); private: - DataTypes data_types; - - using IndexesMap = std::unordered_map<String, size_t>; - IndexesMap column_indexes_by_names; - - std::vector<size_t> columns_to_fill_with_default_values; - - void addInputColumn(const String & column_name); - void setupAllColumnsByTableSchema(); - void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension); - - bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; - void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; - bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } -}; - -} + DataTypes data_types; + + using IndexesMap = std::unordered_map<String, size_t>; + IndexesMap column_indexes_by_names; + + std::vector<size_t> columns_to_fill_with_default_values; + + void addInputColumn(const String & column_name); + void setupAllColumnsByTableSchema(); + void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension); + + bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; + void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp 
b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index f617c2baa3..4dddc9fff9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -1,183 +1,183 @@ -#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> -#include <Formats/verbosePrintString.h> -#include <IO/Operators.h> -#include <IO/WriteBufferFromString.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -static String alignedName(const String & name, size_t max_length) -{ - size_t spaces_count = max_length >= name.size() ? max_length - name.size() : 0; - return name + ", " + std::string(spaces_count, ' '); -} - - -RowInputFormatWithDiagnosticInfo::RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_) - : IRowInputFormat(header_, in_, params_) -{ -} - -void RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo() -{ - ++row_num; - - bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row; - bytes_read_at_start_of_buffer_on_current_row = in.count() - in.offset(); - - offset_of_prev_row = offset_of_current_row; - offset_of_current_row = in.offset(); -} - -String RowInputFormatWithDiagnosticInfo::getDiagnosticInfo() -{ - if (in.eof()) - return "Buffer has gone, cannot extract information about what has been parsed."; - - WriteBufferFromOwnString out; - - const auto & header = getPort().getHeader(); - MutableColumns columns = header.cloneEmptyColumns(); - - /// It is possible to display detailed diagnostics only if the last and next to last rows are still in the read buffer. - size_t bytes_read_at_start_of_buffer = in.count() - in.offset(); - if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row) - { - out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n"; - return out.str(); - } - - max_length_of_column_name = 0; - for (size_t i = 0; i < header.columns(); ++i) - if (header.safeGetByPosition(i).name.size() > max_length_of_column_name) - max_length_of_column_name = header.safeGetByPosition(i).name.size(); - - max_length_of_data_type_name = 0; - for (size_t i = 0; i < header.columns(); ++i) - if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name) - max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size(); - - /// Roll back the cursor to the beginning of the previous or current row and parse all over again. But now we derive detailed information. 
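// A small numeric trace of the bookkeeping above (values assumed): if 4096
// bytes were consumed before the current buffer window and row N-1 started at
// offset 120 inside it, then
//
//     bytes_read_at_start_of_buffer_on_prev_row == 4096
//     offset_of_prev_row == 120
//
// and the rollback below re-parses from in.buffer().begin() + 120. If the
// buffer was refilled in between, the byte counts disagree and only the short
// "two last rows aren't in buffer" notice can be printed.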
- - if (offset_of_prev_row <= in.buffer().size()) - { - in.position() = in.buffer().begin() + offset_of_prev_row; - - out << "\nRow " << (row_num - 1) << ":\n"; - if (!parseRowAndPrintDiagnosticInfo(columns, out)) - return out.str(); - } - else - { - if (in.buffer().size() < offset_of_current_row) - { - out << "Could not print diagnostic info because parsing of data hasn't started.\n"; - return out.str(); - } - - in.position() = in.buffer().begin() + offset_of_current_row; - } - - out << "\nRow " << row_num << ":\n"; - parseRowAndPrintDiagnosticInfo(columns, out); - out << "\n"; - - return out.str(); -} - -bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(const String & col_name, - const DataTypePtr & type, - IColumn & column, - WriteBuffer & out, - size_t file_column) -{ - out << "Column " << file_column << ", " << std::string((file_column < 10 ? 2 : file_column < 100 ? 1 : 0), ' ') - << "name: " << alignedName(col_name, max_length_of_column_name) - << "type: " << alignedName(type->getName(), max_length_of_data_type_name); - - auto * prev_position = in.position(); - std::exception_ptr exception; - - try - { - tryDeserializeField(type, column, file_column); - } - catch (...) - { - exception = std::current_exception(); - } - auto * curr_position = in.position(); - - if (curr_position < prev_position) - throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR); - +#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> +#include <Formats/verbosePrintString.h> +#include <IO/Operators.h> +#include <IO/WriteBufferFromString.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +static String alignedName(const String & name, size_t max_length) +{ + size_t spaces_count = max_length >= name.size() ? max_length - name.size() : 0; + return name + ", " + std::string(spaces_count, ' '); +} + + +RowInputFormatWithDiagnosticInfo::RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_) + : IRowInputFormat(header_, in_, params_) +{ +} + +void RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo() +{ + ++row_num; + + bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row; + bytes_read_at_start_of_buffer_on_current_row = in.count() - in.offset(); + + offset_of_prev_row = offset_of_current_row; + offset_of_current_row = in.offset(); +} + +String RowInputFormatWithDiagnosticInfo::getDiagnosticInfo() +{ + if (in.eof()) + return "Buffer has gone, cannot extract information about what has been parsed."; + + WriteBufferFromOwnString out; + + const auto & header = getPort().getHeader(); + MutableColumns columns = header.cloneEmptyColumns(); + + /// It is possible to display detailed diagnostics only if the last and next to last rows are still in the read buffer. 
+ size_t bytes_read_at_start_of_buffer = in.count() - in.offset(); + if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row) + { + out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n"; + return out.str(); + } + + max_length_of_column_name = 0; + for (size_t i = 0; i < header.columns(); ++i) + if (header.safeGetByPosition(i).name.size() > max_length_of_column_name) + max_length_of_column_name = header.safeGetByPosition(i).name.size(); + + max_length_of_data_type_name = 0; + for (size_t i = 0; i < header.columns(); ++i) + if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name) + max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size(); + + /// Roll back the cursor to the beginning of the previous or current row and parse all over again. But now we derive detailed information. + + if (offset_of_prev_row <= in.buffer().size()) + { + in.position() = in.buffer().begin() + offset_of_prev_row; + + out << "\nRow " << (row_num - 1) << ":\n"; + if (!parseRowAndPrintDiagnosticInfo(columns, out)) + return out.str(); + } + else + { + if (in.buffer().size() < offset_of_current_row) + { + out << "Could not print diagnostic info because parsing of data hasn't started.\n"; + return out.str(); + } + + in.position() = in.buffer().begin() + offset_of_current_row; + } + + out << "\nRow " << row_num << ":\n"; + parseRowAndPrintDiagnosticInfo(columns, out); + out << "\n"; + + return out.str(); +} + +bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(const String & col_name, + const DataTypePtr & type, + IColumn & column, + WriteBuffer & out, + size_t file_column) +{ + out << "Column " << file_column << ", " << std::string((file_column < 10 ? 2 : file_column < 100 ? 1 : 0), ' ') + << "name: " << alignedName(col_name, max_length_of_column_name) + << "type: " << alignedName(type->getName(), max_length_of_data_type_name); + + auto * prev_position = in.position(); + std::exception_ptr exception; + + try + { + tryDeserializeField(type, column, file_column); + } + catch (...) + { + exception = std::current_exception(); + } + auto * curr_position = in.position(); + + if (curr_position < prev_position) + throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR); + if (isNativeNumber(type) || isDate(type) || isDateTime(type) || isDateTime64(type)) - { - /// An empty string instead of a value. - if (curr_position == prev_position) - { - out << "ERROR: text "; - verbosePrintString(prev_position, std::min(prev_position + 10, in.buffer().end()), out); - out << " is not like " << type->getName() << "\n"; - return false; - } - } - - out << "parsed text: "; - verbosePrintString(prev_position, curr_position, out); - - if (exception) - { - if (type->getName() == "DateTime") - out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n"; - else if (type->getName() == "Date") - out << "ERROR: Date must be in YYYY-MM-DD format.\n"; - else - out << "ERROR\n"; + { + /// An empty string instead of a value. 
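// Approximate shape of one diagnostic line produced above (padding depends on
// the longest column and type names in the header; the values are made up):
//
//     Column 0,   name: event_date,  type: Date,  parsed text: "2022-01-01"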
+ if (curr_position == prev_position) + { + out << "ERROR: text "; + verbosePrintString(prev_position, std::min(prev_position + 10, in.buffer().end()), out); + out << " is not like " << type->getName() << "\n"; + return false; + } + } + + out << "parsed text: "; + verbosePrintString(prev_position, curr_position, out); + + if (exception) + { + if (type->getName() == "DateTime") + out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n"; + else if (type->getName() == "Date") + out << "ERROR: Date must be in YYYY-MM-DD format.\n"; + else + out << "ERROR\n"; // Print exception message out << getExceptionMessage(exception, false) << '\n'; - return false; - } - - out << "\n"; - - if (type->haveMaximumSizeOfValue()) - { - if (isGarbageAfterField(file_column, curr_position)) - { - out << "ERROR: garbage after " << type->getName() << ": "; - verbosePrintString(curr_position, std::min(curr_position + 10, in.buffer().end()), out); - out << "\n"; - - if (type->getName() == "DateTime") - out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n"; - else if (type->getName() == "Date") - out << "ERROR: Date must be in YYYY-MM-DD format.\n"; - - return false; - } - } - - return true; -} - -void RowInputFormatWithDiagnosticInfo::resetParser() -{ - IRowInputFormat::resetParser(); - row_num = 0; - bytes_read_at_start_of_buffer_on_current_row = 0; - bytes_read_at_start_of_buffer_on_prev_row = 0; - offset_of_current_row = std::numeric_limits<size_t>::max(); - offset_of_prev_row = std::numeric_limits<size_t>::max(); - max_length_of_column_name = 0; - max_length_of_data_type_name = 0; -} - - -} + return false; + } + + out << "\n"; + + if (type->haveMaximumSizeOfValue()) + { + if (isGarbageAfterField(file_column, curr_position)) + { + out << "ERROR: garbage after " << type->getName() << ": "; + verbosePrintString(curr_position, std::min(curr_position + 10, in.buffer().end()), out); + out << "\n"; + + if (type->getName() == "DateTime") + out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n"; + else if (type->getName() == "Date") + out << "ERROR: Date must be in YYYY-MM-DD format.\n"; + + return false; + } + } + + return true; +} + +void RowInputFormatWithDiagnosticInfo::resetParser() +{ + IRowInputFormat::resetParser(); + row_num = 0; + bytes_read_at_start_of_buffer_on_current_row = 0; + bytes_read_at_start_of_buffer_on_prev_row = 0; + offset_of_current_row = std::numeric_limits<size_t>::max(); + offset_of_prev_row = std::numeric_limits<size_t>::max(); + max_length_of_column_name = 0; + max_length_of_data_type_name = 0; +} + + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h index 3ad737dd63..5bad24cd48 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h @@ -1,46 +1,46 @@ -#pragma once - -#include <Core/Block.h> -#include <Processors/Formats/IRowInputFormat.h> -#include <IO/ReadBuffer.h> -#include <limits> - - -namespace DB -{ - -class RowInputFormatWithDiagnosticInfo : public IRowInputFormat -{ -public: - RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & 
params_); - - String getDiagnosticInfo() override; - - void resetParser() override; - -protected: - void updateDiagnosticInfo(); - bool deserializeFieldAndPrintDiagnosticInfo(const String & col_name, const DataTypePtr & type, IColumn & column, - WriteBuffer & out, size_t file_column); - - virtual bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) = 0; - virtual void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) = 0; - virtual bool isGarbageAfterField(size_t after_input_pos_idx, ReadBuffer::Position pos) = 0; - - /// For convenient diagnostics in case of an error. - size_t row_num = 0; - -private: - /// How many bytes were read, not counting those still in the buffer. - size_t bytes_read_at_start_of_buffer_on_current_row = 0; - size_t bytes_read_at_start_of_buffer_on_prev_row = 0; - - size_t offset_of_current_row = std::numeric_limits<size_t>::max(); - size_t offset_of_prev_row = std::numeric_limits<size_t>::max(); - - /// For alignment of diagnostic info. - size_t max_length_of_column_name = 0; - size_t max_length_of_data_type_name = 0; -}; - -} +#pragma once + +#include <Core/Block.h> +#include <Processors/Formats/IRowInputFormat.h> +#include <IO/ReadBuffer.h> +#include <limits> + + +namespace DB +{ + +class RowInputFormatWithDiagnosticInfo : public IRowInputFormat +{ +public: + RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_); + + String getDiagnosticInfo() override; + + void resetParser() override; + +protected: + void updateDiagnosticInfo(); + bool deserializeFieldAndPrintDiagnosticInfo(const String & col_name, const DataTypePtr & type, IColumn & column, + WriteBuffer & out, size_t file_column); + + virtual bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) = 0; + virtual void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) = 0; + virtual bool isGarbageAfterField(size_t after_input_pos_idx, ReadBuffer::Position pos) = 0; + + /// For convenient diagnostics in case of an error. + size_t row_num = 0; + +private: + /// How many bytes were read, not counting those still in the buffer. + size_t bytes_read_at_start_of_buffer_on_current_row = 0; + size_t bytes_read_at_start_of_buffer_on_prev_row = 0; + + size_t offset_of_current_row = std::numeric_limits<size_t>::max(); + size_t offset_of_prev_row = std::numeric_limits<size_t>::max(); + + /// For alignment of diagnostic info. 
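// A minimal subclass sketch implied by the interface above (the class name is
// illustrative; the isGarbageAfterField test shown is the one
// TabSeparatedRowInputFormat uses):

class MyRowInputFormat : public RowInputFormatWithDiagnosticInfo
{
protected:
    bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
    void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
    bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override
    {
        return *pos != '\n' && *pos != '\t';
    }
};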
+ size_t max_length_of_column_name = 0; + size_t max_length_of_data_type_name = 0; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp index a591c7d000..64bdbe2410 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp @@ -1,19 +1,19 @@ -#include <Processors/IAccumulatingTransform.h> - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}), - input(inputs.front()), output(outputs.front()) -{ -} - +#include <Processors/IAccumulatingTransform.h> + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) + : IProcessor({std::move(input_header)}, {std::move(output_header)}), + input(inputs.front()), output(outputs.front()) +{ +} + InputPort * IAccumulatingTransform::addTotalsPort() { if (inputs.size() > 1) @@ -22,33 +22,33 @@ InputPort * IAccumulatingTransform::addTotalsPort() return &inputs.emplace_back(getInputPort().getHeader(), this); } -IAccumulatingTransform::Status IAccumulatingTransform::prepare() -{ - /// Check can output. - if (output.isFinished()) - { +IAccumulatingTransform::Status IAccumulatingTransform::prepare() +{ + /// Check can output. + if (output.isFinished()) + { for (auto & in : inputs) in.close(); - return Status::Finished; - } - - if (!output.canPush()) - { - input.setNotNeeded(); - return Status::PortFull; - } - - /// Output if has data. - if (current_output_chunk) - output.push(std::move(current_output_chunk)); - - if (finished_generate) - { - output.finish(); - return Status::Finished; - } - + return Status::Finished; + } + + if (!output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + + /// Output if has data. + if (current_output_chunk) + output.push(std::move(current_output_chunk)); + + if (finished_generate) + { + output.finish(); + return Status::Finished; + } + if (input.isFinished()) finished_input = true; @@ -71,48 +71,48 @@ IAccumulatingTransform::Status IAccumulatingTransform::prepare() totals_input.close(); } } - + /// Generate output block. - return Status::Ready; - } - - /// Check can input. - if (!has_input) - { - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - - current_input_chunk = input.pull(); - has_input = true; - } - - return Status::Ready; -} - -void IAccumulatingTransform::work() -{ - if (!finished_input) - { - consume(std::move(current_input_chunk)); - has_input = false; - } - else - { - current_output_chunk = generate(); - if (!current_output_chunk) - finished_generate = true; - } -} - -void IAccumulatingTransform::setReadyChunk(Chunk chunk) -{ - if (current_output_chunk) - throw Exception("IAccumulatingTransform already has input. Cannot set another chunk. " - "Probably, setReadyChunk method was called twice per consume().", ErrorCodes::LOGICAL_ERROR); - - current_output_chunk = std::move(chunk); -} - -} - + return Status::Ready; + } + + /// Check can input. 
+ if (!has_input) + { + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + current_input_chunk = input.pull(); + has_input = true; + } + + return Status::Ready; +} + +void IAccumulatingTransform::work() +{ + if (!finished_input) + { + consume(std::move(current_input_chunk)); + has_input = false; + } + else + { + current_output_chunk = generate(); + if (!current_output_chunk) + finished_generate = true; + } +} + +void IAccumulatingTransform::setReadyChunk(Chunk chunk) +{ + if (current_output_chunk) + throw Exception("IAccumulatingTransform already has input. Cannot set another chunk. " + "Probably, setReadyChunk method was called twice per consume().", ErrorCodes::LOGICAL_ERROR); + + current_output_chunk = std::move(chunk); +} + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h index ba9727d9ba..b51753199c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h @@ -1,47 +1,47 @@ -#pragma once - -#include <Processors/IProcessor.h> - - -namespace DB -{ - -/** Has one input and one output. - * Pulls all blocks from input, and only then produce output. - * Examples: ORDER BY, GROUP BY. - */ -class IAccumulatingTransform : public IProcessor -{ -protected: - InputPort & input; - OutputPort & output; - - Chunk current_input_chunk; - Chunk current_output_chunk; +#pragma once + +#include <Processors/IProcessor.h> + + +namespace DB +{ + +/** Has one input and one output. + * Pulls all blocks from input, and only then produce output. + * Examples: ORDER BY, GROUP BY. + */ +class IAccumulatingTransform : public IProcessor +{ +protected: + InputPort & input; + OutputPort & output; + + Chunk current_input_chunk; + Chunk current_output_chunk; Chunk totals; - bool has_input = false; - bool finished_input = false; - bool finished_generate = false; - - virtual void consume(Chunk chunk) = 0; - virtual Chunk generate() = 0; - - /// This method can be called once per consume call. In case if some chunks are ready. - void setReadyChunk(Chunk chunk); - void finishConsume() { finished_input = true; } - -public: - IAccumulatingTransform(Block input_header, Block output_header); - - Status prepare() override; - void work() override; - + bool has_input = false; + bool finished_input = false; + bool finished_generate = false; + + virtual void consume(Chunk chunk) = 0; + virtual Chunk generate() = 0; + + /// This method can be called once per consume call. In case if some chunks are ready. + void setReadyChunk(Chunk chunk); + void finishConsume() { finished_input = true; } + +public: + IAccumulatingTransform(Block input_header, Block output_header); + + Status prepare() override; + void work() override; + /// Adds additional port for totals. /// If added, totals will have been ready by the first generate() call (in totals chunk). 
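// A minimal accumulating transform sketch (the class name and logic are my
// own): consume() sees every input chunk before the first generate() call,
// matching the ORDER BY / GROUP BY pattern described above. getName() comes
// from the IProcessor base.

class RowCountingTransform : public IAccumulatingTransform
{
public:
    explicit RowCountingTransform(Block header)
        : IAccumulatingTransform(header, header) {}

    String getName() const override { return "RowCountingTransform"; }

protected:
    void consume(Chunk chunk) override { rows += chunk.getNumRows(); }

    Chunk generate() override
    {
        return {};  // a real transform would emit a chunk built from `rows`
    }

private:
    size_t rows = 0;
};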
InputPort * addTotalsPort(); - InputPort & getInputPort() { return input; } - OutputPort & getOutputPort() { return output; } -}; - -} + InputPort & getInputPort() { return input; } + OutputPort & getOutputPort() { return output; } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp index 8f52bd6a4d..a2533ee4c8 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp @@ -1,44 +1,44 @@ -#include <iostream> -#include <Processors/IProcessor.h> - - -namespace DB -{ - -void IProcessor::dump() const -{ - std::cerr << getName() << "\n"; - - std::cerr << "inputs:\n"; - for (const auto & port : inputs) - std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n"; - - std::cerr << "outputs:\n"; - for (const auto & port : outputs) - std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n"; -} - - -std::string IProcessor::statusToName(Status status) -{ - switch (status) - { - case Status::NeedData: - return "NeedData"; - case Status::PortFull: - return "PortFull"; - case Status::Finished: - return "Finished"; - case Status::Ready: - return "Ready"; - case Status::Async: - return "Async"; - case Status::ExpandPipeline: - return "ExpandPipeline"; - } - - __builtin_unreachable(); -} - -} - +#include <iostream> +#include <Processors/IProcessor.h> + + +namespace DB +{ + +void IProcessor::dump() const +{ + std::cerr << getName() << "\n"; + + std::cerr << "inputs:\n"; + for (const auto & port : inputs) + std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n"; + + std::cerr << "outputs:\n"; + for (const auto & port : outputs) + std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n"; +} + + +std::string IProcessor::statusToName(Status status) +{ + switch (status) + { + case Status::NeedData: + return "NeedData"; + case Status::PortFull: + return "PortFull"; + case Status::Finished: + return "Finished"; + case Status::Ready: + return "Ready"; + case Status::Async: + return "Async"; + case Status::ExpandPipeline: + return "ExpandPipeline"; + } + + __builtin_unreachable(); +} + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp index 905b6a48b9..ac8f2f8b7a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp @@ -1,110 +1,110 @@ -#include <Processors/ISimpleTransform.h> - - -namespace DB -{ - -ISimpleTransform::ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_) - : IProcessor({std::move(input_header_)}, {std::move(output_header_)}) - , input(inputs.front()) - , output(outputs.front()) - , skip_empty_chunks(skip_empty_chunks_) -{ -} - -ISimpleTransform::Status ISimpleTransform::prepare() -{ - /// Check can output. - - if (output.isFinished()) - { - input.close(); - return Status::Finished; - } - - if (!output.canPush()) - { - input.setNotNeeded(); - return Status::PortFull; - } - - /// Output if has data. - if (has_output) - { - output.pushData(std::move(output_data)); - has_output = false; - - if (!no_more_data_needed) - return Status::PortFull; - - } - - /// Stop if don't need more data. 
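/// Status sequence a scheduler typically observes for this transform (derived
/// from the prepare() logic above, shown here for orientation):
///   NeedData -> (input supplies a chunk) -> Ready -> work() -> PortFull
///   -> (output drained) -> NeedData -> ... -> input finishes -> Finished.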
- if (no_more_data_needed) - { - input.close(); - output.finish(); - return Status::Finished; - } - - /// Check can input. - if (!has_input) - { - if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } - - input.setNeeded(); - - if (!input.hasData()) - return Status::NeedData; - - input_data = input.pullData(set_input_not_needed_after_read); - has_input = true; - - if (input_data.exception) - /// No more data needed. Exception will be thrown (or swallowed) later. - input.setNotNeeded(); - } - - /// Now transform. - return Status::Ready; -} - -void ISimpleTransform::work() -{ - if (input_data.exception) - { - /// Skip transform in case of exception. - output_data = std::move(input_data); - has_input = false; - has_output = true; - return; - } - - try - { - transform(input_data.chunk, output_data.chunk); - } - catch (DB::Exception &) - { - output_data.exception = std::current_exception(); - has_output = true; - has_input = false; - return; - } - - has_input = !needInputData(); - - if (!skip_empty_chunks || output_data.chunk) - has_output = true; - - if (has_output && !output_data.chunk && getOutputPort().getHeader()) - /// Support invariant that chunks must have the same number of columns as header. - output_data.chunk = Chunk(getOutputPort().getHeader().cloneEmpty().getColumns(), 0); -} - -} - +#include <Processors/ISimpleTransform.h> + + +namespace DB +{ + +ISimpleTransform::ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_) + : IProcessor({std::move(input_header_)}, {std::move(output_header_)}) + , input(inputs.front()) + , output(outputs.front()) + , skip_empty_chunks(skip_empty_chunks_) +{ +} + +ISimpleTransform::Status ISimpleTransform::prepare() +{ + /// Check can output. + + if (output.isFinished()) + { + input.close(); + return Status::Finished; + } + + if (!output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + + /// Output if has data. + if (has_output) + { + output.pushData(std::move(output_data)); + has_output = false; + + if (!no_more_data_needed) + return Status::PortFull; + + } + + /// Stop if don't need more data. + if (no_more_data_needed) + { + input.close(); + output.finish(); + return Status::Finished; + } + + /// Check can input. + if (!has_input) + { + if (input.isFinished()) + { + output.finish(); + return Status::Finished; + } + + input.setNeeded(); + + if (!input.hasData()) + return Status::NeedData; + + input_data = input.pullData(set_input_not_needed_after_read); + has_input = true; + + if (input_data.exception) + /// No more data needed. Exception will be thrown (or swallowed) later. + input.setNotNeeded(); + } + + /// Now transform. + return Status::Ready; +} + +void ISimpleTransform::work() +{ + if (input_data.exception) + { + /// Skip transform in case of exception. + output_data = std::move(input_data); + has_input = false; + has_output = true; + return; + } + + try + { + transform(input_data.chunk, output_data.chunk); + } + catch (DB::Exception &) + { + output_data.exception = std::current_exception(); + has_output = true; + has_input = false; + return; + } + + has_input = !needInputData(); + + if (!skip_empty_chunks || output_data.chunk) + has_output = true; + + if (has_output && !output_data.chunk && getOutputPort().getHeader()) + /// Support invariant that chunks must have the same number of columns as header. 
+ output_data.chunk = Chunk(getOutputPort().getHeader().cloneEmpty().getColumns(), 0); +} + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h index 20134b59dd..ee92b574d7 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h @@ -1,61 +1,61 @@ -#pragma once - -#include <Processors/IProcessor.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -/** Has one input and one output. - * Simply pull a block from input, transform it, and push it to output. - */ -class ISimpleTransform : public IProcessor -{ -protected: - InputPort & input; - OutputPort & output; - - Port::Data input_data; - Port::Data output_data; - bool has_input = false; - bool has_output = false; - bool no_more_data_needed = false; - const bool skip_empty_chunks; - - /// Set input port NotNeeded after chunk was pulled. - /// Input port will become needed again only after data was transformed. - /// This allows to escape caching chunks in input port, which can lead to uneven data distribution. - bool set_input_not_needed_after_read = true; - - virtual void transform(Chunk &) - { - throw Exception("Method transform is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - virtual void transform(Chunk & input_chunk, Chunk & output_chunk) - { - transform(input_chunk); - output_chunk.swap(input_chunk); - } - - virtual bool needInputData() const { return true; } - void stopReading() { no_more_data_needed = true; } - -public: - ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_); - - Status prepare() override; - void work() override; - - InputPort & getInputPort() { return input; } - OutputPort & getOutputPort() { return output; } - - void setInputNotNeededAfterRead(bool value) { set_input_not_needed_after_read = value; } -}; - -} +#pragma once + +#include <Processors/IProcessor.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/** Has one input and one output. + * Simply pull a block from input, transform it, and push it to output. + */ +class ISimpleTransform : public IProcessor +{ +protected: + InputPort & input; + OutputPort & output; + + Port::Data input_data; + Port::Data output_data; + bool has_input = false; + bool has_output = false; + bool no_more_data_needed = false; + const bool skip_empty_chunks; + + /// Set input port NotNeeded after chunk was pulled. + /// Input port will become needed again only after data was transformed. + /// This allows to escape caching chunks in input port, which can lead to uneven data distribution. 
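/// --- Illustrative sketch (hypothetical subclass, not part of the original sources) ---
/// Overriding the single-argument transform() is enough for in-place rewrites.
/// For example, a transform that truncates every chunk to at most N rows,
/// reusing the same IColumn::cut() call that LimitTransform uses below:
///
///     class HeadPerChunkTransform : public ISimpleTransform
///     {
///     public:
///         HeadPerChunkTransform(const Block & header, UInt64 max_rows_)
///             : ISimpleTransform(header, header, /*skip_empty_chunks_=*/ true)
///             , max_rows(max_rows_) {}
///
///         String getName() const override { return "HeadPerChunkTransform"; }
///
///     protected:
///         void transform(Chunk & chunk) override
///         {
///             if (chunk.getNumRows() <= max_rows)
///                 return;  /// already small enough, pass through unchanged
///             auto columns = chunk.detachColumns();
///             for (auto & column : columns)
///                 column = column->cut(0, max_rows);
///             chunk.setColumns(std::move(columns), max_rows);
///         }
///
///     private:
///         const UInt64 max_rows;
///     };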
+ bool set_input_not_needed_after_read = true; + + virtual void transform(Chunk &) + { + throw Exception("Method transform is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + virtual void transform(Chunk & input_chunk, Chunk & output_chunk) + { + transform(input_chunk); + output_chunk.swap(input_chunk); + } + + virtual bool needInputData() const { return true; } + void stopReading() { no_more_data_needed = true; } + +public: + ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_); + + Status prepare() override; + void work() override; + + InputPort & getInputPort() { return input; } + OutputPort & getOutputPort() { return output; } + + void setInputNotNeededAfterRead(bool value) { set_input_not_needed_after_read = value; } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp index 4b5ef0f8df..0de3ed37a6 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp @@ -1,41 +1,41 @@ -#include <Processors/ISink.h> - - -namespace DB -{ - -ISink::ISink(Block header) - : IProcessor({std::move(header)}, {}), input(inputs.front()) -{ -} - -ISink::Status ISink::prepare() -{ +#include <Processors/ISink.h> + + +namespace DB +{ + +ISink::ISink(Block header) + : IProcessor({std::move(header)}, {}), input(inputs.front()) +{ +} + +ISink::Status ISink::prepare() +{ if (!was_on_start_called) return Status::Ready; - if (has_input) - return Status::Ready; - - if (input.isFinished()) - { + if (has_input) + return Status::Ready; + + if (input.isFinished()) + { if (!was_on_finish_called) return Status::Ready; - return Status::Finished; - } - - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + current_chunk = input.pull(true); - has_input = true; - return Status::Ready; -} - -void ISink::work() -{ + has_input = true; + return Status::Ready; +} + +void ISink::work() +{ if (!was_on_start_called) { was_on_start_called = true; @@ -51,6 +51,6 @@ void ISink::work() was_on_finish_called = true; onFinish(); } -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h index 1406583e61..f960def1cd 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h @@ -1,31 +1,31 @@ -#pragma once - -#include <Processors/IProcessor.h> - - -namespace DB -{ - -class ISink : public IProcessor -{ -protected: - InputPort & input; - Chunk current_chunk; - bool has_input = false; +#pragma once + +#include <Processors/IProcessor.h> + + +namespace DB +{ + +class ISink : public IProcessor +{ +protected: + InputPort & input; + Chunk current_chunk; + bool has_input = false; bool was_on_start_called = false; bool was_on_finish_called = false; - - virtual void consume(Chunk block) = 0; + + virtual void consume(Chunk block) = 0; virtual void onStart() {} - virtual void onFinish() {} - -public: - explicit ISink(Block header); - - Status prepare() override; - void work() override; - - InputPort & getPort() { return input; } -}; - -} + virtual void onFinish() {} + +public: + explicit ISink(Block header); + + Status prepare() override; + void work() 
override; + + InputPort & getPort() { return input; } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp index d1c06046b6..36c58e1454 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp @@ -1,296 +1,296 @@ -#include <Processors/LimitTransform.h> - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -LimitTransform::LimitTransform( +#include <Processors/LimitTransform.h> + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +LimitTransform::LimitTransform( const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams, - bool always_read_till_end_, bool with_ties_, - SortDescription description_) - : IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_)) - , limit(limit_), offset(offset_) - , always_read_till_end(always_read_till_end_) - , with_ties(with_ties_), description(std::move(description_)) -{ - if (num_streams != 1 && with_ties) - throw Exception("Cannot use LimitTransform with multiple ports and ties.", ErrorCodes::LOGICAL_ERROR); - - ports_data.resize(num_streams); - - size_t cur_stream = 0; - for (auto & input : inputs) - { - ports_data[cur_stream].input_port = &input; - ++cur_stream; - } - - cur_stream = 0; - for (auto & output : outputs) - { - ports_data[cur_stream].output_port = &output; - ++cur_stream; - } - - for (const auto & desc : description) - { - if (!desc.column_name.empty()) - sort_column_positions.push_back(header_.getPositionByName(desc.column_name)); - else - sort_column_positions.push_back(desc.column_number); - } -} - + bool always_read_till_end_, bool with_ties_, + SortDescription description_) + : IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_)) + , limit(limit_), offset(offset_) + , always_read_till_end(always_read_till_end_) + , with_ties(with_ties_), description(std::move(description_)) +{ + if (num_streams != 1 && with_ties) + throw Exception("Cannot use LimitTransform with multiple ports and ties.", ErrorCodes::LOGICAL_ERROR); + + ports_data.resize(num_streams); + + size_t cur_stream = 0; + for (auto & input : inputs) + { + ports_data[cur_stream].input_port = &input; + ++cur_stream; + } + + cur_stream = 0; + for (auto & output : outputs) + { + ports_data[cur_stream].output_port = &output; + ++cur_stream; + } + + for (const auto & desc : description) + { + if (!desc.column_name.empty()) + sort_column_positions.push_back(header_.getPositionByName(desc.column_name)); + else + sort_column_positions.push_back(desc.column_number); + } +} + Chunk LimitTransform::makeChunkWithPreviousRow(const Chunk & chunk, UInt64 row) const -{ - assert(row < chunk.getNumRows()); - ColumnRawPtrs current_columns = extractSortColumns(chunk.getColumns()); - MutableColumns last_row_sort_columns; - for (size_t i = 0; i < current_columns.size(); ++i) - { - last_row_sort_columns.emplace_back(current_columns[i]->cloneEmpty()); - last_row_sort_columns[i]->insertFrom(*current_columns[i], row); - } - return Chunk(std::move(last_row_sort_columns), 1); -} - - -IProcessor::Status LimitTransform::prepare( - const PortNumbers & updated_input_ports, - const PortNumbers & updated_output_ports) -{ - bool has_full_port = false; - - auto process_pair = [&](size_t pos) - { - auto status = 
preparePair(ports_data[pos]); - - switch (status) - { - case IProcessor::Status::Finished: - { - if (!ports_data[pos].is_finished) - { - ports_data[pos].is_finished = true; - ++num_finished_port_pairs; - } - - return; - } - case IProcessor::Status::PortFull: - { - has_full_port = true; - return; - } - case IProcessor::Status::NeedData: - return; - default: - throw Exception( - "Unexpected status for LimitTransform::preparePair : " + IProcessor::statusToName(status), - ErrorCodes::LOGICAL_ERROR); - } - }; - - for (auto pos : updated_input_ports) - process_pair(pos); - - for (auto pos : updated_output_ports) - process_pair(pos); - - /// All ports are finished. It may happen even before we reached the limit (has less data then limit). - if (num_finished_port_pairs == ports_data.size()) - return Status::Finished; - +{ + assert(row < chunk.getNumRows()); + ColumnRawPtrs current_columns = extractSortColumns(chunk.getColumns()); + MutableColumns last_row_sort_columns; + for (size_t i = 0; i < current_columns.size(); ++i) + { + last_row_sort_columns.emplace_back(current_columns[i]->cloneEmpty()); + last_row_sort_columns[i]->insertFrom(*current_columns[i], row); + } + return Chunk(std::move(last_row_sort_columns), 1); +} + + +IProcessor::Status LimitTransform::prepare( + const PortNumbers & updated_input_ports, + const PortNumbers & updated_output_ports) +{ + bool has_full_port = false; + + auto process_pair = [&](size_t pos) + { + auto status = preparePair(ports_data[pos]); + + switch (status) + { + case IProcessor::Status::Finished: + { + if (!ports_data[pos].is_finished) + { + ports_data[pos].is_finished = true; + ++num_finished_port_pairs; + } + + return; + } + case IProcessor::Status::PortFull: + { + has_full_port = true; + return; + } + case IProcessor::Status::NeedData: + return; + default: + throw Exception( + "Unexpected status for LimitTransform::preparePair : " + IProcessor::statusToName(status), + ErrorCodes::LOGICAL_ERROR); + } + }; + + for (auto pos : updated_input_ports) + process_pair(pos); + + for (auto pos : updated_output_ports) + process_pair(pos); + + /// All ports are finished. It may happen even before we reached the limit (has less data then limit). + if (num_finished_port_pairs == ports_data.size()) + return Status::Finished; + bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset); - /// If we reached limit for some port, then close others. Otherwise some sources may infinitely read data. - /// Example: SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1 + /// If we reached limit for some port, then close others. Otherwise some sources may infinitely read data. + /// Example: SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1 if ((!limit_is_unreachable && rows_read >= offset + limit) && !previous_row_chunk && !always_read_till_end) - { - for (auto & input : inputs) - input.close(); - - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } - - if (has_full_port) - return Status::PortFull; - - return Status::NeedData; -} - -LimitTransform::Status LimitTransform::prepare() -{ - if (ports_data.size() != 1) - throw Exception("prepare without arguments is not supported for multi-port LimitTransform.", - ErrorCodes::LOGICAL_ERROR); - - return prepare({0}, {0}); -} - -LimitTransform::Status LimitTransform::preparePair(PortsData & data) -{ - auto & output = *data.output_port; - auto & input = *data.input_port; - - /// Check can output. 
- bool output_finished = false; - if (output.isFinished()) - { - output_finished = true; - if (!always_read_till_end) - { - input.close(); - return Status::Finished; - } - } - - if (!output_finished && !output.canPush()) - { - input.setNotNeeded(); - return Status::PortFull; - } - + { + for (auto & input : inputs) + input.close(); + + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + if (has_full_port) + return Status::PortFull; + + return Status::NeedData; +} + +LimitTransform::Status LimitTransform::prepare() +{ + if (ports_data.size() != 1) + throw Exception("prepare without arguments is not supported for multi-port LimitTransform.", + ErrorCodes::LOGICAL_ERROR); + + return prepare({0}, {0}); +} + +LimitTransform::Status LimitTransform::preparePair(PortsData & data) +{ + auto & output = *data.output_port; + auto & input = *data.input_port; + + /// Check can output. + bool output_finished = false; + if (output.isFinished()) + { + output_finished = true; + if (!always_read_till_end) + { + input.close(); + return Status::Finished; + } + } + + if (!output_finished && !output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset); - /// Check if we are done with pushing. + /// Check if we are done with pushing. bool is_limit_reached = !limit_is_unreachable && rows_read >= offset + limit && !previous_row_chunk; - if (is_limit_reached) - { - if (!always_read_till_end) - { - output.finish(); - input.close(); - return Status::Finished; - } - } - - /// Check can input. - - if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } - - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - - data.current_chunk = input.pull(true); - - auto rows = data.current_chunk.getNumRows(); - - if (rows_before_limit_at_least) - rows_before_limit_at_least->add(rows); - - /// Skip block (for 'always_read_till_end' case). - if (is_limit_reached || output_finished) - { - data.current_chunk.clear(); - if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } - - /// Now, we pulled from input, and it must be empty. - input.setNeeded(); - return Status::NeedData; - } - - /// Process block. - - rows_read += rows; - - if (rows_read <= offset) - { - data.current_chunk.clear(); - - if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } - - /// Now, we pulled from input, and it must be empty. - input.setNeeded(); - return Status::NeedData; - } - + if (is_limit_reached) + { + if (!always_read_till_end) + { + output.finish(); + input.close(); + return Status::Finished; + } + } + + /// Check can input. + + if (input.isFinished()) + { + output.finish(); + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + data.current_chunk = input.pull(true); + + auto rows = data.current_chunk.getNumRows(); + + if (rows_before_limit_at_least) + rows_before_limit_at_least->add(rows); + + /// Skip block (for 'always_read_till_end' case). + if (is_limit_reached || output_finished) + { + data.current_chunk.clear(); + if (input.isFinished()) + { + output.finish(); + return Status::Finished; + } + + /// Now, we pulled from input, and it must be empty. + input.setNeeded(); + return Status::NeedData; + } + + /// Process block. 
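/// A worked example of the windowing implemented below (illustrative values):
/// with offset = 3 and limit = 5, the wanted global rows are [3, 8).
/// Suppose 4-row chunks arrive:
///   chunk 1: rows_read = 4 -> past the offset, keep only its last row
///            (splitChunk() computes start = 3, length = 1);
///   chunk 2: rows_read = 8 -> lies entirely inside [3, 8), pushed whole;
///   afterwards rows_read == offset + limit, so without WITH TIES or
///   always_read_till_end the ports are closed and no third chunk is pulled.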
+ + rows_read += rows; + + if (rows_read <= offset) + { + data.current_chunk.clear(); + + if (input.isFinished()) + { + output.finish(); + return Status::Finished; + } + + /// Now, we pulled from input, and it must be empty. + input.setNeeded(); + return Status::NeedData; + } + if (rows <= std::numeric_limits<UInt64>::max() - offset && rows_read >= offset + rows && !limit_is_unreachable && rows_read <= offset + limit) - { - /// Return the whole chunk. - - /// Save the last row of current chunk to check if next block begins with the same row (for WITH TIES). - if (with_ties && rows_read == offset + limit) - previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, data.current_chunk.getNumRows() - 1); - } - else + { + /// Return the whole chunk. + + /// Save the last row of current chunk to check if next block begins with the same row (for WITH TIES). + if (with_ties && rows_read == offset + limit) + previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, data.current_chunk.getNumRows() - 1); + } + else /// This function may be heavy to execute in prepare. But it happens no more than twice, and make code simpler. - splitChunk(data); - - bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit; - /// No more data is needed. + splitChunk(data); + + bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit; + /// No more data is needed. if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties) - input.close(); - - output.push(std::move(data.current_chunk)); - - return Status::PortFull; -} - - -void LimitTransform::splitChunk(PortsData & data) -{ - auto current_chunk_sort_columns = extractSortColumns(data.current_chunk.getColumns()); + input.close(); + + output.push(std::move(data.current_chunk)); + + return Status::PortFull; +} + + +void LimitTransform::splitChunk(PortsData & data) +{ + auto current_chunk_sort_columns = extractSortColumns(data.current_chunk.getColumns()); UInt64 num_rows = data.current_chunk.getNumRows(); UInt64 num_columns = data.current_chunk.getNumColumns(); - + bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset); if (previous_row_chunk && !limit_is_unreachable && rows_read >= offset + limit) - { - /// Scan until the first row, which is not equal to previous_row_chunk (for WITH TIES) + { + /// Scan until the first row, which is not equal to previous_row_chunk (for WITH TIES) UInt64 current_row_num = 0; - for (; current_row_num < num_rows; ++current_row_num) - { - if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num)) - break; - } - - auto columns = data.current_chunk.detachColumns(); - - if (current_row_num < num_rows) - { - previous_row_chunk = {}; + for (; current_row_num < num_rows; ++current_row_num) + { + if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num)) + break; + } + + auto columns = data.current_chunk.detachColumns(); + + if (current_row_num < num_rows) + { + previous_row_chunk = {}; for (UInt64 i = 0; i < num_columns; ++i) - columns[i] = columns[i]->cut(0, current_row_num); - } - - data.current_chunk.setColumns(std::move(columns), current_row_num); - return; - } - - /// return a piece of the block + columns[i] = columns[i]->cut(0, current_row_num); + } + + data.current_chunk.setColumns(std::move(columns), current_row_num); + return; + } + + /// return a piece of the block UInt64 start = 0; - + /// ------------[....(...).] 
/// <----------------------> rows_read /// <----------> num_rows /// <---------------> offset /// <---> start - + assert(offset < rows_read); if (offset + num_rows > rows_read) @@ -324,55 +324,55 @@ void LimitTransform::splitChunk(PortsData & data) length = offset + limit - (rows_read - num_rows) - start; } - /// check if other rows in current block equals to last one in limit - if (with_ties && length) - { + /// check if other rows in current block equals to last one in limit + if (with_ties && length) + { UInt64 current_row_num = start + length; - previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, current_row_num - 1); - - for (; current_row_num < num_rows; ++current_row_num) - { - if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num)) - { - previous_row_chunk = {}; - break; - } - } - - length = current_row_num - start; - } - - if (length == num_rows) - return; - - auto columns = data.current_chunk.detachColumns(); - + previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, current_row_num - 1); + + for (; current_row_num < num_rows; ++current_row_num) + { + if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num)) + { + previous_row_chunk = {}; + break; + } + } + + length = current_row_num - start; + } + + if (length == num_rows) + return; + + auto columns = data.current_chunk.detachColumns(); + for (UInt64 i = 0; i < num_columns; ++i) - columns[i] = columns[i]->cut(start, length); - - data.current_chunk.setColumns(std::move(columns), length); -} - -ColumnRawPtrs LimitTransform::extractSortColumns(const Columns & columns) const -{ - ColumnRawPtrs res; - res.reserve(description.size()); - for (size_t pos : sort_column_positions) - res.push_back(columns[pos].get()); - - return res; -} - + columns[i] = columns[i]->cut(start, length); + + data.current_chunk.setColumns(std::move(columns), length); +} + +ColumnRawPtrs LimitTransform::extractSortColumns(const Columns & columns) const +{ + ColumnRawPtrs res; + res.reserve(description.size()); + for (size_t pos : sort_column_positions) + res.push_back(columns[pos].get()); + + return res; +} + bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const -{ - assert(current_chunk_sort_columns.size() == previous_row_chunk.getNumColumns()); - size_t size = current_chunk_sort_columns.size(); - const auto & previous_row_sort_columns = previous_row_chunk.getColumns(); - for (size_t i = 0; i < size; ++i) - if (0 != current_chunk_sort_columns[i]->compareAt(current_chunk_row_num, 0, *previous_row_sort_columns[i], 1)) - return false; - return true; -} - -} - +{ + assert(current_chunk_sort_columns.size() == previous_row_chunk.getNumColumns()); + size_t size = current_chunk_sort_columns.size(); + const auto & previous_row_sort_columns = previous_row_chunk.getColumns(); + for (size_t i = 0; i < size; ++i) + if (0 != current_chunk_sort_columns[i]->compareAt(current_chunk_row_num, 0, *previous_row_sort_columns[i], 1)) + return false; + return true; +} + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h index 46ffc891c2..8865eab732 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h @@ -1,72 +1,72 @@ -#pragma once - -#include <Processors/IProcessor.h> -#include <Processors/RowsBeforeLimitCounter.h> 
-#include <Core/SortDescription.h> - -namespace DB -{ - -/// Implementation for LIMIT N OFFSET M -/// This processor support multiple inputs and outputs (the same number). -/// Each pair of input and output port works independently. -/// The reason to have multiple ports is to be able to stop all sources when limit is reached, in a query like: -/// SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1 -/// -/// always_read_till_end - read all data from input ports even if limit was reached. -/// with_ties, description - implementation of LIMIT WITH TIES. It works only for single port. -class LimitTransform : public IProcessor -{ -private: +#pragma once + +#include <Processors/IProcessor.h> +#include <Processors/RowsBeforeLimitCounter.h> +#include <Core/SortDescription.h> + +namespace DB +{ + +/// Implementation for LIMIT N OFFSET M +/// This processor support multiple inputs and outputs (the same number). +/// Each pair of input and output port works independently. +/// The reason to have multiple ports is to be able to stop all sources when limit is reached, in a query like: +/// SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1 +/// +/// always_read_till_end - read all data from input ports even if limit was reached. +/// with_ties, description - implementation of LIMIT WITH TIES. It works only for single port. +class LimitTransform : public IProcessor +{ +private: UInt64 limit; UInt64 offset; - - bool always_read_till_end; - - bool with_ties; - const SortDescription description; - - Chunk previous_row_chunk; /// for WITH TIES, contains only sort columns - std::vector<size_t> sort_column_positions; - + + bool always_read_till_end; + + bool with_ties; + const SortDescription description; + + Chunk previous_row_chunk; /// for WITH TIES, contains only sort columns + std::vector<size_t> sort_column_positions; + UInt64 rows_read = 0; /// including the last read block - RowsBeforeLimitCounterPtr rows_before_limit_at_least; - - /// State of port's pair. - /// Chunks from different port pairs are not mixed for better cache locality. - struct PortsData - { - Chunk current_chunk; - - InputPort * input_port = nullptr; - OutputPort * output_port = nullptr; - bool is_finished = false; - }; - - std::vector<PortsData> ports_data; - size_t num_finished_port_pairs = 0; - + RowsBeforeLimitCounterPtr rows_before_limit_at_least; + + /// State of port's pair. + /// Chunks from different port pairs are not mixed for better cache locality. + struct PortsData + { + Chunk current_chunk; + + InputPort * input_port = nullptr; + OutputPort * output_port = nullptr; + bool is_finished = false; + }; + + std::vector<PortsData> ports_data; + size_t num_finished_port_pairs = 0; + Chunk makeChunkWithPreviousRow(const Chunk & current_chunk, UInt64 row_num) const; - ColumnRawPtrs extractSortColumns(const Columns & columns) const; + ColumnRawPtrs extractSortColumns(const Columns & columns) const; bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const; - -public: - LimitTransform( + +public: + LimitTransform( const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams = 1, - bool always_read_till_end_ = false, bool with_ties_ = false, - SortDescription description_ = {}); - - String getName() const override { return "Limit"; } - - Status prepare(const PortNumbers & /*updated_input_ports*/, const PortNumbers & /*updated_output_ports*/) override; - Status prepare() override; /// Compatibility for TreeExecutor. 
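/// --- Illustrative usage (hypothetical wiring; values are for example only) ---
/// A single-stream LIMIT 5 OFFSET 3 without ties would be built roughly as:
///     auto limit = std::make_shared<LimitTransform>(header, /*limit_=*/ 5, /*offset_=*/ 3);
///     connect(source_output, limit->getInputPort());
///     connect(limit->getOutputPort(), sink_input);
/// connect() is the free function from Port.cpp shown later in this diff.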
- Status preparePair(PortsData & data); - void splitChunk(PortsData & data); - - InputPort & getInputPort() { return inputs.front(); } - OutputPort & getOutputPort() { return outputs.front(); } - - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); } -}; - -} + bool always_read_till_end_ = false, bool with_ties_ = false, + SortDescription description_ = {}); + + String getName() const override { return "Limit"; } + + Status prepare(const PortNumbers & /*updated_input_ports*/, const PortNumbers & /*updated_output_ports*/) override; + Status prepare() override; /// Compatibility for TreeExecutor. + Status preparePair(PortsData & data); + void splitChunk(PortsData & data); + + InputPort & getInputPort() { return inputs.front(); } + OutputPort & getOutputPort() { return outputs.front(); } + + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp index 3ad0687fbd..e0da79f148 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp @@ -1,6 +1,6 @@ -#include <Processors/Pipe.h> -#include <IO/WriteHelpers.h> -#include <Processors/Sources/SourceFromInputStream.h> +#include <Processors/Pipe.h> +#include <IO/WriteHelpers.h> +#include <Processors/Sources/SourceFromInputStream.h> #include <Processors/ResizeProcessor.h> #include <Processors/ConcatProcessor.h> #include <Processors/LimitTransform.h> @@ -10,17 +10,17 @@ #include <Processors/Formats/IOutputFormat.h> #include <Processors/Sources/NullSource.h> #include <Columns/ColumnConst.h> - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static void checkSource(const IProcessor & source) -{ +{ if (!source.getInputs().empty()) throw Exception("Source for pipe shouldn't have any input, but " + source.getName() + " has " + toString(source.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); @@ -32,10 +32,10 @@ static void checkSource(const IProcessor & source) if (source.getOutputs().size() > 1) throw Exception("Source for pipe should have single output, but " + source.getName() + " has " + toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); -} - +} + static OutputPort * uniteExtremes(const OutputPortRawPtrs & ports, const Block & header, Processors & processors) -{ +{ if (ports.empty()) return nullptr; @@ -66,10 +66,10 @@ static OutputPort * uniteExtremes(const OutputPortRawPtrs & ports, const Block & processors.emplace_back(std::move(sink)); return extremes_port; -} - +} + static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & header, Processors & processors) -{ +{ if (ports.empty()) return nullptr; @@ -96,10 +96,10 @@ static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & h processors.emplace_back(std::move(limit)); return totals_port; -} - +} + Pipe::Holder & Pipe::Holder::operator=(Holder && rhs) -{ +{ table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); interpreter_context.insert(interpreter_context.end(), @@ -110,18 +110,18 @@ Pipe::Holder 
& Pipe::Holder::operator=(Holder && rhs) query_id_holder = std::move(rhs.query_id_holder); return *this; -} - +} + Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes) -{ +{ if (!source->getInputs().empty()) throw Exception("Source for pipe shouldn't have any input, but " + source->getName() + " has " + toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); - + if (!output) throw Exception("Cannot create Pipe from source because specified output port is nullptr", - ErrorCodes::LOGICAL_ERROR); - + ErrorCodes::LOGICAL_ERROR); + if (output == totals || output == extremes || (totals && totals == extremes)) throw Exception("Cannot create Pipe from source because some of specified ports are the same", ErrorCodes::LOGICAL_ERROR); @@ -163,30 +163,30 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output output_ports.push_back(output); processors.emplace_back(std::move(source)); max_parallel_streams = 1; -} - -Pipe::Pipe(ProcessorPtr source) -{ - if (auto * source_from_input_stream = typeid_cast<SourceFromInputStream *>(source.get())) - { +} + +Pipe::Pipe(ProcessorPtr source) +{ + if (auto * source_from_input_stream = typeid_cast<SourceFromInputStream *>(source.get())) + { /// Special case for SourceFromInputStream. Will remove it later. totals_port = source_from_input_stream->getTotalsPort(); extremes_port = source_from_input_stream->getExtremesPort(); - } - else if (source->getOutputs().size() != 1) - checkSource(*source); - + } + else if (source->getOutputs().size() != 1) + checkSource(*source); + if (collected_processors) collected_processors->emplace_back(source); - + output_ports.push_back(&source->getOutputs().front()); header = output_ports.front()->getHeader(); - processors.emplace_back(std::move(source)); - max_parallel_streams = 1; -} - + processors.emplace_back(std::move(source)); + max_parallel_streams = 1; +} + Pipe::Pipe(Processors processors_) : processors(std::move(processors_)) -{ +{ /// Create hash table with processors. std::unordered_set<const IProcessor *> set; for (const auto & processor : processors) @@ -236,19 +236,19 @@ Pipe::Pipe(Processors processors_) : processors(std::move(processors_)) if (collected_processors) for (const auto & processor : processors) collected_processors->emplace_back(processor); -} - +} + static Pipes removeEmptyPipes(Pipes pipes) -{ +{ Pipes res; res.reserve(pipes.size()); - + for (auto & pipe : pipes) { if (!pipe.empty()) res.emplace_back(std::move(pipe)); } - + return res; } @@ -302,7 +302,7 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow { Pipe res; - for (auto & pipe : pipes) + for (auto & pipe : pipes) res.holder = std::move(pipe.holder); /// see move assignment for Pipe::Holder. 
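/// --- Illustrative sketch (hypothetical sources; calls as declared in these sources) ---
/// unitePipes() is typically fed one pipe per source, and the united pipe is
/// then transformed as a whole:
///     Pipes pipes;
///     pipes.emplace_back(Pipe(std::make_shared<NullSource>(header)));
///     pipes.emplace_back(Pipe(std::make_shared<NullSource>(header)));
///     Pipe united = Pipe::unitePipes(std::move(pipes));
///     united.addTransform(std::make_shared<LimitTransform>(
///         united.getHeader(), /*limit_=*/ 10, /*offset_=*/ 0, united.numOutputPorts()));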
pipes = removeEmptyPipes(std::move(pipes)); @@ -311,11 +311,11 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow return res; if (pipes.size() == 1) - { + { pipes[0].holder = std::move(res.holder); return std::move(pipes[0]); } - + OutputPortRawPtrs totals; OutputPortRawPtrs extremes; res.collected_processors = collected_processors; @@ -336,8 +336,8 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow if (pipe.extremes_port) extremes.emplace_back(pipe.extremes_port); - } - + } + size_t num_processors = res.processors.size(); res.totals_port = uniteTotals(totals, res.header, res.processors); @@ -350,10 +350,10 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow } return res; -} - +} + void Pipe::addSource(ProcessorPtr source) -{ +{ checkSource(*source); const auto & source_header = source->getOutputs().front().getHeader(); @@ -369,10 +369,10 @@ void Pipe::addSource(ProcessorPtr source) processors.emplace_back(std::move(source)); max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size()); -} - +} + void Pipe::addTotalsSource(ProcessorPtr source) -{ +{ if (output_ports.empty()) throw Exception("Cannot add totals source to empty Pipe.", ErrorCodes::LOGICAL_ERROR); @@ -389,10 +389,10 @@ void Pipe::addTotalsSource(ProcessorPtr source) totals_port = &source->getOutputs().front(); processors.emplace_back(std::move(source)); -} - +} + void Pipe::addExtremesSource(ProcessorPtr source) -{ +{ if (output_ports.empty()) throw Exception("Cannot add extremes source to empty Pipe.", ErrorCodes::LOGICAL_ERROR); @@ -409,10 +409,10 @@ void Pipe::addExtremesSource(ProcessorPtr source) extremes_port = &source->getOutputs().front(); processors.emplace_back(std::move(source)); -} - +} + static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) -{ +{ if (port == nullptr) return; @@ -467,10 +467,10 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort size_t next_output = 0; for (auto & input : inputs) - { + { connect(*output_ports[next_output], input); ++next_output; - } + } auto & outputs = transform->getOutputs(); @@ -519,8 +519,8 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort processors.emplace_back(std::move(transform)); max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size()); -} - +} + void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes) { if (output_ports.empty()) @@ -607,14 +607,14 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * } void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) -{ +{ if (output_ports.empty()) throw Exception("Cannot add simple transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR); Block new_header; auto add_transform = [&](OutputPort *& port, StreamType stream_type) - { + { if (!port) return; @@ -784,7 +784,7 @@ void Pipe::transform(const Transformer & transformer) port->getHeader().dumpStructure() + ") is not connected", ErrorCodes::LOGICAL_ERROR); set.emplace(&port->getProcessor()); - } + } output_ports.clear(); @@ -842,17 +842,17 @@ void Pipe::transform(const Transformer & transformer) processors.insert(processors.end(), new_processors.begin(), new_processors.end()); max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size()); -} - +} + void Pipe::setLimits(const StreamLocalLimits & limits) -{ - for (auto & processor 
: processors) - { +{ + for (auto & processor : processors) + { if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get())) source_with_progress->setLimits(limits); - } -} - + } +} + void Pipe::setLeafLimits(const SizeLimits & leaf_limits) { for (auto & processor : processors) @@ -863,12 +863,12 @@ void Pipe::setLeafLimits(const SizeLimits & leaf_limits) } void Pipe::setQuota(const std::shared_ptr<const EnabledQuota> & quota) -{ - for (auto & processor : processors) - { +{ + for (auto & processor : processors) + { if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get())) source_with_progress->setQuota(quota); - } -} - -} + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp index 02add2e09e..0a6026b27f 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp @@ -1,27 +1,27 @@ -#include <Processors/Port.h> -#include <Processors/IProcessor.h> - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void connect(OutputPort & output, InputPort & input) -{ - if (input.state || output.state) - throw Exception("Port is already connected", ErrorCodes::LOGICAL_ERROR); - - auto out_name = output.getProcessor().getName(); - auto in_name = input.getProcessor().getName(); - +#include <Processors/Port.h> +#include <Processors/IProcessor.h> + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void connect(OutputPort & output, InputPort & input) +{ + if (input.state || output.state) + throw Exception("Port is already connected", ErrorCodes::LOGICAL_ERROR); + + auto out_name = output.getProcessor().getName(); + auto in_name = input.getProcessor().getName(); + assertCompatibleHeader(output.getHeader(), input.getHeader(), " function connect between " + out_name + " and " + in_name); - - input.output_port = &output; - output.input_port = &input; - input.state = std::make_shared<Port::State>(); - output.state = input.state; -} - -} + + input.output_port = &output; + output.input_port = &input; + input.state = std::make_shared<Port::State>(); + output.state = input.state; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp index d51772cafc..d652a34215 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp @@ -1,421 +1,421 @@ -#include <Processors/ResizeProcessor.h> -#include <iostream> - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -ResizeProcessor::Status ResizeProcessor::prepare() -{ - bool is_first_output = true; - auto output_end = current_output; - - bool all_outs_full_or_unneeded = true; - bool all_outs_finished = true; - - bool is_first_input = true; - auto input_end = current_input; - - bool all_inputs_finished = true; - - auto is_end_input = [&]() { return !is_first_input && current_input == input_end; }; - auto is_end_output = [&]() { return !is_first_output && current_output == output_end; }; - - auto inc_current_input = [&]() - { - is_first_input = false; - ++current_input; - - if (current_input == inputs.end()) - current_input = inputs.begin(); - }; - - auto inc_current_output = [&]() 
- { - is_first_output = false; - ++current_output; - - if (current_output == outputs.end()) - current_output = outputs.begin(); - }; - - /// Find next output where can push. - auto get_next_out = [&, this]() -> OutputPorts::iterator - { - while (!is_end_output()) - { - if (!current_output->isFinished()) - { - all_outs_finished = false; - - if (current_output->canPush()) - { - all_outs_full_or_unneeded = false; - auto res_output = current_output; - inc_current_output(); - return res_output; - } - } - - inc_current_output(); - } - - return outputs.end(); - }; - - /// Find next input from where can pull. - auto get_next_input = [&, this]() -> InputPorts::iterator - { - while (!is_end_input()) - { - if (!current_input->isFinished()) - { - all_inputs_finished = false; - - current_input->setNeeded(); - if (current_input->hasData()) - { - auto res_input = current_input; - inc_current_input(); - return res_input; - } - } - - inc_current_input(); - } - - return inputs.end(); - }; - - auto get_status_if_no_outputs = [&]() -> Status - { - if (all_outs_finished) - { - for (auto & in : inputs) - in.close(); - - return Status::Finished; - } - - if (all_outs_full_or_unneeded) - { - for (auto & in : inputs) - in.setNotNeeded(); - - return Status::PortFull; - } - - /// Now, we pushed to output, and it must be full. - return Status::PortFull; - }; - - auto get_status_if_no_inputs = [&]() -> Status - { - if (all_inputs_finished) - { - for (auto & out : outputs) - out.finish(); - - return Status::Finished; - } - - return Status::NeedData; - }; - - /// Set all inputs needed in order to evenly process them. - /// Otherwise, in case num_outputs < num_inputs and chunks are consumed faster than produced, - /// some inputs can be skipped. -// auto set_all_unprocessed_inputs_needed = [&]() -// { -// for (; cur_input != inputs.end(); ++cur_input) -// if (!cur_input->isFinished()) -// cur_input->setNeeded(); -// }; - - while (!is_end_input() && !is_end_output()) - { - auto output = get_next_out(); - auto input = get_next_input(); - - if (output == outputs.end()) - return get_status_if_no_outputs(); - - - if (input == inputs.end()) - return get_status_if_no_inputs(); - - output->push(input->pull()); - } - - if (is_end_input()) - return get_status_if_no_outputs(); - - /// cur_input == inputs_end() - return get_status_if_no_inputs(); -} - -IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) -{ - if (!initialized) - { - initialized = true; - - for (auto & input : inputs) - { - input.setNeeded(); - input_ports.push_back({.port = &input, .status = InputStatus::NotActive}); - } - - for (auto & output : outputs) - output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); - } - - for (const auto & output_number : updated_outputs) - { - auto & output = output_ports[output_number]; - if (output.port->isFinished()) - { - if (output.status != OutputStatus::Finished) - { - ++num_finished_outputs; - output.status = OutputStatus::Finished; - } - - continue; - } - - if (output.port->canPush()) - { - if (output.status != OutputStatus::NeedData) - { - output.status = OutputStatus::NeedData; - waiting_outputs.push(output_number); - } - } - } - - if (num_finished_outputs == outputs.size()) - { - for (auto & input : inputs) - input.close(); - - return Status::Finished; - } - - for (const auto & input_number : updated_inputs) - { - auto & input = input_ports[input_number]; - if (input.port->isFinished()) - { - if (input.status != InputStatus::Finished) - { 
- input.status = InputStatus::Finished; - ++num_finished_inputs; - } - continue; - } - - if (input.port->hasData()) - { - if (input.status != InputStatus::HasData) - { - input.status = InputStatus::HasData; - inputs_with_data.push(input_number); - } - } - } - - while (!waiting_outputs.empty() && !inputs_with_data.empty()) - { - auto & waiting_output = output_ports[waiting_outputs.front()]; - waiting_outputs.pop(); - - auto & input_with_data = input_ports[inputs_with_data.front()]; - inputs_with_data.pop(); - - waiting_output.port->pushData(input_with_data.port->pullData()); - input_with_data.status = InputStatus::NotActive; - waiting_output.status = OutputStatus::NotActive; - - if (input_with_data.port->isFinished()) - { - input_with_data.status = InputStatus::Finished; - ++num_finished_inputs; - } - } - - if (num_finished_inputs == inputs.size()) - { - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } - - if (!waiting_outputs.empty()) - return Status::NeedData; - - return Status::PortFull; -} - -IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) -{ - if (!initialized) - { - initialized = true; - - for (auto & input : inputs) - input_ports.push_back({.port = &input, .status = InputStatus::NotActive, .waiting_output = -1}); - - for (UInt64 i = 0; i < input_ports.size(); ++i) - disabled_input_ports.push(i); - - for (auto & output : outputs) - output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); - } - - for (const auto & output_number : updated_outputs) - { - auto & output = output_ports[output_number]; - if (output.port->isFinished()) - { - if (output.status != OutputStatus::Finished) - { - ++num_finished_outputs; - output.status = OutputStatus::Finished; - } - - continue; - } - - if (output.port->canPush()) - { - if (output.status != OutputStatus::NeedData) - { - output.status = OutputStatus::NeedData; - waiting_outputs.push(output_number); - } - } - } - - if (num_finished_outputs == outputs.size()) - { - for (auto & input : inputs) - input.close(); - - return Status::Finished; - } - - std::queue<UInt64> inputs_with_data; - - for (const auto & input_number : updated_inputs) - { - auto & input = input_ports[input_number]; - if (input.port->isFinished()) - { - if (input.status != InputStatus::Finished) - { - input.status = InputStatus::Finished; - ++num_finished_inputs; - - waiting_outputs.push(input.waiting_output); - } - continue; - } - - if (input.port->hasData()) - { - if (input.status != InputStatus::NotActive) - { - input.status = InputStatus::NotActive; - inputs_with_data.push(input_number); - } - } - } - - while (!inputs_with_data.empty()) - { - auto input_number = inputs_with_data.front(); - auto & input_with_data = input_ports[input_number]; - inputs_with_data.pop(); - - if (input_with_data.waiting_output == -1) - throw Exception("No associated output for input with data.", ErrorCodes::LOGICAL_ERROR); - - auto & waiting_output = output_ports[input_with_data.waiting_output]; - - if (waiting_output.status == OutputStatus::NotActive) - throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR); - - if (waiting_output.status != OutputStatus::Finished) - { - waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true)); - waiting_output.status = OutputStatus::NotActive; - } - else - abandoned_chunks.emplace_back(input_with_data.port->pullData(/* set_not_needed = */ true)); - - if 
(input_with_data.port->isFinished()) - { - input_with_data.status = InputStatus::Finished; - ++num_finished_inputs; - } - else - disabled_input_ports.push(input_number); - } - - if (num_finished_inputs == inputs.size()) - { - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } - - /// Process abandoned chunks if any. - while (!abandoned_chunks.empty() && !waiting_outputs.empty()) - { - auto & waiting_output = output_ports[waiting_outputs.front()]; - waiting_outputs.pop(); - - waiting_output.port->pushData(std::move(abandoned_chunks.back())); - abandoned_chunks.pop_back(); - - waiting_output.status = OutputStatus::NotActive; - } - - /// Enable more inputs if needed. - while (!disabled_input_ports.empty() && !waiting_outputs.empty()) - { - auto & input = input_ports[disabled_input_ports.front()]; - disabled_input_ports.pop(); - - input.port->setNeeded(); - input.status = InputStatus::NeedData; - input.waiting_output = waiting_outputs.front(); - - waiting_outputs.pop(); - } - - /// Close all other waiting for data outputs (there is no corresponding input for them). - while (!waiting_outputs.empty()) - { - auto & output = output_ports[waiting_outputs.front()]; - waiting_outputs.pop(); - - output.status = OutputStatus::Finished; - output.port->finish(); - ++num_finished_outputs; - } - - if (disabled_input_ports.empty()) - return Status::NeedData; - - return Status::PortFull; -} - -} - +#include <Processors/ResizeProcessor.h> +#include <iostream> + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ResizeProcessor::Status ResizeProcessor::prepare() +{ + bool is_first_output = true; + auto output_end = current_output; + + bool all_outs_full_or_unneeded = true; + bool all_outs_finished = true; + + bool is_first_input = true; + auto input_end = current_input; + + bool all_inputs_finished = true; + + auto is_end_input = [&]() { return !is_first_input && current_input == input_end; }; + auto is_end_output = [&]() { return !is_first_output && current_output == output_end; }; + + auto inc_current_input = [&]() + { + is_first_input = false; + ++current_input; + + if (current_input == inputs.end()) + current_input = inputs.begin(); + }; + + auto inc_current_output = [&]() + { + is_first_output = false; + ++current_output; + + if (current_output == outputs.end()) + current_output = outputs.begin(); + }; + + /// Find next output where can push. + auto get_next_out = [&, this]() -> OutputPorts::iterator + { + while (!is_end_output()) + { + if (!current_output->isFinished()) + { + all_outs_finished = false; + + if (current_output->canPush()) + { + all_outs_full_or_unneeded = false; + auto res_output = current_output; + inc_current_output(); + return res_output; + } + } + + inc_current_output(); + } + + return outputs.end(); + }; + + /// Find next input from where can pull. 
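/// (The scan below mirrors get_next_out above: starting from current_input it
/// walks the ports circularly until a port with data is found, returning it and
/// leaving the cursor just past it, or until one full lap completes. With three
/// inputs where only input 2 has data, the cursor visits 0, 1, 2, returns 2,
/// and the next scan resumes from input 0.)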
+ auto get_next_input = [&, this]() -> InputPorts::iterator + { + while (!is_end_input()) + { + if (!current_input->isFinished()) + { + all_inputs_finished = false; + + current_input->setNeeded(); + if (current_input->hasData()) + { + auto res_input = current_input; + inc_current_input(); + return res_input; + } + } + + inc_current_input(); + } + + return inputs.end(); + }; + + auto get_status_if_no_outputs = [&]() -> Status + { + if (all_outs_finished) + { + for (auto & in : inputs) + in.close(); + + return Status::Finished; + } + + if (all_outs_full_or_unneeded) + { + for (auto & in : inputs) + in.setNotNeeded(); + + return Status::PortFull; + } + + /// Now, we pushed to output, and it must be full. + return Status::PortFull; + }; + + auto get_status_if_no_inputs = [&]() -> Status + { + if (all_inputs_finished) + { + for (auto & out : outputs) + out.finish(); + + return Status::Finished; + } + + return Status::NeedData; + }; + + /// Set all inputs needed in order to evenly process them. + /// Otherwise, in case num_outputs < num_inputs and chunks are consumed faster than produced, + /// some inputs can be skipped. +// auto set_all_unprocessed_inputs_needed = [&]() +// { +// for (; cur_input != inputs.end(); ++cur_input) +// if (!cur_input->isFinished()) +// cur_input->setNeeded(); +// }; + + while (!is_end_input() && !is_end_output()) + { + auto output = get_next_out(); + auto input = get_next_input(); + + if (output == outputs.end()) + return get_status_if_no_outputs(); + + + if (input == inputs.end()) + return get_status_if_no_inputs(); + + output->push(input->pull()); + } + + if (is_end_input()) + return get_status_if_no_outputs(); + + /// cur_input == inputs_end() + return get_status_if_no_inputs(); +} + +IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) +{ + if (!initialized) + { + initialized = true; + + for (auto & input : inputs) + { + input.setNeeded(); + input_ports.push_back({.port = &input, .status = InputStatus::NotActive}); + } + + for (auto & output : outputs) + output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); + } + + for (const auto & output_number : updated_outputs) + { + auto & output = output_ports[output_number]; + if (output.port->isFinished()) + { + if (output.status != OutputStatus::Finished) + { + ++num_finished_outputs; + output.status = OutputStatus::Finished; + } + + continue; + } + + if (output.port->canPush()) + { + if (output.status != OutputStatus::NeedData) + { + output.status = OutputStatus::NeedData; + waiting_outputs.push(output_number); + } + } + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; + } + + for (const auto & input_number : updated_inputs) + { + auto & input = input_ports[input_number]; + if (input.port->isFinished()) + { + if (input.status != InputStatus::Finished) + { + input.status = InputStatus::Finished; + ++num_finished_inputs; + } + continue; + } + + if (input.port->hasData()) + { + if (input.status != InputStatus::HasData) + { + input.status = InputStatus::HasData; + inputs_with_data.push(input_number); + } + } + } + + while (!waiting_outputs.empty() && !inputs_with_data.empty()) + { + auto & waiting_output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); + + auto & input_with_data = input_ports[inputs_with_data.front()]; + inputs_with_data.pop(); + + waiting_output.port->pushData(input_with_data.port->pullData()); + input_with_data.status = 
InputStatus::NotActive; + waiting_output.status = OutputStatus::NotActive; + + if (input_with_data.port->isFinished()) + { + input_with_data.status = InputStatus::Finished; + ++num_finished_inputs; + } + } + + if (num_finished_inputs == inputs.size()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + if (!waiting_outputs.empty()) + return Status::NeedData; + + return Status::PortFull; +} + +IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) +{ + if (!initialized) + { + initialized = true; + + for (auto & input : inputs) + input_ports.push_back({.port = &input, .status = InputStatus::NotActive, .waiting_output = -1}); + + for (UInt64 i = 0; i < input_ports.size(); ++i) + disabled_input_ports.push(i); + + for (auto & output : outputs) + output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); + } + + for (const auto & output_number : updated_outputs) + { + auto & output = output_ports[output_number]; + if (output.port->isFinished()) + { + if (output.status != OutputStatus::Finished) + { + ++num_finished_outputs; + output.status = OutputStatus::Finished; + } + + continue; + } + + if (output.port->canPush()) + { + if (output.status != OutputStatus::NeedData) + { + output.status = OutputStatus::NeedData; + waiting_outputs.push(output_number); + } + } + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; + } + + std::queue<UInt64> inputs_with_data; + + for (const auto & input_number : updated_inputs) + { + auto & input = input_ports[input_number]; + if (input.port->isFinished()) + { + if (input.status != InputStatus::Finished) + { + input.status = InputStatus::Finished; + ++num_finished_inputs; + + waiting_outputs.push(input.waiting_output); + } + continue; + } + + if (input.port->hasData()) + { + if (input.status != InputStatus::NotActive) + { + input.status = InputStatus::NotActive; + inputs_with_data.push(input_number); + } + } + } + + while (!inputs_with_data.empty()) + { + auto input_number = inputs_with_data.front(); + auto & input_with_data = input_ports[input_number]; + inputs_with_data.pop(); + + if (input_with_data.waiting_output == -1) + throw Exception("No associated output for input with data.", ErrorCodes::LOGICAL_ERROR); + + auto & waiting_output = output_ports[input_with_data.waiting_output]; + + if (waiting_output.status == OutputStatus::NotActive) + throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR); + + if (waiting_output.status != OutputStatus::Finished) + { + waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true)); + waiting_output.status = OutputStatus::NotActive; + } + else + abandoned_chunks.emplace_back(input_with_data.port->pullData(/* set_not_needed = */ true)); + + if (input_with_data.port->isFinished()) + { + input_with_data.status = InputStatus::Finished; + ++num_finished_inputs; + } + else + disabled_input_ports.push(input_number); + } + + if (num_finished_inputs == inputs.size()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + /// Process abandoned chunks if any. 
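For illustration (not part of the commit): the loop below drains abandoned_chunks — chunks that were already pulled for an output which finished before delivery — into whichever outputs are still waiting, so no data is lost. A standalone model of that re-routing, with strings standing in for chunks:

    #include <cassert>
    #include <queue>
    #include <string>
    #include <utility>
    #include <vector>

    int main()
    {
        // Chunks whose designated consumer finished before the push could happen.
        std::vector<std::string> abandoned_chunks{"chunk A", "chunk B"};

        // Other consumers still asking for data.
        std::queue<int> waiting_outputs;
        waiting_outputs.push(7);
        waiting_outputs.push(9);

        // Hand each parked chunk to the next waiting consumer, newest chunk first,
        // mirroring the pushData(std::move(abandoned_chunks.back())) loop below.
        while (!abandoned_chunks.empty() && !waiting_outputs.empty())
        {
            int consumer = waiting_outputs.front();
            waiting_outputs.pop();

            std::string chunk = std::move(abandoned_chunks.back());
            abandoned_chunks.pop_back();

            (void)consumer; (void)chunk; // delivery would happen here
        }

        assert(abandoned_chunks.empty() && waiting_outputs.empty());
    }
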
+ while (!abandoned_chunks.empty() && !waiting_outputs.empty()) + { + auto & waiting_output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); + + waiting_output.port->pushData(std::move(abandoned_chunks.back())); + abandoned_chunks.pop_back(); + + waiting_output.status = OutputStatus::NotActive; + } + + /// Enable more inputs if needed. + while (!disabled_input_ports.empty() && !waiting_outputs.empty()) + { + auto & input = input_ports[disabled_input_ports.front()]; + disabled_input_ports.pop(); + + input.port->setNeeded(); + input.status = InputStatus::NeedData; + input.waiting_output = waiting_outputs.front(); + + waiting_outputs.pop(); + } + + /// Close all other waiting for data outputs (there is no corresponding input for them). + while (!waiting_outputs.empty()) + { + auto & output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); + + output.status = OutputStatus::Finished; + output.port->finish(); + ++num_finished_outputs; + } + + if (disabled_input_ports.empty()) + return Status::NeedData; + + return Status::PortFull; +} + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h index ba1caa6605..f9c188e041 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h @@ -1,136 +1,136 @@ -#pragma once - -#include <Processors/IProcessor.h> -#include <queue> - - -namespace DB -{ - +#pragma once + +#include <Processors/IProcessor.h> +#include <queue> + + +namespace DB +{ + /** Has arbitrary non zero number of inputs and arbitrary non zero number of outputs. - * All of them have the same structure. - * + * All of them have the same structure. + * * Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever is is not full). - * Doesn't do any heavy calculations. - * Doesn't preserve an order of data. - * - * Examples: - * - union data from multiple inputs to single output - to serialize data that was processed in parallel. - * - split data from single input to multiple outputs - to allow further parallel processing. - */ -class ResizeProcessor : public IProcessor -{ -public: - /// TODO Check that there is non zero number of inputs and outputs. 
- ResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs) - : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header)) - , current_input(inputs.begin()) - , current_output(outputs.begin()) - { - } - - String getName() const override { return "Resize"; } - - Status prepare() override; - Status prepare(const PortNumbers &, const PortNumbers &) override; - -private: - InputPorts::iterator current_input; - OutputPorts::iterator current_output; - - size_t num_finished_inputs = 0; - size_t num_finished_outputs = 0; - std::queue<UInt64> waiting_outputs; - std::queue<UInt64> inputs_with_data; - bool initialized = false; - - enum class OutputStatus - { - NotActive, - NeedData, - Finished, - }; - - enum class InputStatus - { - NotActive, - HasData, - Finished, - }; - - struct InputPortWithStatus - { - InputPort * port; - InputStatus status; - }; - - struct OutputPortWithStatus - { - OutputPort * port; - OutputStatus status; - }; - - std::vector<InputPortWithStatus> input_ports; - std::vector<OutputPortWithStatus> output_ports; -}; - -class StrictResizeProcessor : public IProcessor -{ -public: - /// TODO Check that there is non zero number of inputs and outputs. - StrictResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs) - : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header)) - , current_input(inputs.begin()) - , current_output(outputs.begin()) - { - } - - String getName() const override { return "StrictResize"; } - - Status prepare(const PortNumbers &, const PortNumbers &) override; - -private: - InputPorts::iterator current_input; - OutputPorts::iterator current_output; - - size_t num_finished_inputs = 0; - size_t num_finished_outputs = 0; - std::queue<UInt64> disabled_input_ports; - std::queue<UInt64> waiting_outputs; - bool initialized = false; - - enum class OutputStatus - { - NotActive, - NeedData, - Finished, - }; - - enum class InputStatus - { - NotActive, - NeedData, - Finished, - }; - - struct InputPortWithStatus - { - InputPort * port; - InputStatus status; - ssize_t waiting_output; - }; - - struct OutputPortWithStatus - { - OutputPort * port; - OutputStatus status; - }; - - std::vector<InputPortWithStatus> input_ports; - std::vector<OutputPortWithStatus> output_ports; - /// This field contained chunks which were read for output which had became finished while reading was happening. - /// They will be pushed to any next waiting output. - std::vector<Port::Data> abandoned_chunks; -}; - -} + * Doesn't do any heavy calculations. + * Doesn't preserve an order of data. + * + * Examples: + * - union data from multiple inputs to single output - to serialize data that was processed in parallel. + * - split data from single input to multiple outputs - to allow further parallel processing. + */ +class ResizeProcessor : public IProcessor +{ +public: + /// TODO Check that there is non zero number of inputs and outputs. 
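A hypothetical wiring sketch (editor's illustration, not in this diff) of the fan-in case from the class comment above. Here s1..s4 stand for upstream processors that each expose one OutputPort with the same header, and connect() is the Processors framework's port-wiring helper; none of these names come from this commit:

    // Merge four parallel streams into one; `header` is the common block structure.
    auto resize = std::make_shared<ResizeProcessor>(header, /*num_inputs=*/4, /*num_outputs=*/1);

    auto input_it = resize->getInputs().begin();
    for (const auto & upstream : {s1, s2, s3, s4})
        connect(upstream->getOutputs().front(), *input_it++);

    // The single output now carries the union of all four streams; as the class
    // comment notes, chunk order across the inputs is not preserved.
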
+ ResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs) + : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header)) + , current_input(inputs.begin()) + , current_output(outputs.begin()) + { + } + + String getName() const override { return "Resize"; } + + Status prepare() override; + Status prepare(const PortNumbers &, const PortNumbers &) override; + +private: + InputPorts::iterator current_input; + OutputPorts::iterator current_output; + + size_t num_finished_inputs = 0; + size_t num_finished_outputs = 0; + std::queue<UInt64> waiting_outputs; + std::queue<UInt64> inputs_with_data; + bool initialized = false; + + enum class OutputStatus + { + NotActive, + NeedData, + Finished, + }; + + enum class InputStatus + { + NotActive, + HasData, + Finished, + }; + + struct InputPortWithStatus + { + InputPort * port; + InputStatus status; + }; + + struct OutputPortWithStatus + { + OutputPort * port; + OutputStatus status; + }; + + std::vector<InputPortWithStatus> input_ports; + std::vector<OutputPortWithStatus> output_ports; +}; + +class StrictResizeProcessor : public IProcessor +{ +public: + /// TODO Check that there is non zero number of inputs and outputs. + StrictResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs) + : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header)) + , current_input(inputs.begin()) + , current_output(outputs.begin()) + { + } + + String getName() const override { return "StrictResize"; } + + Status prepare(const PortNumbers &, const PortNumbers &) override; + +private: + InputPorts::iterator current_input; + OutputPorts::iterator current_output; + + size_t num_finished_inputs = 0; + size_t num_finished_outputs = 0; + std::queue<UInt64> disabled_input_ports; + std::queue<UInt64> waiting_outputs; + bool initialized = false; + + enum class OutputStatus + { + NotActive, + NeedData, + Finished, + }; + + enum class InputStatus + { + NotActive, + NeedData, + Finished, + }; + + struct InputPortWithStatus + { + InputPort * port; + InputStatus status; + ssize_t waiting_output; + }; + + struct OutputPortWithStatus + { + OutputPort * port; + OutputStatus status; + }; + + std::vector<InputPortWithStatus> input_ports; + std::vector<OutputPortWithStatus> output_ports; + /// This field contained chunks which were read for output which had became finished while reading was happening. + /// They will be pushed to any next waiting output. 
+ std::vector<Port::Data> abandoned_chunks; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h index 5676051537..d1f0ec5e6c 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h @@ -1,18 +1,18 @@ -#pragma once -#include <Processors/ISource.h> - - -namespace DB -{ - -class NullSource : public ISource -{ -public: - explicit NullSource(Block header) : ISource(std::move(header)) {} - String getName() const override { return "NullSource"; } - -protected: - Chunk generate() override { return Chunk(); } -}; - -} +#pragma once +#include <Processors/ISource.h> + + +namespace DB +{ + +class NullSource : public ISource +{ +public: + explicit NullSource(Block header) : ISource(std::move(header)) {} + String getName() const override { return "NullSource"; } + +protected: + Chunk generate() override { return Chunk(); } +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp index bdcb9d461a..7c88c6dfbe 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp @@ -1,200 +1,200 @@ -#include <Processors/Sources/SourceFromInputStream.h> -#include <Processors/Transforms/AggregatingTransform.h> -#include <DataTypes/DataTypeAggregateFunction.h> -#include <DataStreams/RemoteBlockInputStream.h> - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_) - : ISourceWithProgress(stream_->getHeader()) - , force_add_aggregating_info(force_add_aggregating_info_) - , stream(std::move(stream_)) -{ - init(); -} - -void SourceFromInputStream::init() -{ - const auto & sample = getPort().getHeader(); - for (auto & type : sample.getDataTypes()) - if (typeid_cast<const DataTypeAggregateFunction *>(type.get())) - has_aggregate_functions = true; -} - -void SourceFromInputStream::addTotalsPort() -{ - if (totals_port) - throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); - - outputs.emplace_back(outputs.front().getHeader(), this); - totals_port = &outputs.back(); -} - -void SourceFromInputStream::addExtremesPort() -{ - if (extremes_port) - throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); - - outputs.emplace_back(outputs.front().getHeader(), this); - extremes_port = &outputs.back(); -} - -IProcessor::Status SourceFromInputStream::prepare() -{ - auto status = ISource::prepare(); - - if (status == Status::Finished) - { - is_generating_finished = true; - - /// Read postfix and get totals if needed. 
- if (!is_stream_finished && !isCancelled()) - return Status::Ready; - - if (totals_port && !totals_port->isFinished()) - { - if (has_totals) - { - if (!totals_port->canPush()) - return Status::PortFull; - - totals_port->push(std::move(totals)); - has_totals = false; - } - - totals_port->finish(); - } - - if (extremes_port && !extremes_port->isFinished()) - { - if (has_extremes) - { - if (!extremes_port->canPush()) - return Status::PortFull; - - extremes_port->push(std::move(extremes)); - has_extremes = false; - } - - extremes_port->finish(); - } - } - - return status; -} - -void SourceFromInputStream::work() -{ - if (!is_generating_finished) - { - try - { - ISource::work(); - } - catch (...) - { - /// Won't read suffix in case of exception. - is_stream_finished = true; - throw; - } - - return; - } - - if (is_stream_finished) - return; - - /// Don't cancel for RemoteBlockInputStream (otherwise readSuffix can stack) - if (!typeid_cast<const RemoteBlockInputStream *>(stream.get())) - stream->cancel(false); - - if (rows_before_limit) - { - const auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - } - - stream->readSuffix(); - - if (auto totals_block = stream->getTotals()) - { - totals.setColumns(totals_block.getColumns(), 1); - has_totals = true; - } - - is_stream_finished = true; -} - -Chunk SourceFromInputStream::generate() -{ - if (is_stream_finished) - return {}; - - if (!is_stream_started) - { - stream->readPrefix(); - is_stream_started = true; - } - - auto block = stream->read(); - if (!block && !isCancelled()) - { - if (rows_before_limit) - { - const auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - } - - stream->readSuffix(); - - if (auto totals_block = stream->getTotals()) - { - if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it. - { - totals.setColumns(totals_block.getColumns(), totals_block.rows()); - has_totals = true; - } - } - - if (auto extremes_block = stream->getExtremes()) - { - if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it. 
- { - extremes.setColumns(extremes_block.getColumns(), extremes_block.rows()); - has_extremes = true; - } - } - - is_stream_finished = true; - return {}; - } - - if (isCancelled()) - return {}; - -#ifndef NDEBUG - assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream"); -#endif - - UInt64 num_rows = block.rows(); - Chunk chunk(block.getColumns(), num_rows); - - if (force_add_aggregating_info || has_aggregate_functions) - { - auto info = std::make_shared<AggregatedChunkInfo>(); - info->bucket_num = block.info.bucket_num; - info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); - } - - return chunk; -} - -} +#include <Processors/Sources/SourceFromInputStream.h> +#include <Processors/Transforms/AggregatingTransform.h> +#include <DataTypes/DataTypeAggregateFunction.h> +#include <DataStreams/RemoteBlockInputStream.h> + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_) + : ISourceWithProgress(stream_->getHeader()) + , force_add_aggregating_info(force_add_aggregating_info_) + , stream(std::move(stream_)) +{ + init(); +} + +void SourceFromInputStream::init() +{ + const auto & sample = getPort().getHeader(); + for (auto & type : sample.getDataTypes()) + if (typeid_cast<const DataTypeAggregateFunction *>(type.get())) + has_aggregate_functions = true; +} + +void SourceFromInputStream::addTotalsPort() +{ + if (totals_port) + throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); + + outputs.emplace_back(outputs.front().getHeader(), this); + totals_port = &outputs.back(); +} + +void SourceFromInputStream::addExtremesPort() +{ + if (extremes_port) + throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); + + outputs.emplace_back(outputs.front().getHeader(), this); + extremes_port = &outputs.back(); +} + +IProcessor::Status SourceFromInputStream::prepare() +{ + auto status = ISource::prepare(); + + if (status == Status::Finished) + { + is_generating_finished = true; + + /// Read postfix and get totals if needed. + if (!is_stream_finished && !isCancelled()) + return Status::Ready; + + if (totals_port && !totals_port->isFinished()) + { + if (has_totals) + { + if (!totals_port->canPush()) + return Status::PortFull; + + totals_port->push(std::move(totals)); + has_totals = false; + } + + totals_port->finish(); + } + + if (extremes_port && !extremes_port->isFinished()) + { + if (has_extremes) + { + if (!extremes_port->canPush()) + return Status::PortFull; + + extremes_port->push(std::move(extremes)); + has_extremes = false; + } + + extremes_port->finish(); + } + } + + return status; +} + +void SourceFromInputStream::work() +{ + if (!is_generating_finished) + { + try + { + ISource::work(); + } + catch (...) + { + /// Won't read suffix in case of exception. 
+ is_stream_finished = true; + throw; + } + + return; + } + + if (is_stream_finished) + return; + + /// Don't cancel for RemoteBlockInputStream (otherwise readSuffix can stack) + if (!typeid_cast<const RemoteBlockInputStream *>(stream.get())) + stream->cancel(false); + + if (rows_before_limit) + { + const auto & info = stream->getProfileInfo(); + if (info.hasAppliedLimit()) + rows_before_limit->add(info.getRowsBeforeLimit()); + } + + stream->readSuffix(); + + if (auto totals_block = stream->getTotals()) + { + totals.setColumns(totals_block.getColumns(), 1); + has_totals = true; + } + + is_stream_finished = true; +} + +Chunk SourceFromInputStream::generate() +{ + if (is_stream_finished) + return {}; + + if (!is_stream_started) + { + stream->readPrefix(); + is_stream_started = true; + } + + auto block = stream->read(); + if (!block && !isCancelled()) + { + if (rows_before_limit) + { + const auto & info = stream->getProfileInfo(); + if (info.hasAppliedLimit()) + rows_before_limit->add(info.getRowsBeforeLimit()); + } + + stream->readSuffix(); + + if (auto totals_block = stream->getTotals()) + { + if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it. + { + totals.setColumns(totals_block.getColumns(), totals_block.rows()); + has_totals = true; + } + } + + if (auto extremes_block = stream->getExtremes()) + { + if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it. + { + extremes.setColumns(extremes_block.getColumns(), extremes_block.rows()); + has_extremes = true; + } + } + + is_stream_finished = true; + return {}; + } + + if (isCancelled()) + return {}; + +#ifndef NDEBUG + assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream"); +#endif + + UInt64 num_rows = block.rows(); + Chunk chunk(block.getColumns(), num_rows); + + if (force_add_aggregating_info || has_aggregate_functions) + { + auto info = std::make_shared<AggregatedChunkInfo>(); + info->bucket_num = block.info.bucket_num; + info->is_overflows = block.info.is_overflows; + chunk.setChunkInfo(std::move(info)); + } + + return chunk; +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h index c30fd8dcb4..9649385909 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h @@ -1,77 +1,77 @@ -#pragma once +#pragma once -#include <Processors/Sources/SourceWithProgress.h> -#include <Processors/RowsBeforeLimitCounter.h> +#include <Processors/Sources/SourceWithProgress.h> +#include <Processors/RowsBeforeLimitCounter.h> #include <DataStreams/IBlockInputStream.h> - - -namespace DB -{ - -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>; - -/// Wrapper for IBlockInputStream which implements ISourceWithProgress. -class SourceFromInputStream : public ISourceWithProgress -{ -public: + + +namespace DB +{ + +class IBlockInputStream; +using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>; + +/// Wrapper for IBlockInputStream which implements ISourceWithProgress. +class SourceFromInputStream : public ISourceWithProgress +{ +public: /// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with bucket number and is_overflows flag) will be added to result chunk. 
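A short usage sketch (illustrative only; `stream` stands for any existing BlockInputStreamPtr), based on the constructor signature shown in this file:

    // Wrap a legacy IBlockInputStream so it can participate in a processors pipeline.
    auto source = std::make_shared<SourceFromInputStream>(stream);

    // For streams carrying partially aggregated blocks, request that every chunk
    // gets an AggregatedChunkInfo (bucket number, is_overflows flag) attached:
    auto agg_source = std::make_shared<SourceFromInputStream>(stream, /*force_add_aggregating_info_=*/true);
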
- explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false); - String getName() const override { return "SourceFromInputStream"; } - - Status prepare() override; - void work() override; - - Chunk generate() override; - - BlockInputStreamPtr & getStream() { return stream; } - - void addTotalsPort(); - void addExtremesPort(); - - OutputPort * getTotalsPort() const { return totals_port; } - OutputPort * getExtremesPort() const { return extremes_port; } - - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } - - /// Implementation for methods from ISourceWithProgress. + explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false); + String getName() const override { return "SourceFromInputStream"; } + + Status prepare() override; + void work() override; + + Chunk generate() override; + + BlockInputStreamPtr & getStream() { return stream; } + + void addTotalsPort(); + void addExtremesPort(); + + OutputPort * getTotalsPort() const { return totals_port; } + OutputPort * getExtremesPort() const { return extremes_port; } + + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } + + /// Implementation for methods from ISourceWithProgress. void setLimits(const StreamLocalLimits & limits_) final { stream->setLimits(limits_); } void setLeafLimits(const SizeLimits &) final { } - void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { stream->setQuota(quota_); } - void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); } - void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); } - void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); } - - /// Stop reading from stream if output port is finished. - void onUpdatePorts() override - { - if (getPort().isFinished()) - cancel(); - } - -protected: - void onCancel() override { stream->cancel(false); } - -private: - bool has_aggregate_functions = false; - bool force_add_aggregating_info = false; - BlockInputStreamPtr stream; - - RowsBeforeLimitCounterPtr rows_before_limit; - - Chunk totals; - OutputPort * totals_port = nullptr; - bool has_totals = false; - - Chunk extremes; - OutputPort * extremes_port = nullptr; - bool has_extremes = false; - - bool is_generating_finished = false; - bool is_stream_finished = false; - bool is_stream_started = false; - - void init(); -}; - -} + void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { stream->setQuota(quota_); } + void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); } + void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); } + void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); } + + /// Stop reading from stream if output port is finished. 
+ void onUpdatePorts() override + { + if (getPort().isFinished()) + cancel(); + } + +protected: + void onCancel() override { stream->cancel(false); } + +private: + bool has_aggregate_functions = false; + bool force_add_aggregating_info = false; + BlockInputStreamPtr stream; + + RowsBeforeLimitCounterPtr rows_before_limit; + + Chunk totals; + OutputPort * totals_port = nullptr; + bool has_totals = false; + + Chunk extremes; + OutputPort * extremes_port = nullptr; + bool has_extremes = false; + + bool is_generating_finished = false; + bool is_stream_finished = false; + bool is_stream_started = false; + + void init(); +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h index f6e8c3b22e..d304bdbab9 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h @@ -1,21 +1,21 @@ -#pragma once -#include <Processors/Sources/SourceWithProgress.h> - - -namespace DB -{ - -class SourceFromSingleChunk : public SourceWithProgress -{ -public: - explicit SourceFromSingleChunk(Block header, Chunk chunk_) : SourceWithProgress(std::move(header)), chunk(std::move(chunk_)) {} - String getName() const override { return "SourceFromSingleChunk"; } - -protected: - Chunk generate() override { return std::move(chunk); } - -private: - Chunk chunk; -}; - -} +#pragma once +#include <Processors/Sources/SourceWithProgress.h> + + +namespace DB +{ + +class SourceFromSingleChunk : public SourceWithProgress +{ +public: + explicit SourceFromSingleChunk(Block header, Chunk chunk_) : SourceWithProgress(std::move(header)), chunk(std::move(chunk_)) {} + String getName() const override { return "SourceFromSingleChunk"; } + +protected: + Chunk generate() override { return std::move(chunk); } + +private: + Chunk chunk; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp index 6eca81ce07..647ad0f205 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp @@ -1,28 +1,28 @@ -#include <Processors/Sources/SourceWithProgress.h> - -#include <Interpreters/ProcessList.h> -#include <Access/EnabledQuota.h> - +#include <Processors/Sources/SourceWithProgress.h> + +#include <Interpreters/ProcessList.h> +#include <Access/EnabledQuota.h> + namespace ProfileEvents { extern const Event SelectedRows; extern const Event SelectedBytes; } -namespace DB -{ - -namespace ErrorCodes -{ - extern const int TOO_MANY_ROWS; - extern const int TOO_MANY_BYTES; -} - -SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress) - : ISourceWithProgress(header), auto_progress(enable_auto_progress) -{ -} - +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_MANY_ROWS; + extern const int TOO_MANY_BYTES; +} + +SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress) + : ISourceWithProgress(header), auto_progress(enable_auto_progress) +{ +} + void SourceWithProgress::setProcessListElement(QueryStatus * elem) { process_list_elem = elem; @@ -47,105 +47,105 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem) } 
} -void SourceWithProgress::work() -{ +void SourceWithProgress::work() +{ if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode)) - { - cancel(); - } - else - { - was_progress_called = false; - - ISourceWithProgress::work(); - - if (auto_progress && !was_progress_called && has_input) - progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() }); - } -} - -/// Aggregated copy-paste from IBlockInputStream::progressImpl. -/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream. -void SourceWithProgress::progress(const Progress & value) -{ - was_progress_called = true; - - if (total_rows_approx != 0) - { - Progress total_rows_progress = {0, 0, total_rows_approx}; - - if (progress_callback) - progress_callback(total_rows_progress); - - if (process_list_elem) - process_list_elem->updateProgressIn(total_rows_progress); - - total_rows_approx = 0; - } - - if (progress_callback) - progress_callback(value); - - if (process_list_elem) - { - if (!process_list_elem->updateProgressIn(value)) - cancel(); - - /// The total amount of data processed or intended for processing in all sources, possibly on remote servers. - - ProgressValues progress = process_list_elem->getProgressIn(); - - /// If the mode is "throw" and estimate of total rows is known, then throw early if an estimate is too high. - /// If the mode is "break", then allow to read before limit even if estimate is very high. - - size_t rows_to_check_limit = progress.read_rows; - if (limits.size_limits.overflow_mode == OverflowMode::THROW && progress.total_rows_to_read > progress.read_rows) - rows_to_check_limit = progress.total_rows_to_read; - - /// Check the restrictions on the - /// * amount of data to read - /// * speed of the query - /// * quota on the amount of data to read - /// NOTE: Maybe it makes sense to have them checked directly in ProcessList? - - if (limits.mode == LimitsMode::LIMITS_TOTAL) - { - if (!limits.size_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read", - ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES)) - { - cancel(); - } - } - + { + cancel(); + } + else + { + was_progress_called = false; + + ISourceWithProgress::work(); + + if (auto_progress && !was_progress_called && has_input) + progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() }); + } +} + +/// Aggregated copy-paste from IBlockInputStream::progressImpl. +/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream. +void SourceWithProgress::progress(const Progress & value) +{ + was_progress_called = true; + + if (total_rows_approx != 0) + { + Progress total_rows_progress = {0, 0, total_rows_approx}; + + if (progress_callback) + progress_callback(total_rows_progress); + + if (process_list_elem) + process_list_elem->updateProgressIn(total_rows_progress); + + total_rows_approx = 0; + } + + if (progress_callback) + progress_callback(value); + + if (process_list_elem) + { + if (!process_list_elem->updateProgressIn(value)) + cancel(); + + /// The total amount of data processed or intended for processing in all sources, possibly on remote servers. + + ProgressValues progress = process_list_elem->getProgressIn(); + + /// If the mode is "throw" and estimate of total rows is known, then throw early if an estimate is too high. + /// If the mode is "break", then allow to read before limit even if estimate is very high. 
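A self-contained model (editor's illustration, with made-up numbers) of the early-limit decision the two comments above describe: under THROW the limit is compared against the estimated total so an oversized query fails early, while under BREAK only rows actually read are counted, so reading up to the limit is still allowed.

    #include <cstdint>
    #include <iostream>

    enum class OverflowMode { THROW, BREAK };

    int main()
    {
        const uint64_t read_rows = 100'000;
        const uint64_t total_rows_to_read = 1'000'000;  // optimistic estimate
        const uint64_t max_rows = 500'000;              // configured limit

        for (OverflowMode mode : {OverflowMode::THROW, OverflowMode::BREAK})
        {
            uint64_t rows_to_check = read_rows;
            if (mode == OverflowMode::THROW && total_rows_to_read > read_rows)
                rows_to_check = total_rows_to_read;

            std::cout << (rows_to_check > max_rows ? "limit exceeded\n" : "ok\n");
        }
        // Prints "limit exceeded" for THROW (the estimate is already too high)
        // and "ok" for BREAK.
    }
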
+
+        size_t rows_to_check_limit = progress.read_rows;
+        if (limits.size_limits.overflow_mode == OverflowMode::THROW && progress.total_rows_to_read > progress.read_rows)
+            rows_to_check_limit = progress.total_rows_to_read;
+
+        /// Check the restrictions on the
+        ///  * amount of data to read
+        ///  * speed of the query
+        ///  * quota on the amount of data to read
+        /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
+
+        if (limits.mode == LimitsMode::LIMITS_TOTAL)
+        {
+            if (!limits.size_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read",
+                ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
+            {
+                cancel();
+            }
+        }
+
         if (!leaf_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read on leaf node",
             ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
         {
             cancel();
         }
-        size_t total_rows = progress.total_rows_to_read;
-
-        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
-        UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
-
-        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
-        {
-            /// Should be done in PipelineExecutor.
-            /// It is here for compatibility with IBlockInputStream.
-            CurrentThread::updatePerformanceCounters();
-            last_profile_events_update_time = total_elapsed_microseconds;
-        }
-
-        /// Should be done in PipelineExecutor.
-        /// It is here for compatibility with IBlockInputStream.
-        limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
-
-        if (quota && limits.mode == LimitsMode::LIMITS_TOTAL)
-            quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes});
-    }
+        size_t total_rows = progress.total_rows_to_read;
+
+        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
+        UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
+
+        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
+        {
+            /// Should be done in PipelineExecutor.
+            /// It is here for compatibility with IBlockInputStream.
+            CurrentThread::updatePerformanceCounters();
+            last_profile_events_update_time = total_elapsed_microseconds;
+        }
+
+        /// Should be done in PipelineExecutor.
+        /// It is here for compatibility with IBlockInputStream.
+ limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); + + if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) + quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes}); + } ProfileEvents::increment(ProfileEvents::SelectedRows, value.read_rows); ProfileEvents::increment(ProfileEvents::SelectedBytes, value.read_bytes); -} - -} +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h index 256930b6d1..49728be01e 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h @@ -1,90 +1,90 @@ -#pragma once -#include <Processors/ISource.h> -#include <Common/Stopwatch.h> +#pragma once +#include <Processors/ISource.h> +#include <Common/Stopwatch.h> #include <DataStreams/StreamLocalLimits.h> #include <IO/Progress.h> - -namespace DB -{ - + +namespace DB +{ + class QueryStatus; class EnabledQuota; -/// Adds progress to ISource. -/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread. -class ISourceWithProgress : public ISource -{ -public: - using ISource::ISource; - - /// Set limitations that checked on each chunk. +/// Adds progress to ISource. +/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread. +class ISourceWithProgress : public ISource +{ +public: + using ISource::ISource; + + /// Set limitations that checked on each chunk. virtual void setLimits(const StreamLocalLimits & limits_) = 0; - + /// Set limitations that checked on each chunk for distributed queries on leaf nodes. virtual void setLeafLimits(const SizeLimits & leaf_limits_) = 0; - /// Set the quota. If you set a quota on the amount of raw data, - /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. - virtual void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) = 0; - - /// Set the pointer to the process list item. - /// General information about the resources spent on the request will be written into it. - /// Based on this information, the quota and some restrictions will be checked. - /// This information will also be available in the SHOW PROCESSLIST request. - virtual void setProcessListElement(QueryStatus * elem) = 0; - - /// Set the execution progress bar callback. - /// It is called after each chunk. - /// The function takes the number of rows in the last chunk, the number of bytes in the last chunk. - /// Note that the callback can be called from different threads. - virtual void setProgressCallback(const ProgressCallback & callback) = 0; - - /// Set the approximate total number of rows to read. - virtual void addTotalRowsApprox(size_t value) = 0; -}; - -/// Implementation for ISourceWithProgress -class SourceWithProgress : public ISourceWithProgress -{ -public: - using ISourceWithProgress::ISourceWithProgress; - /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk. - SourceWithProgress(Block header, bool enable_auto_progress); - + /// Set the quota. If you set a quota on the amount of raw data, + /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. 
+ virtual void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) = 0; + + /// Set the pointer to the process list item. + /// General information about the resources spent on the request will be written into it. + /// Based on this information, the quota and some restrictions will be checked. + /// This information will also be available in the SHOW PROCESSLIST request. + virtual void setProcessListElement(QueryStatus * elem) = 0; + + /// Set the execution progress bar callback. + /// It is called after each chunk. + /// The function takes the number of rows in the last chunk, the number of bytes in the last chunk. + /// Note that the callback can be called from different threads. + virtual void setProgressCallback(const ProgressCallback & callback) = 0; + + /// Set the approximate total number of rows to read. + virtual void addTotalRowsApprox(size_t value) = 0; +}; + +/// Implementation for ISourceWithProgress +class SourceWithProgress : public ISourceWithProgress +{ +public: + using ISourceWithProgress::ISourceWithProgress; + /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk. + SourceWithProgress(Block header, bool enable_auto_progress); + void setLimits(const StreamLocalLimits & limits_) final { limits = limits_; } void setLeafLimits(const SizeLimits & leaf_limits_) final {leaf_limits = leaf_limits_; } - void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { quota = quota_; } + void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { quota = quota_; } void setProcessListElement(QueryStatus * elem) final; - void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; } - void addTotalRowsApprox(size_t value) final { total_rows_approx += value; } - -protected: - /// Call this method to provide information about progress. - void progress(const Progress & value); - - void work() override; - -private: + void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; } + void addTotalRowsApprox(size_t value) final { total_rows_approx += value; } + +protected: + /// Call this method to provide information about progress. + void progress(const Progress & value); + + void work() override; + +private: StreamLocalLimits limits; SizeLimits leaf_limits; - std::shared_ptr<const EnabledQuota> quota; - ProgressCallback progress_callback; - QueryStatus * process_list_elem = nullptr; - - /// The approximate total number of rows to read. For progress bar. - size_t total_rows_approx = 0; - - Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time. - /// According to total_stopwatch in microseconds. - UInt64 last_profile_events_update_time = 0; - - /// This flag checks if progress() was manually called at generate() call. - /// If not, it will be called for chunk after generate() was finished. - bool was_progress_called = false; - - /// If enabled, progress() will be automatically called on each generated chunk. - bool auto_progress = true; -}; - -} + std::shared_ptr<const EnabledQuota> quota; + ProgressCallback progress_callback; + QueryStatus * process_list_elem = nullptr; + + /// The approximate total number of rows to read. For progress bar. + size_t total_rows_approx = 0; + + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time. + /// According to total_stopwatch in microseconds. 
+ UInt64 last_profile_events_update_time = 0; + + /// This flag checks if progress() was manually called at generate() call. + /// If not, it will be called for chunk after generate() was finished. + bool was_progress_called = false; + + /// If enabled, progress() will be automatically called on each generated chunk. + bool auto_progress = true; +}; + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp index 24712cec1d..a8a93e5366 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp @@ -1,400 +1,400 @@ -#include <Processors/Transforms/AggregatingTransform.h> - -#include <DataStreams/NativeBlockInputStream.h> -#include <Processors/ISource.h> +#include <Processors/Transforms/AggregatingTransform.h> + +#include <DataStreams/NativeBlockInputStream.h> +#include <Processors/ISource.h> #include <Processors/Pipe.h> -#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h> +#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h> #include <DataStreams/materializeBlock.h> - -namespace ProfileEvents -{ - extern const Event ExternalAggregationMerge; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; - extern const int LOGICAL_ERROR; -} - -/// Convert block to chunk. -/// Adds additional info about aggregation. -Chunk convertToChunk(const Block & block) -{ - auto info = std::make_shared<AggregatedChunkInfo>(); - info->bucket_num = block.info.bucket_num; - info->is_overflows = block.info.is_overflows; - - UInt64 num_rows = block.rows(); - Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); - - return chunk; -} - -namespace -{ - const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) - { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR); - - const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()); - if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR); - - return agg_info; - } - - /// Reads chunks from file in native format. Provide chunks with aggregation info. - class SourceFromNativeStream : public ISource - { - public: - SourceFromNativeStream(const Block & header, const std::string & path) - : ISource(header), file_in(path), compressed_in(file_in), + +namespace ProfileEvents +{ + extern const Event ExternalAggregationMerge; +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; + extern const int LOGICAL_ERROR; +} + +/// Convert block to chunk. +/// Adds additional info about aggregation. 
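A standalone model (editor's illustration; simplified stand-ins, not the real Chunk API) of the side-channel pattern that convertToChunk below sets up: a polymorphic info object rides along with the payload and is recovered downstream via a dynamic cast, just as getInfoFromChunk() later recovers AggregatedChunkInfo with typeid_cast.

    #include <cassert>
    #include <memory>

    struct ChunkInfoBase
    {
        virtual ~ChunkInfoBase() = default;
    };

    struct AggregatedInfo : ChunkInfoBase
    {
        int bucket_num = -1;
        bool is_overflows = false;
    };

    struct MiniChunk
    {
        std::shared_ptr<const ChunkInfoBase> info;
    };

    int main()
    {
        auto info = std::make_shared<AggregatedInfo>();
        info->bucket_num = 42;

        MiniChunk chunk{std::move(info)};

        // Downstream: recover the typed info from the base pointer.
        const auto * agg = dynamic_cast<const AggregatedInfo *>(chunk.info.get());
        assert(agg && agg->bucket_num == 42 && !agg->is_overflows);
    }
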
+Chunk convertToChunk(const Block & block) +{ + auto info = std::make_shared<AggregatedChunkInfo>(); + info->bucket_num = block.info.bucket_num; + info->is_overflows = block.info.is_overflows; + + UInt64 num_rows = block.rows(); + Chunk chunk(block.getColumns(), num_rows); + chunk.setChunkInfo(std::move(info)); + + return chunk; +} + +namespace +{ + const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) + { + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR); + + const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()); + if (!agg_info) + throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR); + + return agg_info; + } + + /// Reads chunks from file in native format. Provide chunks with aggregation info. + class SourceFromNativeStream : public ISource + { + public: + SourceFromNativeStream(const Block & header, const std::string & path) + : ISource(header), file_in(path), compressed_in(file_in), block_in(std::make_shared<NativeBlockInputStream>(compressed_in, DBMS_TCP_PROTOCOL_VERSION)) - { - block_in->readPrefix(); - } - - String getName() const override { return "SourceFromNativeStream"; } - - Chunk generate() override - { - if (!block_in) - return {}; - - auto block = block_in->read(); - if (!block) - { - block_in->readSuffix(); - block_in.reset(); - return {}; - } - - return convertToChunk(block); - } - - private: - ReadBufferFromFile file_in; - CompressedReadBuffer compressed_in; - BlockInputStreamPtr block_in; - }; -} - -/// Worker which merges buckets for two-level aggregation. -/// Atomically increments bucket counter and returns merged result. -class ConvertingAggregatedToChunksSource : public ISource -{ -public: - static constexpr UInt32 NUM_BUCKETS = 256; - - struct SharedData - { - std::atomic<UInt32> next_bucket_to_merge = 0; + { + block_in->readPrefix(); + } + + String getName() const override { return "SourceFromNativeStream"; } + + Chunk generate() override + { + if (!block_in) + return {}; + + auto block = block_in->read(); + if (!block) + { + block_in->readSuffix(); + block_in.reset(); + return {}; + } + + return convertToChunk(block); + } + + private: + ReadBufferFromFile file_in; + CompressedReadBuffer compressed_in; + BlockInputStreamPtr block_in; + }; +} + +/// Worker which merges buckets for two-level aggregation. +/// Atomically increments bucket counter and returns merged result. 
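A self-contained model (editor's illustration) of the lock-free work claiming described in the comment above: every worker repeatedly claims the next bucket with an atomic fetch_add, so each bucket is merged exactly once and no mutex is needed.

    #include <array>
    #include <atomic>
    #include <cassert>
    #include <thread>
    #include <vector>

    int main()
    {
        constexpr unsigned NUM_BUCKETS = 256;
        std::atomic<unsigned> next_bucket_to_merge{0};
        std::array<std::atomic<bool>, NUM_BUCKETS> is_bucket_processed{};

        auto worker = [&]
        {
            while (true)
            {
                unsigned bucket = next_bucket_to_merge.fetch_add(1);
                if (bucket >= NUM_BUCKETS)
                    return;                          // all buckets claimed
                is_bucket_processed[bucket] = true;  // stands in for merging the bucket
            }
        };

        std::vector<std::thread> pool;
        for (int i = 0; i < 4; ++i)
            pool.emplace_back(worker);
        for (auto & t : pool)
            t.join();

        // Every bucket was processed exactly once across the pool.
        for (auto & flag : is_bucket_processed)
            assert(flag.load());
    }
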
+class ConvertingAggregatedToChunksSource : public ISource +{ +public: + static constexpr UInt32 NUM_BUCKETS = 256; + + struct SharedData + { + std::atomic<UInt32> next_bucket_to_merge = 0; std::array<std::atomic<bool>, NUM_BUCKETS> is_bucket_processed{}; - std::atomic<bool> is_cancelled = false; - - SharedData() - { - for (auto & flag : is_bucket_processed) - flag = false; - } - }; - - using SharedDataPtr = std::shared_ptr<SharedData>; - - ConvertingAggregatedToChunksSource( - AggregatingTransformParamsPtr params_, - ManyAggregatedDataVariantsPtr data_, - SharedDataPtr shared_data_, - Arena * arena_) - : ISource(params_->getHeader()) - , params(std::move(params_)) - , data(std::move(data_)) - , shared_data(std::move(shared_data_)) - , arena(arena_) - {} - - String getName() const override { return "ConvertingAggregatedToChunksSource"; } - -protected: - Chunk generate() override - { - UInt32 bucket_num = shared_data->next_bucket_to_merge.fetch_add(1); - - if (bucket_num >= NUM_BUCKETS) - return {}; - - Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled); - Chunk chunk = convertToChunk(block); - - shared_data->is_bucket_processed[bucket_num] = true; - - return chunk; - } - -private: - AggregatingTransformParamsPtr params; - ManyAggregatedDataVariantsPtr data; - SharedDataPtr shared_data; - Arena * arena; -}; - -/// Generates chunks with aggregated data. -/// In single level case, aggregates data itself. -/// In two-level case, creates `ConvertingAggregatedToChunksSource` workers: -/// -/// ConvertingAggregatedToChunksSource -> -/// ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform -> AggregatingTransform -/// ConvertingAggregatedToChunksSource -> -/// -/// Result chunks guaranteed to be sorted by bucket number. -class ConvertingAggregatedToChunksTransform : public IProcessor -{ -public: - ConvertingAggregatedToChunksTransform(AggregatingTransformParamsPtr params_, ManyAggregatedDataVariantsPtr data_, size_t num_threads_) - : IProcessor({}, {params_->getHeader()}) - , params(std::move(params_)), data(std::move(data_)), num_threads(num_threads_) {} - - String getName() const override { return "ConvertingAggregatedToChunksTransform"; } - - void work() override - { - if (data->empty()) - { - finished = true; - return; - } - - if (!is_initialized) - { - initialize(); - return; - } - - if (data->at(0)->isTwoLevel()) - { - /// In two-level case will only create sources. - if (inputs.empty()) - createSources(); - } - else - { - mergeSingleLevel(); - } - } - - Processors expandPipeline() override - { - for (auto & source : processors) - { - auto & out = source->getOutputs().front(); - inputs.emplace_back(out.getHeader(), this); - connect(out, inputs.back()); - inputs.back().setNeeded(); - } - - return std::move(processors); - } - - IProcessor::Status prepare() override - { - auto & output = outputs.front(); - - if (finished && !has_input) - { - output.finish(); - return Status::Finished; - } - - /// Check can output. - if (output.isFinished()) - { - for (auto & input : inputs) - input.close(); - - if (shared_data) - shared_data->is_cancelled.store(true); - - return Status::Finished; - } - - if (!output.canPush()) - return Status::PortFull; - - if (!is_initialized) - return Status::Ready; - - if (!processors.empty()) - return Status::ExpandPipeline; - - if (has_input) - return preparePushToOutput(); - - /// Single level case. - if (inputs.empty()) - return Status::Ready; - - /// Two-level case. 
- return prepareTwoLevel(); - } - -private: - IProcessor::Status preparePushToOutput() - { - auto & output = outputs.front(); - output.push(std::move(current_chunk)); - has_input = false; - - if (finished) - { - output.finish(); - return Status::Finished; - } - - return Status::PortFull; - } - - /// Read all sources and try to push current bucket. - IProcessor::Status prepareTwoLevel() - { - auto & output = outputs.front(); - - for (auto & input : inputs) - { - if (!input.isFinished() && input.hasData()) - { - auto chunk = input.pull(); - auto bucket = getInfoFromChunk(chunk)->bucket_num; - chunks[bucket] = std::move(chunk); - } - } - - if (!shared_data->is_bucket_processed[current_bucket_num]) - return Status::NeedData; - - if (!chunks[current_bucket_num]) - return Status::NeedData; - - output.push(std::move(chunks[current_bucket_num])); - - ++current_bucket_num; - if (current_bucket_num == NUM_BUCKETS) - { - output.finish(); - /// Do not close inputs, they must be finished. - return Status::Finished; - } - - return Status::PortFull; - } - - AggregatingTransformParamsPtr params; - ManyAggregatedDataVariantsPtr data; - ConvertingAggregatedToChunksSource::SharedDataPtr shared_data; - - size_t num_threads; - - bool is_initialized = false; - bool has_input = false; - bool finished = false; - - Chunk current_chunk; - - UInt32 current_bucket_num = 0; - static constexpr Int32 NUM_BUCKETS = 256; - std::array<Chunk, NUM_BUCKETS> chunks; - - Processors processors; - - void setCurrentChunk(Chunk chunk) - { - if (has_input) - throw Exception("Current chunk was already set in " - "ConvertingAggregatedToChunksTransform.", ErrorCodes::LOGICAL_ERROR); - - has_input = true; - current_chunk = std::move(chunk); - } - - void initialize() - { - is_initialized = true; - - AggregatedDataVariantsPtr & first = data->at(0); - - /// At least we need one arena in first data item per thread - if (num_threads > first->aggregates_pools.size()) - { - Arenas & first_pool = first->aggregates_pools; - for (size_t j = first_pool.size(); j < num_threads; j++) - first_pool.emplace_back(std::make_shared<Arena>()); - } - - if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row) - { - params->aggregator.mergeWithoutKeyDataImpl(*data); - auto block = params->aggregator.prepareBlockAndFillWithoutKey( + std::atomic<bool> is_cancelled = false; + + SharedData() + { + for (auto & flag : is_bucket_processed) + flag = false; + } + }; + + using SharedDataPtr = std::shared_ptr<SharedData>; + + ConvertingAggregatedToChunksSource( + AggregatingTransformParamsPtr params_, + ManyAggregatedDataVariantsPtr data_, + SharedDataPtr shared_data_, + Arena * arena_) + : ISource(params_->getHeader()) + , params(std::move(params_)) + , data(std::move(data_)) + , shared_data(std::move(shared_data_)) + , arena(arena_) + {} + + String getName() const override { return "ConvertingAggregatedToChunksSource"; } + +protected: + Chunk generate() override + { + UInt32 bucket_num = shared_data->next_bucket_to_merge.fetch_add(1); + + if (bucket_num >= NUM_BUCKETS) + return {}; + + Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled); + Chunk chunk = convertToChunk(block); + + shared_data->is_bucket_processed[bucket_num] = true; + + return chunk; + } + +private: + AggregatingTransformParamsPtr params; + ManyAggregatedDataVariantsPtr data; + SharedDataPtr shared_data; + Arena * arena; +}; + +/// Generates chunks with aggregated data. 
+/// In single level case, aggregates data itself. +/// In two-level case, creates `ConvertingAggregatedToChunksSource` workers: +/// +/// ConvertingAggregatedToChunksSource -> +/// ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform -> AggregatingTransform +/// ConvertingAggregatedToChunksSource -> +/// +/// Result chunks guaranteed to be sorted by bucket number. +class ConvertingAggregatedToChunksTransform : public IProcessor +{ +public: + ConvertingAggregatedToChunksTransform(AggregatingTransformParamsPtr params_, ManyAggregatedDataVariantsPtr data_, size_t num_threads_) + : IProcessor({}, {params_->getHeader()}) + , params(std::move(params_)), data(std::move(data_)), num_threads(num_threads_) {} + + String getName() const override { return "ConvertingAggregatedToChunksTransform"; } + + void work() override + { + if (data->empty()) + { + finished = true; + return; + } + + if (!is_initialized) + { + initialize(); + return; + } + + if (data->at(0)->isTwoLevel()) + { + /// In two-level case will only create sources. + if (inputs.empty()) + createSources(); + } + else + { + mergeSingleLevel(); + } + } + + Processors expandPipeline() override + { + for (auto & source : processors) + { + auto & out = source->getOutputs().front(); + inputs.emplace_back(out.getHeader(), this); + connect(out, inputs.back()); + inputs.back().setNeeded(); + } + + return std::move(processors); + } + + IProcessor::Status prepare() override + { + auto & output = outputs.front(); + + if (finished && !has_input) + { + output.finish(); + return Status::Finished; + } + + /// Check can output. + if (output.isFinished()) + { + for (auto & input : inputs) + input.close(); + + if (shared_data) + shared_data->is_cancelled.store(true); + + return Status::Finished; + } + + if (!output.canPush()) + return Status::PortFull; + + if (!is_initialized) + return Status::Ready; + + if (!processors.empty()) + return Status::ExpandPipeline; + + if (has_input) + return preparePushToOutput(); + + /// Single level case. + if (inputs.empty()) + return Status::Ready; + + /// Two-level case. + return prepareTwoLevel(); + } + +private: + IProcessor::Status preparePushToOutput() + { + auto & output = outputs.front(); + output.push(std::move(current_chunk)); + has_input = false; + + if (finished) + { + output.finish(); + return Status::Finished; + } + + return Status::PortFull; + } + + /// Read all sources and try to push current bucket. + IProcessor::Status prepareTwoLevel() + { + auto & output = outputs.front(); + + for (auto & input : inputs) + { + if (!input.isFinished() && input.hasData()) + { + auto chunk = input.pull(); + auto bucket = getInfoFromChunk(chunk)->bucket_num; + chunks[bucket] = std::move(chunk); + } + } + + if (!shared_data->is_bucket_processed[current_bucket_num]) + return Status::NeedData; + + if (!chunks[current_bucket_num]) + return Status::NeedData; + + output.push(std::move(chunks[current_bucket_num])); + + ++current_bucket_num; + if (current_bucket_num == NUM_BUCKETS) + { + output.finish(); + /// Do not close inputs, they must be finished. 
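[Editor's note] The source above parallelizes the two-level merge with nothing more than a shared atomic counter: each worker claims the next bucket via fetch_add, merges it, and raises a per-bucket flag so the downstream transform can release results in order. A minimal standalone sketch of that dispatch pattern — toy types only; process_bucket stands in for mergeAndConvertOneBucketToBlock, and nothing here is the ClickHouse API:

    #include <array>
    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    static constexpr unsigned NUM_BUCKETS = 256;

    struct SharedState
    {
        std::atomic<unsigned> next_bucket{0};
        std::array<std::atomic<bool>, NUM_BUCKETS> processed;
        std::atomic<bool> cancelled{false};

        SharedState()
        {
            /// Pre-C++20, std::atomic is not guaranteed to be zero-initialized,
            /// hence the explicit loop (the source above does the same).
            for (auto & flag : processed)
                flag = false;
        }
    };

    static void worker(SharedState & shared)
    {
        for (;;)
        {
            unsigned bucket = shared.next_bucket.fetch_add(1);
            if (bucket >= NUM_BUCKETS || shared.cancelled)
                return;                      /// nothing left to claim; generate() returns {}
            /// ... merge bucket `bucket` here ...
            shared.processed[bucket] = true; /// consumer may now release this bucket
        }
    }

    int main()
    {
        SharedState shared;
        std::vector<std::thread> pool;
        for (int i = 0; i < 4; ++i)
            pool.emplace_back(worker, std::ref(shared));
        for (auto & t : pool)
            t.join();

        unsigned done = 0;
        for (auto & flag : shared.processed)
            done += flag.load();
        std::printf("claimed and processed %u buckets\n", done);
    }

Because fetch_add hands each bucket number to exactly one worker, no bucket is merged twice and no locking is needed on the dispatch path.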
+ return Status::Finished; + } + + return Status::PortFull; + } + + AggregatingTransformParamsPtr params; + ManyAggregatedDataVariantsPtr data; + ConvertingAggregatedToChunksSource::SharedDataPtr shared_data; + + size_t num_threads; + + bool is_initialized = false; + bool has_input = false; + bool finished = false; + + Chunk current_chunk; + + UInt32 current_bucket_num = 0; + static constexpr Int32 NUM_BUCKETS = 256; + std::array<Chunk, NUM_BUCKETS> chunks; + + Processors processors; + + void setCurrentChunk(Chunk chunk) + { + if (has_input) + throw Exception("Current chunk was already set in " + "ConvertingAggregatedToChunksTransform.", ErrorCodes::LOGICAL_ERROR); + + has_input = true; + current_chunk = std::move(chunk); + } + + void initialize() + { + is_initialized = true; + + AggregatedDataVariantsPtr & first = data->at(0); + + /// At least we need one arena in first data item per thread + if (num_threads > first->aggregates_pools.size()) + { + Arenas & first_pool = first->aggregates_pools; + for (size_t j = first_pool.size(); j < num_threads; j++) + first_pool.emplace_back(std::make_shared<Arena>()); + } + + if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row) + { + params->aggregator.mergeWithoutKeyDataImpl(*data); + auto block = params->aggregator.prepareBlockAndFillWithoutKey( *first, params->final, first->type != AggregatedDataVariants::Type::without_key); - - setCurrentChunk(convertToChunk(block)); - } - } - - void mergeSingleLevel() - { - AggregatedDataVariantsPtr & first = data->at(0); - - if (current_bucket_num > 0 || first->type == AggregatedDataVariants::Type::without_key) - { - finished = true; - return; - } - - ++current_bucket_num; - - #define M(NAME) \ - else if (first->type == AggregatedDataVariants::Type::NAME) \ - params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data); - if (false) {} // NOLINT - APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) - #undef M - else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - - auto block = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); - - setCurrentChunk(convertToChunk(block)); - finished = true; - } - - void createSources() - { - AggregatedDataVariantsPtr & first = data->at(0); - shared_data = std::make_shared<ConvertingAggregatedToChunksSource::SharedData>(); - - for (size_t thread = 0; thread < num_threads; ++thread) - { + + setCurrentChunk(convertToChunk(block)); + } + } + + void mergeSingleLevel() + { + AggregatedDataVariantsPtr & first = data->at(0); + + if (current_bucket_num > 0 || first->type == AggregatedDataVariants::Type::without_key) + { + finished = true; + return; + } + + ++current_bucket_num; + + #define M(NAME) \ + else if (first->type == AggregatedDataVariants::Type::NAME) \ + params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data); + if (false) {} // NOLINT + APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) + #undef M + else + throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + + auto block = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); + + setCurrentChunk(convertToChunk(block)); + finished = true; + } + + void createSources() + { + AggregatedDataVariantsPtr & first = data->at(0); + shared_data = std::make_shared<ConvertingAggregatedToChunksSource::SharedData>(); + + for (size_t thread = 0; thread < num_threads; ++thread) + { /// Select Arena to avoid race conditions - Arena * arena 
= first->aggregates_pools.at(thread).get(); + Arena * arena = first->aggregates_pools.at(thread).get(); auto source = std::make_shared<ConvertingAggregatedToChunksSource>(params, data, shared_data, arena); - - processors.emplace_back(std::move(source)); - } - } -}; - -AggregatingTransform::AggregatingTransform(Block header, AggregatingTransformParamsPtr params_) - : AggregatingTransform(std::move(header), std::move(params_) - , std::make_unique<ManyAggregatedData>(1), 0, 1, 1) -{ -} - -AggregatingTransform::AggregatingTransform( + + processors.emplace_back(std::move(source)); + } + } +}; + +AggregatingTransform::AggregatingTransform(Block header, AggregatingTransformParamsPtr params_) + : AggregatingTransform(std::move(header), std::move(params_) + , std::make_unique<ManyAggregatedData>(1), 0, 1, 1) +{ +} + +AggregatingTransform::AggregatingTransform( Block header, AggregatingTransformParamsPtr params_, ManyAggregatedDataPtr many_data_, @@ -403,129 +403,129 @@ AggregatingTransform::AggregatingTransform( size_t temporary_data_merge_threads_) : IProcessor({std::move(header)}, {params_->getHeader()}) , params(std::move(params_)) - , key_columns(params->params.keys_size) - , aggregate_columns(params->params.aggregates_size) - , many_data(std::move(many_data_)) - , variants(*many_data->variants[current_variant]) - , max_threads(std::min(many_data->variants.size(), max_threads_)) - , temporary_data_merge_threads(temporary_data_merge_threads_) -{ -} - -AggregatingTransform::~AggregatingTransform() = default; - -IProcessor::Status AggregatingTransform::prepare() -{ - /// There are one or two input ports. - /// The first one is used at aggregation step, the second one - while reading merged data from ConvertingAggregated - - auto & output = outputs.front(); - /// Last output is current. All other outputs should already be closed. - auto & input = inputs.back(); - - /// Check can output. - if (output.isFinished()) - { - input.close(); - return Status::Finished; - } - - if (!output.canPush()) - { - input.setNotNeeded(); - return Status::PortFull; - } - - /// Finish data processing, prepare to generating. - if (is_consume_finished && !is_generate_initialized) - { - /// Close input port in case max_rows_to_group_by was reached but not all data was read. - inputs.front().close(); - - return Status::Ready; - } - - if (is_generate_initialized && !is_pipeline_created && !processors.empty()) - return Status::ExpandPipeline; - - /// Only possible while consuming. - if (read_current_chunk) - return Status::Ready; - - /// Get chunk from input. - if (input.isFinished()) - { - if (is_consume_finished) - { - output.finish(); - return Status::Finished; - } - else - { - /// Finish data processing and create another pipe. 
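[Editor's note] prepareTwoLevel above buffers whichever bucket happens to arrive and pushes strictly in bucket order, waiting until the shared is_bucket_processed flag says the current bucket is complete. The ordering logic reduces to a few lines; a sketch with hypothetical names, single-threaded for clarity:

    #include <array>
    #include <cstdio>
    #include <optional>
    #include <string>

    static constexpr unsigned NUM_BUCKETS = 4; /// small for the demo; 256 in the source

    int main()
    {
        std::array<bool, NUM_BUCKETS> processed{};                   /// set by workers
        std::array<std::optional<std::string>, NUM_BUCKETS> chunks;  /// arrived results
        unsigned current = 0;

        /// Results arrive out of order...
        auto deliver = [&](unsigned bucket, std::string chunk)
        {
            chunks[bucket] = std::move(chunk);
            processed[bucket] = true;
            /// ...but are released strictly in bucket order, as in
            /// prepareTwoLevel: no output until the *current* bucket is ready.
            while (current < NUM_BUCKETS && processed[current] && chunks[current])
            {
                std::printf("push %s\n", chunks[current]->c_str());
                chunks[current].reset();
                ++current;
            }
        };

        deliver(2, "bucket-2"); /// buffered, nothing pushed yet
        deliver(0, "bucket-0"); /// pushes bucket-0 only
        deliver(1, "bucket-1"); /// pushes bucket-1, then the buffered bucket-2
        deliver(3, "bucket-3");
    }

This is why the class comment can promise that result chunks are sorted by bucket number even though the workers finish in arbitrary order.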
- is_consume_finished = true; - return Status::Ready; - } - } - - if (!input.hasData()) - { - input.setNeeded(); - return Status::NeedData; - } - - if (is_consume_finished) - input.setNeeded(); - - current_chunk = input.pull(/*set_not_needed = */ !is_consume_finished); - read_current_chunk = true; - - if (is_consume_finished) - { - output.push(std::move(current_chunk)); - read_current_chunk = false; - return Status::PortFull; - } - - return Status::Ready; -} - -void AggregatingTransform::work() -{ - if (is_consume_finished) - initGenerate(); - else - { - consume(std::move(current_chunk)); - read_current_chunk = false; - } -} - -Processors AggregatingTransform::expandPipeline() -{ - auto & out = processors.back()->getOutputs().front(); - inputs.emplace_back(out.getHeader(), this); - connect(out, inputs.back()); - is_pipeline_created = true; - return std::move(processors); -} - -void AggregatingTransform::consume(Chunk chunk) -{ + , key_columns(params->params.keys_size) + , aggregate_columns(params->params.aggregates_size) + , many_data(std::move(many_data_)) + , variants(*many_data->variants[current_variant]) + , max_threads(std::min(many_data->variants.size(), max_threads_)) + , temporary_data_merge_threads(temporary_data_merge_threads_) +{ +} + +AggregatingTransform::~AggregatingTransform() = default; + +IProcessor::Status AggregatingTransform::prepare() +{ + /// There are one or two input ports. + /// The first one is used at aggregation step, the second one - while reading merged data from ConvertingAggregated + + auto & output = outputs.front(); + /// Last output is current. All other outputs should already be closed. + auto & input = inputs.back(); + + /// Check can output. + if (output.isFinished()) + { + input.close(); + return Status::Finished; + } + + if (!output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + + /// Finish data processing, prepare to generating. + if (is_consume_finished && !is_generate_initialized) + { + /// Close input port in case max_rows_to_group_by was reached but not all data was read. + inputs.front().close(); + + return Status::Ready; + } + + if (is_generate_initialized && !is_pipeline_created && !processors.empty()) + return Status::ExpandPipeline; + + /// Only possible while consuming. + if (read_current_chunk) + return Status::Ready; + + /// Get chunk from input. + if (input.isFinished()) + { + if (is_consume_finished) + { + output.finish(); + return Status::Finished; + } + else + { + /// Finish data processing and create another pipe. 
+ is_consume_finished = true; + return Status::Ready; + } + } + + if (!input.hasData()) + { + input.setNeeded(); + return Status::NeedData; + } + + if (is_consume_finished) + input.setNeeded(); + + current_chunk = input.pull(/*set_not_needed = */ !is_consume_finished); + read_current_chunk = true; + + if (is_consume_finished) + { + output.push(std::move(current_chunk)); + read_current_chunk = false; + return Status::PortFull; + } + + return Status::Ready; +} + +void AggregatingTransform::work() +{ + if (is_consume_finished) + initGenerate(); + else + { + consume(std::move(current_chunk)); + read_current_chunk = false; + } +} + +Processors AggregatingTransform::expandPipeline() +{ + auto & out = processors.back()->getOutputs().front(); + inputs.emplace_back(out.getHeader(), this); + connect(out, inputs.back()); + is_pipeline_created = true; + return std::move(processors); +} + +void AggregatingTransform::consume(Chunk chunk) +{ const UInt64 num_rows = chunk.getNumRows(); - - if (num_rows == 0 && params->params.empty_result_for_aggregation_by_empty_set) - return; - - if (!is_consume_started) - { - LOG_TRACE(log, "Aggregating"); - is_consume_started = true; - } - + + if (num_rows == 0 && params->params.empty_result_for_aggregation_by_empty_set) + return; + + if (!is_consume_started) + { + LOG_TRACE(log, "Aggregating"); + is_consume_started = true; + } + src_rows += num_rows; - src_bytes += chunk.bytes(); - + src_bytes += chunk.bytes(); + if (params->only_merge) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -538,76 +538,76 @@ void AggregatingTransform::consume(Chunk chunk) if (!params->aggregator.executeOnBlock(chunk.detachColumns(), num_rows, variants, key_columns, aggregate_columns, no_more_keys)) is_consume_finished = true; } -} - -void AggregatingTransform::initGenerate() -{ - if (is_generate_initialized) - return; - - is_generate_initialized = true; - - /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation. - /// To do this, we pass a block with zero rows to aggregate. - if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set) +} + +void AggregatingTransform::initGenerate() +{ + if (is_generate_initialized) + return; + + is_generate_initialized = true; + + /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation. + /// To do this, we pass a block with zero rows to aggregate. + if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set) { if (params->only_merge) params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys); else params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys); } - - double elapsed_seconds = watch.elapsedSeconds(); - size_t rows = variants.sizeWithoutOverflowRow(); - + + double elapsed_seconds = watch.elapsedSeconds(); + size_t rows = variants.sizeWithoutOverflowRow(); + LOG_DEBUG(log, "Aggregated. {} to {} rows (from {}) in {} sec. 
({:.3f} rows/sec., {}/sec.)", - src_rows, rows, ReadableSize(src_bytes), + src_rows, rows, ReadableSize(src_bytes), elapsed_seconds, src_rows / elapsed_seconds, - ReadableSize(src_bytes / elapsed_seconds)); - - if (params->aggregator.hasTemporaryFiles()) - { - if (variants.isConvertibleToTwoLevel()) - variants.convertToTwoLevel(); - - /// Flush data in the RAM to disk also. It's easier than merging on-disk and RAM data. - if (!variants.empty()) - params->aggregator.writeToTemporaryFile(variants); - } - - if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size()) - return; - - if (!params->aggregator.hasTemporaryFiles()) - { - auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants); - auto prepared_data_ptr = std::make_shared<ManyAggregatedDataVariants>(std::move(prepared_data)); - processors.emplace_back(std::make_shared<ConvertingAggregatedToChunksTransform>(params, std::move(prepared_data_ptr), max_threads)); - } - else - { - /// If there are temporary files with partially-aggregated data on the disk, - /// then read and merge them, spending the minimum amount of memory. - - ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge); - - if (many_data->variants.size() > 1) - { - /// It may happen that some data has not yet been flushed, - /// because at the time thread has finished, no data has been flushed to disk, and then some were. - for (auto & cur_variants : many_data->variants) - { - if (cur_variants->isConvertibleToTwoLevel()) - cur_variants->convertToTwoLevel(); - - if (!cur_variants->empty()) - params->aggregator.writeToTemporaryFile(*cur_variants); - } - } - - const auto & files = params->aggregator.getTemporaryFiles(); + ReadableSize(src_bytes / elapsed_seconds)); + + if (params->aggregator.hasTemporaryFiles()) + { + if (variants.isConvertibleToTwoLevel()) + variants.convertToTwoLevel(); + + /// Flush data in the RAM to disk also. It's easier than merging on-disk and RAM data. + if (!variants.empty()) + params->aggregator.writeToTemporaryFile(variants); + } + + if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size()) + return; + + if (!params->aggregator.hasTemporaryFiles()) + { + auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants); + auto prepared_data_ptr = std::make_shared<ManyAggregatedDataVariants>(std::move(prepared_data)); + processors.emplace_back(std::make_shared<ConvertingAggregatedToChunksTransform>(params, std::move(prepared_data_ptr), max_threads)); + } + else + { + /// If there are temporary files with partially-aggregated data on the disk, + /// then read and merge them, spending the minimum amount of memory. + + ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge); + + if (many_data->variants.size() > 1) + { + /// It may happen that some data has not yet been flushed, + /// because at the time thread has finished, no data has been flushed to disk, and then some were. 
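[Editor's note] The `num_finished.fetch_add(1) + 1 < variants.size()` check above is the classic last-finisher handoff: every consuming thread bumps a shared counter when its input runs out, and only the thread that observes the final value goes on to build the merging stage. A self-contained sketch of just that rendezvous (names are illustrative):

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main()
    {
        const size_t num_threads = 4;
        std::atomic<size_t> num_finished{0};

        auto consume_and_maybe_merge = [&](size_t id)
        {
            /// ... each thread consumes its own share of the input here ...

            /// fetch_add returns the previous value, so exactly one thread -
            /// the last to finish - sees the counter reach num_threads.
            if (num_finished.fetch_add(1) + 1 < num_threads)
                return; /// not the last one; the merge is someone else's job

            std::printf("thread %zu finished last, initializing merge\n", id);
        };

        std::vector<std::thread> pool;
        for (size_t i = 0; i < num_threads; ++i)
            pool.emplace_back(consume_and_maybe_merge, i);
        for (auto & t : pool)
            t.join();
    }

The same pattern guarantees that exactly one AggregatingTransform constructs the ConvertingAggregatedToChunksTransform, no matter how the threads interleave.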
+ for (auto & cur_variants : many_data->variants) + { + if (cur_variants->isConvertibleToTwoLevel()) + cur_variants->convertToTwoLevel(); + + if (!cur_variants->empty()) + params->aggregator.writeToTemporaryFile(*cur_variants); + } + } + + const auto & files = params->aggregator.getTemporaryFiles(); Pipe pipe; - + { auto header = params->aggregator.getHeader(false); Pipes pipes; @@ -624,11 +624,11 @@ void AggregatingTransform::initGenerate() files.files.size(), ReadableSize(files.sum_size_compressed), ReadableSize(files.sum_size_uncompressed)); - + addMergingAggregatedMemoryEfficientTransform(pipe, params, temporary_data_merge_threads); - + processors = Pipe::detachProcessors(std::move(pipe)); - } -} - -} + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h index a673b4fdfd..1639bc4df4 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h @@ -1,13 +1,13 @@ -#pragma once -#include <Processors/IAccumulatingTransform.h> -#include <Interpreters/Aggregator.h> -#include <IO/ReadBufferFromFile.h> -#include <Compression/CompressedReadBuffer.h> -#include <Common/Stopwatch.h> - -namespace DB -{ - +#pragma once +#include <Processors/IAccumulatingTransform.h> +#include <Interpreters/Aggregator.h> +#include <IO/ReadBufferFromFile.h> +#include <Compression/CompressedReadBuffer.h> +#include <Common/Stopwatch.h> + +namespace DB +{ + class AggregatedArenasChunkInfo : public ChunkInfo { public: @@ -17,22 +17,22 @@ public: {} }; -class AggregatedChunkInfo : public ChunkInfo -{ -public: - bool is_overflows = false; - Int32 bucket_num = -1; -}; - -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>; - +class AggregatedChunkInfo : public ChunkInfo +{ +public: + bool is_overflows = false; + Int32 bucket_num = -1; +}; + +class IBlockInputStream; +using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>; + using AggregatorList = std::list<Aggregator>; using AggregatorListPtr = std::shared_ptr<AggregatorList>; -struct AggregatingTransformParams -{ - Aggregator::Params params; +struct AggregatingTransformParams +{ + Aggregator::Params params; /// Each params holds a list of aggregators which are used in query. It's needed because we need /// to use a pointer of aggregator to proper destroy complex aggregation states on exception @@ -41,17 +41,17 @@ struct AggregatingTransformParams /// projections, and one of them might gets destroyed before used. 
AggregatorListPtr aggregator_list_ptr; Aggregator & aggregator; - bool final; + bool final; bool only_merge = false; - - AggregatingTransformParams(const Aggregator::Params & params_, bool final_) + + AggregatingTransformParams(const Aggregator::Params & params_, bool final_) : params(params_) , aggregator_list_ptr(std::make_shared<AggregatorList>()) , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params)) , final(final_) { } - + AggregatingTransformParams(const Aggregator::Params & params_, const AggregatorListPtr & aggregator_list_ptr_, bool final_) : params(params_) , aggregator_list_ptr(aggregator_list_ptr_) @@ -60,51 +60,51 @@ struct AggregatingTransformParams { } - Block getHeader() const { return aggregator.getHeader(final); } - - Block getCustomHeader(bool final_) const { return aggregator.getHeader(final_); } -}; - -struct ManyAggregatedData -{ - ManyAggregatedDataVariants variants; - std::vector<std::unique_ptr<std::mutex>> mutexes; - std::atomic<UInt32> num_finished = 0; - - explicit ManyAggregatedData(size_t num_threads = 0) : variants(num_threads), mutexes(num_threads) - { - for (auto & elem : variants) - elem = std::make_shared<AggregatedDataVariants>(); - - for (auto & mut : mutexes) - mut = std::make_unique<std::mutex>(); - } -}; - -using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams>; -using ManyAggregatedDataPtr = std::shared_ptr<ManyAggregatedData>; - -/** Aggregates the stream of blocks using the specified key columns and aggregate functions. - * Columns with aggregate functions adds to the end of the block. - * If final = false, the aggregate functions are not finalized, that is, they are not replaced by their value, but contain an intermediate state of calculations. - * This is necessary so that aggregation can continue (for example, by combining streams of partially aggregated data). - * - * For every separate stream of data separate AggregatingTransform is created. - * Every AggregatingTransform reads data from the first port till is is not run out, or max_rows_to_group_by reached. - * When the last AggregatingTransform finish reading, the result of aggregation is needed to be merged together. - * This task is performed by ConvertingAggregatedToChunksTransform. - * Last AggregatingTransform expands pipeline and adds second input port, which reads from ConvertingAggregated. - * - * Aggregation data is passed by ManyAggregatedData structure, which is shared between all aggregating transforms. - * At aggregation step, every transform uses it's own AggregatedDataVariants structure. - * At merging step, all structures pass to ConvertingAggregatedToChunksTransform. - */ -class AggregatingTransform : public IProcessor -{ -public: - AggregatingTransform(Block header, AggregatingTransformParamsPtr params_); - - /// For Parallel aggregating. 
+ Block getHeader() const { return aggregator.getHeader(final); } + + Block getCustomHeader(bool final_) const { return aggregator.getHeader(final_); } +}; + +struct ManyAggregatedData +{ + ManyAggregatedDataVariants variants; + std::vector<std::unique_ptr<std::mutex>> mutexes; + std::atomic<UInt32> num_finished = 0; + + explicit ManyAggregatedData(size_t num_threads = 0) : variants(num_threads), mutexes(num_threads) + { + for (auto & elem : variants) + elem = std::make_shared<AggregatedDataVariants>(); + + for (auto & mut : mutexes) + mut = std::make_unique<std::mutex>(); + } +}; + +using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams>; +using ManyAggregatedDataPtr = std::shared_ptr<ManyAggregatedData>; + +/** Aggregates the stream of blocks using the specified key columns and aggregate functions. + * Columns with aggregate functions adds to the end of the block. + * If final = false, the aggregate functions are not finalized, that is, they are not replaced by their value, but contain an intermediate state of calculations. + * This is necessary so that aggregation can continue (for example, by combining streams of partially aggregated data). + * + * For every separate stream of data separate AggregatingTransform is created. + * Every AggregatingTransform reads data from the first port till is is not run out, or max_rows_to_group_by reached. + * When the last AggregatingTransform finish reading, the result of aggregation is needed to be merged together. + * This task is performed by ConvertingAggregatedToChunksTransform. + * Last AggregatingTransform expands pipeline and adds second input port, which reads from ConvertingAggregated. + * + * Aggregation data is passed by ManyAggregatedData structure, which is shared between all aggregating transforms. + * At aggregation step, every transform uses it's own AggregatedDataVariants structure. + * At merging step, all structures pass to ConvertingAggregatedToChunksTransform. + */ +class AggregatingTransform : public IProcessor +{ +public: + AggregatingTransform(Block header, AggregatingTransformParamsPtr params_); + + /// For Parallel aggregating. AggregatingTransform( Block header, AggregatingTransformParamsPtr params_, @@ -112,56 +112,56 @@ public: size_t current_variant, size_t max_threads, size_t temporary_data_merge_threads); - ~AggregatingTransform() override; - - String getName() const override { return "AggregatingTransform"; } - Status prepare() override; - void work() override; - Processors expandPipeline() override; - -protected: - void consume(Chunk chunk); - -private: - /// To read the data that was flushed into the temporary data file. - Processors processors; - - AggregatingTransformParamsPtr params; - Poco::Logger * log = &Poco::Logger::get("AggregatingTransform"); - - ColumnRawPtrs key_columns; - Aggregator::AggregateColumns aggregate_columns; + ~AggregatingTransform() override; + + String getName() const override { return "AggregatingTransform"; } + Status prepare() override; + void work() override; + Processors expandPipeline() override; + +protected: + void consume(Chunk chunk); + +private: + /// To read the data that was flushed into the temporary data file. + Processors processors; + + AggregatingTransformParamsPtr params; + Poco::Logger * log = &Poco::Logger::get("AggregatingTransform"); + + ColumnRawPtrs key_columns; + Aggregator::AggregateColumns aggregate_columns; /** Used if there is a limit on the maximum number of rows in the aggregation, * and if group_by_overflow_mode == ANY. 
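[Editor's note] The no_more_keys flag documented above implements the behavior the comment describes for group_by_overflow_mode == ANY: once the key set hits the row limit, unseen keys are silently dropped while already-registered keys keep accumulating. A toy hash aggregator showing just that cutoff (std::unordered_map instead of the real aggregation variants; all names hypothetical):

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main()
    {
        const size_t max_rows_to_group_by = 2;
        bool no_more_keys = false;
        std::unordered_map<std::string, long> sums;

        std::vector<std::pair<std::string, long>> input =
            {{"a", 1}, {"b", 2}, {"c", 3}, {"a", 4}, {"c", 5}};

        for (const auto & [key, value] : input)
        {
            auto it = sums.find(key);
            if (it == sums.end())
            {
                if (no_more_keys)
                    continue; /// overflow_mode == ANY: drop rows with unseen keys
                it = sums.emplace(key, 0).first;
                if (sums.size() >= max_rows_to_group_by)
                    no_more_keys = true; /// the key set is now frozen
            }
            it->second += value; /// existing keys keep aggregating
        }

        for (const auto & [key, sum] : sums)
            std::printf("%s -> %ld\n", key.c_str(), sum);
        /// prints a -> 5 and b -> 2; the rows for "c" were dropped
    }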
* In this case, new keys are not added to the set, but aggregation is performed only by * keys that have already managed to get into the set. */ - bool no_more_keys = false; - - ManyAggregatedDataPtr many_data; - AggregatedDataVariants & variants; - size_t max_threads = 1; - size_t temporary_data_merge_threads = 1; - - /// TODO: calculate time only for aggregation. - Stopwatch watch; - - UInt64 src_rows = 0; - UInt64 src_bytes = 0; - - bool is_generate_initialized = false; - bool is_consume_finished = false; - bool is_pipeline_created = false; - - Chunk current_chunk; - bool read_current_chunk = false; - - bool is_consume_started = false; - - void initGenerate(); -}; - -Chunk convertToChunk(const Block & block); - -} + bool no_more_keys = false; + + ManyAggregatedDataPtr many_data; + AggregatedDataVariants & variants; + size_t max_threads = 1; + size_t temporary_data_merge_threads = 1; + + /// TODO: calculate time only for aggregation. + Stopwatch watch; + + UInt64 src_rows = 0; + UInt64 src_bytes = 0; + + bool is_generate_initialized = false; + bool is_consume_finished = false; + bool is_pipeline_created = false; + + Chunk current_chunk; + bool read_current_chunk = false; + + bool is_consume_started = false; + + void initGenerate(); +}; + +Chunk convertToChunk(const Block & block); + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp index 8245b22465..526bbc6337 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp @@ -1,123 +1,123 @@ -#include <Processors/Transforms/ExtremesTransform.h> - -#include <Core/Field.h> - -namespace DB -{ - -ExtremesTransform::ExtremesTransform(const Block & header) - : ISimpleTransform(header, header, true) -{ - /// Port for Extremes. - outputs.emplace_back(outputs.front().getHeader(), this); -} - -IProcessor::Status ExtremesTransform::prepare() -{ - if (!finished_transform) - { - auto status = ISimpleTransform::prepare(); - - if (status != Status::Finished) - return status; - - finished_transform = true; - } - - auto & totals_output = getExtremesPort(); - - /// Check can output. - if (totals_output.isFinished()) - return Status::Finished; - - if (!totals_output.canPush()) - return Status::PortFull; - - if (!extremes && !extremes_columns.empty()) - return Status::Ready; - - if (extremes) - totals_output.push(std::move(extremes)); - - totals_output.finish(); - return Status::Finished; -} - -void ExtremesTransform::work() -{ - if (finished_transform) - { - if (!extremes && !extremes_columns.empty()) - extremes.setColumns(std::move(extremes_columns), 2); - } - else - ISimpleTransform::work(); -} - -void ExtremesTransform::transform(DB::Chunk & chunk) -{ - - if (chunk.getNumRows() == 0) - return; - - size_t num_columns = chunk.getNumColumns(); - const auto & columns = chunk.getColumns(); - - if (extremes_columns.empty()) - { - extremes_columns.resize(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnPtr & src = columns[i]; - - if (isColumnConst(*src)) - { - /// Equal min and max. 
- extremes_columns[i] = src->cloneResized(2); - } - else - { - Field min_value; - Field max_value; - - src->getExtremes(min_value, max_value); - - extremes_columns[i] = src->cloneEmpty(); - - extremes_columns[i]->insert(min_value); - extremes_columns[i]->insert(max_value); - } - } - } - else - { - for (size_t i = 0; i < num_columns; ++i) - { - if (isColumnConst(*extremes_columns[i])) - continue; - - Field min_value = (*extremes_columns[i])[0]; - Field max_value = (*extremes_columns[i])[1]; - - Field cur_min_value; - Field cur_max_value; - - columns[i]->getExtremes(cur_min_value, cur_max_value); - - if (cur_min_value < min_value) - min_value = cur_min_value; - if (cur_max_value > max_value) - max_value = cur_max_value; - - MutableColumnPtr new_extremes = extremes_columns[i]->cloneEmpty(); - - new_extremes->insert(min_value); - new_extremes->insert(max_value); - - extremes_columns[i] = std::move(new_extremes); - } - } -} - -} +#include <Processors/Transforms/ExtremesTransform.h> + +#include <Core/Field.h> + +namespace DB +{ + +ExtremesTransform::ExtremesTransform(const Block & header) + : ISimpleTransform(header, header, true) +{ + /// Port for Extremes. + outputs.emplace_back(outputs.front().getHeader(), this); +} + +IProcessor::Status ExtremesTransform::prepare() +{ + if (!finished_transform) + { + auto status = ISimpleTransform::prepare(); + + if (status != Status::Finished) + return status; + + finished_transform = true; + } + + auto & totals_output = getExtremesPort(); + + /// Check can output. + if (totals_output.isFinished()) + return Status::Finished; + + if (!totals_output.canPush()) + return Status::PortFull; + + if (!extremes && !extremes_columns.empty()) + return Status::Ready; + + if (extremes) + totals_output.push(std::move(extremes)); + + totals_output.finish(); + return Status::Finished; +} + +void ExtremesTransform::work() +{ + if (finished_transform) + { + if (!extremes && !extremes_columns.empty()) + extremes.setColumns(std::move(extremes_columns), 2); + } + else + ISimpleTransform::work(); +} + +void ExtremesTransform::transform(DB::Chunk & chunk) +{ + + if (chunk.getNumRows() == 0) + return; + + size_t num_columns = chunk.getNumColumns(); + const auto & columns = chunk.getColumns(); + + if (extremes_columns.empty()) + { + extremes_columns.resize(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnPtr & src = columns[i]; + + if (isColumnConst(*src)) + { + /// Equal min and max. 
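[Editor's note] ExtremesTransform above keeps a two-row accumulator per column — row 0 the running minimum, row 1 the running maximum — updated chunk by chunk. The same fold in miniature (plain int vectors instead of IColumn; std::minmax_element stands in for getExtremes):

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
        std::vector<std::pair<int, int>> extremes; /// per-column {min, max}

        /// Each "chunk" is a list of non-empty columns of equal length.
        auto consume_chunk = [&](const std::vector<std::vector<int>> & columns)
        {
            bool first = extremes.empty();
            if (first)
                extremes.resize(columns.size());
            for (size_t i = 0; i < columns.size(); ++i)
            {
                auto [lo, hi] = std::minmax_element(columns[i].begin(), columns[i].end());
                if (first)
                    extremes[i] = {*lo, *hi}; /// first chunk seeds the accumulator
                else
                {
                    extremes[i].first = std::min(extremes[i].first, *lo);
                    extremes[i].second = std::max(extremes[i].second, *hi);
                }
            }
        };

        consume_chunk({{5, 1, 3}, {10, 20, 30}});
        consume_chunk({{0, 7}, {25, 15}});

        for (auto & [mn, mx] : extremes)
            std::printf("min=%d max=%d\n", mn, mx);
        /// column 0: min=0 max=7; column 1: min=10 max=30
    }

The const-column branch in the source is the degenerate case of this fold: for a constant column, min and max are simply two copies of the same value.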
+ extremes_columns[i] = src->cloneResized(2); + } + else + { + Field min_value; + Field max_value; + + src->getExtremes(min_value, max_value); + + extremes_columns[i] = src->cloneEmpty(); + + extremes_columns[i]->insert(min_value); + extremes_columns[i]->insert(max_value); + } + } + } + else + { + for (size_t i = 0; i < num_columns; ++i) + { + if (isColumnConst(*extremes_columns[i])) + continue; + + Field min_value = (*extremes_columns[i])[0]; + Field max_value = (*extremes_columns[i])[1]; + + Field cur_min_value; + Field cur_max_value; + + columns[i]->getExtremes(cur_min_value, cur_max_value); + + if (cur_min_value < min_value) + min_value = cur_min_value; + if (cur_max_value > max_value) + max_value = cur_max_value; + + MutableColumnPtr new_extremes = extremes_columns[i]->cloneEmpty(); + + new_extremes->insert(min_value); + new_extremes->insert(max_value); + + extremes_columns[i] = std::move(new_extremes); + } + } +} + +} diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h index 1c951b3540..8c3f6348d4 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h @@ -1,31 +1,31 @@ #pragma once -#include <Processors/ISimpleTransform.h> - -namespace DB -{ - -class ExtremesTransform : public ISimpleTransform -{ - -public: - explicit ExtremesTransform(const Block & header); - - String getName() const override { return "ExtremesTransform"; } - - OutputPort & getExtremesPort() { return outputs.back(); } - - Status prepare() override; - void work() override; - -protected: - void transform(Chunk & chunk) override; - - bool finished_transform = false; - Chunk extremes; - -private: - MutableColumns extremes_columns; -}; - -} - +#include <Processors/ISimpleTransform.h> + +namespace DB +{ + +class ExtremesTransform : public ISimpleTransform +{ + +public: + explicit ExtremesTransform(const Block & header); + + String getName() const override { return "ExtremesTransform"; } + + OutputPort & getExtremesPort() { return outputs.back(); } + + Status prepare() override; + void work() override; + +protected: + void transform(Chunk & chunk) override; + + bool finished_transform = false; + Chunk extremes; + +private: + MutableColumns extremes_columns; +}; + +} + diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index 1fab9ca34c..df2ea4b03f 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -1,525 +1,525 @@ -#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h> - +#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h> + #include <Interpreters/Aggregator.h> -#include <Processors/ISimpleTransform.h> -#include <Processors/ResizeProcessor.h> +#include <Processors/ISimpleTransform.h> +#include <Processors/ResizeProcessor.h> #include <Processors/Pipe.h> - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -struct ChunksToMerge : public ChunkInfo -{ - 
std::unique_ptr<Chunks> chunks; - Int32 bucket_num = -1; - bool is_overflows = false; -}; - -GroupingAggregatedTransform::GroupingAggregatedTransform( - const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_) - : IProcessor(InputPorts(num_inputs_, header_), { Block() }) - , num_inputs(num_inputs_) - , params(std::move(params_)) - , last_bucket_number(num_inputs, -1) - , read_from_input(num_inputs, false) -{ -} - -void GroupingAggregatedTransform::readFromAllInputs() -{ - auto in = inputs.begin(); - read_from_all_inputs = true; - - for (size_t i = 0; i < num_inputs; ++i, ++in) - { - if (in->isFinished()) - continue; - - if (read_from_input[i]) - continue; - - in->setNeeded(); - - if (!in->hasData()) - { - read_from_all_inputs = false; - continue; - } - - auto chunk = in->pull(); - read_from_input[i] = true; - addChunk(std::move(chunk), i); - } -} - -void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_overflows) -{ - auto & output = outputs.front(); - - auto info = std::make_shared<ChunksToMerge>(); - info->bucket_num = bucket; - info->is_overflows = is_overflows; - info->chunks = std::make_unique<Chunks>(std::move(chunks)); - - Chunk chunk; - chunk.setChunkInfo(std::move(info)); - output.push(std::move(chunk)); -} - -bool GroupingAggregatedTransform::tryPushTwoLevelData() -{ - auto try_push_by_iter = [&](auto batch_it) - { - if (batch_it == chunks_map.end()) - return false; - - Chunks & cur_chunks = batch_it->second; - if (cur_chunks.empty()) - { - chunks_map.erase(batch_it); - return false; - } - - pushData(std::move(cur_chunks), batch_it->first, false); - chunks_map.erase(batch_it); - return true; - }; - - if (all_inputs_finished) - { - /// Chunks are sorted by bucket. - while (!chunks_map.empty()) - if (try_push_by_iter(chunks_map.begin())) - return true; - } - else - { - for (; next_bucket_to_push < current_bucket; ++next_bucket_to_push) - if (try_push_by_iter(chunks_map.find(next_bucket_to_push))) - return true; - } - - return false; -} - -bool GroupingAggregatedTransform::tryPushSingleLevelData() -{ - if (single_level_chunks.empty()) - return false; - - pushData(std::move(single_level_chunks), -1, false); - return true; -} - -bool GroupingAggregatedTransform::tryPushOverflowData() -{ - if (overflow_chunks.empty()) - return false; - - pushData(std::move(overflow_chunks), -1, true); - return true; -} - -IProcessor::Status GroupingAggregatedTransform::prepare() -{ - /// Check can output. - auto & output = outputs.front(); - - if (output.isFinished()) - { - for (auto & input : inputs) - input.close(); - - chunks_map.clear(); - last_bucket_number.clear(); - return Status::Finished; - } - - /// Read first time from each input to understand if we have two-level aggregation. - if (!read_from_all_inputs) - { - readFromAllInputs(); - if (!read_from_all_inputs) - return Status::NeedData; - } - - /// Convert single level to two levels if have two-level input. - if (has_two_level && !single_level_chunks.empty()) - return Status::Ready; - - /// Check can push (to avoid data caching). - if (!output.canPush()) - { - for (auto & input : inputs) - input.setNotNeeded(); - - return Status::PortFull; - } - - bool pushed_to_output = false; - - /// Output if has data. 
- if (has_two_level) - pushed_to_output = tryPushTwoLevelData(); - - auto need_input = [this](size_t input_num) - { - if (last_bucket_number[input_num] < current_bucket) - return true; - - return expect_several_chunks_for_single_bucket_per_source && last_bucket_number[input_num] == current_bucket; - }; - - /// Read next bucket if can. - for (; ; ++current_bucket) - { - bool finished = true; - bool need_data = false; - - auto in = inputs.begin(); - for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in) - { - if (in->isFinished()) - continue; - - finished = false; - - if (!need_input(input_num)) - continue; - - in->setNeeded(); - - if (!in->hasData()) - { - need_data = true; - continue; - } - - auto chunk = in->pull(); - addChunk(std::move(chunk), input_num); - - if (has_two_level && !single_level_chunks.empty()) - return Status::Ready; - - if (!in->isFinished() && need_input(input_num)) - need_data = true; - } - - if (finished) - { - all_inputs_finished = true; - break; - } - - if (need_data) - return Status::NeedData; - } - - if (pushed_to_output) - return Status::PortFull; - - if (has_two_level) - { - if (tryPushTwoLevelData()) - return Status::PortFull; - - /// Sanity check. If new bucket was read, we should be able to push it. + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +struct ChunksToMerge : public ChunkInfo +{ + std::unique_ptr<Chunks> chunks; + Int32 bucket_num = -1; + bool is_overflows = false; +}; + +GroupingAggregatedTransform::GroupingAggregatedTransform( + const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_) + : IProcessor(InputPorts(num_inputs_, header_), { Block() }) + , num_inputs(num_inputs_) + , params(std::move(params_)) + , last_bucket_number(num_inputs, -1) + , read_from_input(num_inputs, false) +{ +} + +void GroupingAggregatedTransform::readFromAllInputs() +{ + auto in = inputs.begin(); + read_from_all_inputs = true; + + for (size_t i = 0; i < num_inputs; ++i, ++in) + { + if (in->isFinished()) + continue; + + if (read_from_input[i]) + continue; + + in->setNeeded(); + + if (!in->hasData()) + { + read_from_all_inputs = false; + continue; + } + + auto chunk = in->pull(); + read_from_input[i] = true; + addChunk(std::move(chunk), i); + } +} + +void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_overflows) +{ + auto & output = outputs.front(); + + auto info = std::make_shared<ChunksToMerge>(); + info->bucket_num = bucket; + info->is_overflows = is_overflows; + info->chunks = std::make_unique<Chunks>(std::move(chunks)); + + Chunk chunk; + chunk.setChunkInfo(std::move(info)); + output.push(std::move(chunk)); +} + +bool GroupingAggregatedTransform::tryPushTwoLevelData() +{ + auto try_push_by_iter = [&](auto batch_it) + { + if (batch_it == chunks_map.end()) + return false; + + Chunks & cur_chunks = batch_it->second; + if (cur_chunks.empty()) + { + chunks_map.erase(batch_it); + return false; + } + + pushData(std::move(cur_chunks), batch_it->first, false); + chunks_map.erase(batch_it); + return true; + }; + + if (all_inputs_finished) + { + /// Chunks are sorted by bucket. 
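[Editor's note] GroupingAggregatedTransform above batches chunks from all inputs by bucket number and only releases a bucket once every input has moved past it — that watermark is what keeps only a handful of buckets in RAM at a time. A simplified map-based sketch of the flush rule for two inputs (strings stand in for chunks; the real transform's need_input/current_bucket bookkeeping is more involved):

    #include <algorithm>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main()
    {
        std::map<int, std::vector<std::string>> chunks_map; /// bucket -> batch
        std::vector<int> last_bucket = {-1, -1};            /// per input
        int next_bucket_to_push = 0;

        auto add_chunk = [&](size_t input, int bucket, std::string chunk)
        {
            chunks_map[bucket].push_back(std::move(chunk));
            last_bucket[input] = bucket;

            /// A bucket is complete once every input has reported a later one.
            int watermark = std::min(last_bucket[0], last_bucket[1]);
            for (; next_bucket_to_push < watermark; ++next_bucket_to_push)
            {
                auto it = chunks_map.find(next_bucket_to_push);
                if (it == chunks_map.end())
                    continue;
                std::printf("push bucket %d (%zu chunks)\n",
                            it->first, it->second.size());
                chunks_map.erase(it);
            }
        };

        add_chunk(0, 0, "in0/b0");
        add_chunk(1, 0, "in1/b0"); /// bucket 0 not complete yet (watermark = 0)
        add_chunk(0, 1, "in0/b1");
        add_chunk(1, 2, "in1/b2"); /// watermark = 1 -> bucket 0 is pushed
        add_chunk(0, 2, "in0/b2"); /// watermark = 2 -> bucket 1 is pushed
    }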
+ while (!chunks_map.empty()) + if (try_push_by_iter(chunks_map.begin())) + return true; + } + else + { + for (; next_bucket_to_push < current_bucket; ++next_bucket_to_push) + if (try_push_by_iter(chunks_map.find(next_bucket_to_push))) + return true; + } + + return false; +} + +bool GroupingAggregatedTransform::tryPushSingleLevelData() +{ + if (single_level_chunks.empty()) + return false; + + pushData(std::move(single_level_chunks), -1, false); + return true; +} + +bool GroupingAggregatedTransform::tryPushOverflowData() +{ + if (overflow_chunks.empty()) + return false; + + pushData(std::move(overflow_chunks), -1, true); + return true; +} + +IProcessor::Status GroupingAggregatedTransform::prepare() +{ + /// Check can output. + auto & output = outputs.front(); + + if (output.isFinished()) + { + for (auto & input : inputs) + input.close(); + + chunks_map.clear(); + last_bucket_number.clear(); + return Status::Finished; + } + + /// Read first time from each input to understand if we have two-level aggregation. + if (!read_from_all_inputs) + { + readFromAllInputs(); + if (!read_from_all_inputs) + return Status::NeedData; + } + + /// Convert single level to two levels if have two-level input. + if (has_two_level && !single_level_chunks.empty()) + return Status::Ready; + + /// Check can push (to avoid data caching). + if (!output.canPush()) + { + for (auto & input : inputs) + input.setNotNeeded(); + + return Status::PortFull; + } + + bool pushed_to_output = false; + + /// Output if has data. + if (has_two_level) + pushed_to_output = tryPushTwoLevelData(); + + auto need_input = [this](size_t input_num) + { + if (last_bucket_number[input_num] < current_bucket) + return true; + + return expect_several_chunks_for_single_bucket_per_source && last_bucket_number[input_num] == current_bucket; + }; + + /// Read next bucket if can. + for (; ; ++current_bucket) + { + bool finished = true; + bool need_data = false; + + auto in = inputs.begin(); + for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in) + { + if (in->isFinished()) + continue; + + finished = false; + + if (!need_input(input_num)) + continue; + + in->setNeeded(); + + if (!in->hasData()) + { + need_data = true; + continue; + } + + auto chunk = in->pull(); + addChunk(std::move(chunk), input_num); + + if (has_two_level && !single_level_chunks.empty()) + return Status::Ready; + + if (!in->isFinished() && need_input(input_num)) + need_data = true; + } + + if (finished) + { + all_inputs_finished = true; + break; + } + + if (need_data) + return Status::NeedData; + } + + if (pushed_to_output) + return Status::PortFull; + + if (has_two_level) + { + if (tryPushTwoLevelData()) + return Status::PortFull; + + /// Sanity check. If new bucket was read, we should be able to push it. /// This is always false, but we still keep this condition in case the code will be changed. if (!all_inputs_finished) // -V547 - throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.", - ErrorCodes::LOGICAL_ERROR); - } - else - { + throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.", + ErrorCodes::LOGICAL_ERROR); + } + else + { if (!all_inputs_finished) // -V547 - throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, " - "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR); - - if (tryPushSingleLevelData()) - return Status::PortFull; - } - - /// If we haven't pushed to output, then all data was read. 
Push overflows if have. - if (tryPushOverflowData()) - return Status::PortFull; - - output.finish(); - return Status::Finished; -} - -void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) -{ - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); - - const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()); - if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); - - Int32 bucket = agg_info->bucket_num; - bool is_overflows = agg_info->is_overflows; - - if (is_overflows) - overflow_chunks.emplace_back(std::move(chunk)); - else if (bucket < 0) - single_level_chunks.emplace_back(std::move(chunk)); - else - { - chunks_map[bucket].emplace_back(std::move(chunk)); - has_two_level = true; - last_bucket_number[input] = bucket; - } -} - -void GroupingAggregatedTransform::work() -{ - /// Convert single level data to two level. - if (!single_level_chunks.empty()) - { - const auto & header = getInputs().front().getHeader(); /// Take header from input port. Output header is empty. - auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns()); - single_level_chunks.pop_back(); - auto blocks = params->aggregator.convertBlockToTwoLevel(block); - - for (auto & cur_block : blocks) - { - if (!cur_block) - continue; - - Int32 bucket = cur_block.info.bucket_num; - auto chunk_info = std::make_shared<AggregatedChunkInfo>(); - chunk_info->bucket_num = bucket; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); - } - } -} - - -MergingAggregatedBucketTransform::MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params_) - : ISimpleTransform({}, params_->getHeader(), false), params(std::move(params_)) -{ - setInputNotNeededAfterRead(true); -} - -void MergingAggregatedBucketTransform::transform(Chunk & chunk) -{ - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get()); - - if (!chunks_to_merge) - throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.", - ErrorCodes::LOGICAL_ERROR); - - auto header = params->aggregator.getHeader(false); - - BlocksList blocks_list; - for (auto & cur_chunk : *chunks_to_merge->chunks) - { - const auto & cur_info = cur_chunk.getChunkInfo(); - if (!cur_info) - throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.", - ErrorCodes::LOGICAL_ERROR); - - const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get()); - if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedBucketTransform.", - ErrorCodes::LOGICAL_ERROR); - - Block block = header.cloneWithColumns(cur_chunk.detachColumns()); - block.info.is_overflows = agg_info->is_overflows; - block.info.bucket_num = agg_info->bucket_num; - - blocks_list.emplace_back(std::move(block)); - } - - auto res_info = std::make_shared<AggregatedChunkInfo>(); - res_info->is_overflows = chunks_to_merge->is_overflows; - res_info->bucket_num = chunks_to_merge->bucket_num; - chunk.setChunkInfo(std::move(res_info)); - - auto block = params->aggregator.mergeBlocks(blocks_list, params->final); - size_t num_rows = block.rows(); - chunk.setColumns(block.getColumns(), num_rows); -} - - -SortingAggregatedTransform::SortingAggregatedTransform(size_t 
num_inputs_, AggregatingTransformParamsPtr params_) - : IProcessor(InputPorts(num_inputs_, params_->getHeader()), {params_->getHeader()}) - , num_inputs(num_inputs_) - , params(std::move(params_)) - , last_bucket_number(num_inputs, -1) - , is_input_finished(num_inputs, false) -{ -} - -bool SortingAggregatedTransform::tryPushChunk() -{ - auto & output = outputs.front(); - - if (chunks.empty()) - return false; - - /// Chunk with min current bucket. - auto it = chunks.begin(); - auto cur_bucket = it->first; - - /// Check that can push it - for (size_t input = 0; input < num_inputs; ++input) - if (!is_input_finished[input] && last_bucket_number[input] < cur_bucket) - return false; - - output.push(std::move(it->second)); - chunks.erase(it); - return true; -} - -void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) -{ - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); - - const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()); - if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); - - Int32 bucket = agg_info->bucket_num; - bool is_overflows = agg_info->is_overflows; - - if (is_overflows) - overflow_chunk = std::move(chunk); - else - { - if (chunks[bucket]) - throw Exception("SortingAggregatedTransform already got bucket with number " + toString(bucket), - ErrorCodes::LOGICAL_ERROR); - - chunks[bucket] = std::move(chunk); - last_bucket_number[from_input] = bucket; - } -} - -IProcessor::Status SortingAggregatedTransform::prepare() -{ - /// Check can output. - auto & output = outputs.front(); - - if (output.isFinished()) - { - for (auto & input : inputs) - input.close(); - - chunks.clear(); - last_bucket_number.clear(); - return Status::Finished; - } - - /// Check can push (to avoid data caching). - if (!output.canPush()) - { - for (auto & input : inputs) - input.setNotNeeded(); - - return Status::PortFull; - } - - /// Push if have min version. - bool pushed_to_output = tryPushChunk(); - - bool need_data = false; - bool all_finished = true; - - /// Try read anything. - auto in = inputs.begin(); - for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in) - { - if (in->isFinished()) - { - is_input_finished[input_num] = true; - continue; - } - - //all_finished = false; - - in->setNeeded(); - - if (!in->hasData()) - { - need_data = true; - all_finished = false; - continue; - } - - auto chunk = in->pull(); - addChunk(std::move(chunk), input_num); - - if (in->isFinished()) - { - is_input_finished[input_num] = true; - } - else - { - /// If chunk was pulled, then we need data from this port. 
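[Editor's note] MergingAggregatedBucketTransform above collapses a batch of same-bucket blocks with Aggregator::mergeBlocks; since the aggregate states arrive unfinalized, merging amounts to combining states key by key. A toy equivalent over hash maps — sum as the combine step, whereas real states are opaque, function-specific blobs:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using PartialAggregates = std::unordered_map<std::string, long>;

    /// Combine several partially-aggregated maps for one bucket into one.
    PartialAggregates merge_blocks(std::vector<PartialAggregates> blocks)
    {
        PartialAggregates result;
        for (auto & block : blocks)
            for (auto & [key, state] : block)
                result[key] += state; /// "merge state" for sum is just +
        return result;
    }

    int main()
    {
        auto merged = merge_blocks({{{"a", 1}, {"b", 2}},
                                    {{"a", 10}, {"c", 3}}});
        for (auto & [key, sum] : merged)
            std::printf("%s -> %ld\n", key.c_str(), sum);
        /// a -> 11, b -> 2, c -> 3
    }

Keeping states unfinalized until this point is what makes the per-bucket merge both correct and cheap: finalization happens once, on the merged result.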
- need_data = true; - all_finished = false; - } - } - - if (pushed_to_output) - return Status::PortFull; - - if (tryPushChunk()) - return Status::PortFull; - - if (need_data) - return Status::NeedData; - - if (!all_finished) - throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.", - ErrorCodes::LOGICAL_ERROR); - - if (overflow_chunk) - { - output.push(std::move(overflow_chunk)); - return Status::PortFull; - } - - output.finish(); - return Status::Finished; -} - - + throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, " + "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR); + + if (tryPushSingleLevelData()) + return Status::PortFull; + } + + /// If we haven't pushed to output, then all data was read. Push overflows if have. + if (tryPushOverflowData()) + return Status::PortFull; + + output.finish(); + return Status::Finished; +} + +void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) +{ + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + + const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()); + if (!agg_info) + throw Exception("Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + + Int32 bucket = agg_info->bucket_num; + bool is_overflows = agg_info->is_overflows; + + if (is_overflows) + overflow_chunks.emplace_back(std::move(chunk)); + else if (bucket < 0) + single_level_chunks.emplace_back(std::move(chunk)); + else + { + chunks_map[bucket].emplace_back(std::move(chunk)); + has_two_level = true; + last_bucket_number[input] = bucket; + } +} + +void GroupingAggregatedTransform::work() +{ + /// Convert single level data to two level. + if (!single_level_chunks.empty()) + { + const auto & header = getInputs().front().getHeader(); /// Take header from input port. Output header is empty. 
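[Editor's note] work() above lazily upgrades single-level results to two-level by asking the aggregator to scatter each block into its 256 buckets. The essential operation is routing every key by its hash so that the same key lands in the same bucket on every source — the property the per-bucket merge relies on. A toy scatter (std::hash and a modulo here for readability; the real code derives the bucket from specific hash bits):

    #include <array>
    #include <cstdio>
    #include <functional>
    #include <string>
    #include <vector>

    int main()
    {
        constexpr size_t NUM_BUCKETS = 8; /// 256 in the real code

        std::vector<std::string> keys = {"red", "green", "blue", "cyan", "black"};
        std::array<std::vector<std::string>, NUM_BUCKETS> buckets;

        for (auto & key : keys)
        {
            /// Deterministic routing: identical keys always pick the same bucket.
            size_t bucket = std::hash<std::string>{}(key) % NUM_BUCKETS;
            buckets[bucket].push_back(key);
        }

        for (size_t i = 0; i < NUM_BUCKETS; ++i)
            for (auto & key : buckets[i])
                std::printf("bucket %zu: %s\n", i, key.c_str());
    }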
+ auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns()); + single_level_chunks.pop_back(); + auto blocks = params->aggregator.convertBlockToTwoLevel(block); + + for (auto & cur_block : blocks) + { + if (!cur_block) + continue; + + Int32 bucket = cur_block.info.bucket_num; + auto chunk_info = std::make_shared<AggregatedChunkInfo>(); + chunk_info->bucket_num = bucket; + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); + } + } +} + + +MergingAggregatedBucketTransform::MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params_) + : ISimpleTransform({}, params_->getHeader(), false), params(std::move(params_)) +{ + setInputNotNeededAfterRead(true); +} + +void MergingAggregatedBucketTransform::transform(Chunk & chunk) +{ + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get()); + + if (!chunks_to_merge) + throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.", + ErrorCodes::LOGICAL_ERROR); + + auto header = params->aggregator.getHeader(false); + + BlocksList blocks_list; + for (auto & cur_chunk : *chunks_to_merge->chunks) + { + const auto & cur_info = cur_chunk.getChunkInfo(); + if (!cur_info) + throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.", + ErrorCodes::LOGICAL_ERROR); + + const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get()); + if (!agg_info) + throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedBucketTransform.", + ErrorCodes::LOGICAL_ERROR); + + Block block = header.cloneWithColumns(cur_chunk.detachColumns()); + block.info.is_overflows = agg_info->is_overflows; + block.info.bucket_num = agg_info->bucket_num; + + blocks_list.emplace_back(std::move(block)); + } + + auto res_info = std::make_shared<AggregatedChunkInfo>(); + res_info->is_overflows = chunks_to_merge->is_overflows; + res_info->bucket_num = chunks_to_merge->bucket_num; + chunk.setChunkInfo(std::move(res_info)); + + auto block = params->aggregator.mergeBlocks(blocks_list, params->final); + size_t num_rows = block.rows(); + chunk.setColumns(block.getColumns(), num_rows); +} + + +SortingAggregatedTransform::SortingAggregatedTransform(size_t num_inputs_, AggregatingTransformParamsPtr params_) + : IProcessor(InputPorts(num_inputs_, params_->getHeader()), {params_->getHeader()}) + , num_inputs(num_inputs_) + , params(std::move(params_)) + , last_bucket_number(num_inputs, -1) + , is_input_finished(num_inputs, false) +{ +} + +bool SortingAggregatedTransform::tryPushChunk() +{ + auto & output = outputs.front(); + + if (chunks.empty()) + return false; + + /// Chunk with min current bucket. 
+
+
+SortingAggregatedTransform::SortingAggregatedTransform(size_t num_inputs_, AggregatingTransformParamsPtr params_)
+ : IProcessor(InputPorts(num_inputs_, params_->getHeader()), {params_->getHeader()})
+ , num_inputs(num_inputs_)
+ , params(std::move(params_))
+ , last_bucket_number(num_inputs, -1)
+ , is_input_finished(num_inputs, false)
+{
+}
+
+bool SortingAggregatedTransform::tryPushChunk()
+{
+ auto & output = outputs.front();
+
+ if (chunks.empty())
+ return false;
+
+ /// Take the chunk with the minimal bucket number.
+ auto it = chunks.begin();
+ auto cur_bucket = it->first;
+
+ /// Check that we can push it: every unfinished input must already be past this bucket.
+ for (size_t input = 0; input < num_inputs; ++input)
+ if (!is_input_finished[input] && last_bucket_number[input] < cur_bucket)
+ return false;
+
+ output.push(std::move(it->second));
+ chunks.erase(it);
+ return true;
+}
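tryPushChunk() above encodes the ordering invariant of the whole transform: bucket b may be emitted only once every still-active input has reported some bucket >= b, because inputs deliver buckets in increasing order with at most one chunk per bucket. A standalone restatement of that predicate with a worked example (names are illustrative):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

/// Bucket `bucket` is safe to emit only if no unfinished input can still
/// produce it later. Inputs emit buckets in increasing order with at most
/// one chunk per bucket, so `last_bucket_number[i] >= bucket` suffices.
bool canPushBucket(int32_t bucket,
                   const std::vector<int32_t> & last_bucket_number,
                   const std::vector<bool> & is_input_finished)
{
    for (std::size_t i = 0; i < last_bucket_number.size(); ++i)
        if (!is_input_finished[i] && last_bucket_number[i] < bucket)
            return false; /// Input i may still send a chunk for `bucket`.
    return true;
}

/// Example: two live inputs whose last seen buckets are {3, 1}:
///   canPushBucket(2, {3, 1}, {false, false}) == false (input 1 may still send bucket 2)
///   canPushBucket(1, {3, 1}, {false, false}) == true  (nothing below bucket 1 can arrive anymore)
```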
+
+void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
+{
+ const auto & info = chunk.getChunkInfo();
+ if (!info)
+ throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
+ if (!agg_info)
+ throw Exception("Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ Int32 bucket = agg_info->bucket_num;
+ bool is_overflows = agg_info->is_overflows;
+
+ if (is_overflows)
+ overflow_chunk = std::move(chunk);
+ else
+ {
+ if (chunks[bucket])
+ throw Exception("SortingAggregatedTransform already got bucket with number " + toString(bucket),
+ ErrorCodes::LOGICAL_ERROR);
+
+ chunks[bucket] = std::move(chunk);
+ last_bucket_number[from_input] = bucket;
+ }
+}
+
+IProcessor::Status SortingAggregatedTransform::prepare()
+{
+ /// Check whether we can output.
+ auto & output = outputs.front();
+
+ if (output.isFinished())
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ chunks.clear();
+ last_bucket_number.clear();
+ return Status::Finished;
+ }
+
+ /// Check whether we can push (to avoid caching data).
+ if (!output.canPush())
+ {
+ for (auto & input : inputs)
+ input.setNotNeeded();
+
+ return Status::PortFull;
+ }
+
+ /// Push the chunk with the minimal bucket number if we already have it.
+ bool pushed_to_output = tryPushChunk();
+
+ bool need_data = false;
+ bool all_finished = true;
+
+ /// Try to read from every input.
+ auto in = inputs.begin();
+ for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
+ {
+ if (in->isFinished())
+ {
+ is_input_finished[input_num] = true;
+ continue;
+ }
+
+ //all_finished = false;
+
+ in->setNeeded();
+
+ if (!in->hasData())
+ {
+ need_data = true;
+ all_finished = false;
+ continue;
+ }
+
+ auto chunk = in->pull();
+ addChunk(std::move(chunk), input_num);
+
+ if (in->isFinished())
+ {
+ is_input_finished[input_num] = true;
+ }
+ else
+ {
+ /// If a chunk was pulled, then we still need data from this port.
+ need_data = true;
+ all_finished = false;
+ }
+ }
+
+ if (pushed_to_output)
+ return Status::PortFull;
+
+ if (tryPushChunk())
+ return Status::PortFull;
+
+ if (need_data)
+ return Status::NeedData;
+
+ if (!all_finished)
+ throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.",
+ ErrorCodes::LOGICAL_ERROR);
+
+ if (overflow_chunk)
+ {
+ output.push(std::move(overflow_chunk));
+ return Status::PortFull;
+ }
+
+ output.finish();
+ return Status::Finished;
+}
+
+
 void addMergingAggregatedMemoryEfficientTransform(
 Pipe & pipe,
 AggregatingTransformParamsPtr params,
 size_t num_merging_processors)
-{
+{
 pipe.addTransform(std::make_shared<GroupingAggregatedTransform>(pipe.getHeader(), pipe.numOutputPorts(), params));
-
- if (num_merging_processors <= 1)
- {
- /// --> GroupingAggregated --> MergingAggregatedBucket -->
+
+ if (num_merging_processors <= 1)
+ {
+ /// --> GroupingAggregated --> MergingAggregatedBucket -->
 pipe.addTransform(std::make_shared<MergingAggregatedBucketTransform>(params));
 return;
- }
-
- /// --> --> MergingAggregatedBucket -->
- /// --> GroupingAggregated --> ResizeProcessor --> MergingAggregatedBucket --> SortingAggregated -->
- /// --> --> MergingAggregatedBucket -->
-
+ }
+
+ /// --> --> MergingAggregatedBucket -->
+ /// --> GroupingAggregated --> ResizeProcessor --> MergingAggregatedBucket --> SortingAggregated -->
+ /// --> --> MergingAggregatedBucket -->
+
 pipe.resize(num_merging_processors);
-
+
 pipe.addSimpleTransform([params](const Block &)
- {
+ {
 return std::make_shared<MergingAggregatedBucketTransform>(params);
 });
-
+
 pipe.addTransform(std::make_shared<SortingAggregatedTransform>(num_merging_processors, params));
-}
-
-}
+}
+
+}
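That closes the .cpp side. For orientation, a hypothetical caller of the function above (based only on the signature and the pipeline diagrams visible in this diff; finishAggregation is an invented name, not code from the repository) might look like:

```cpp
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>

/// Hypothetical helper: given a Pipe whose output ports carry partially
/// aggregated chunks, attach the memory-efficient merging stage.
/// With num_merging_processors <= 1 this appends
///   GroupingAggregated -> MergingAggregatedBucket;
/// otherwise it also resizes the pipe and appends SortingAggregated
/// to restore the bucket order broken by parallel merging.
void finishAggregation(DB::Pipe & pipe, DB::AggregatingTransformParamsPtr params)
{
    DB::addMergingAggregatedMemoryEfficientTransform(pipe, params, /*num_merging_processors=*/ 4);
}
```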
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
index cf2fc92347..4367f6fec3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
@@ -1,149 +1,149 @@
 #pragma once
-#include <Processors/IProcessor.h>
-#include <Interpreters/Aggregator.h>
-#include <Processors/ISimpleTransform.h>
-#include <Processors/Transforms/AggregatingTransform.h>
-#include <Processors/ResizeProcessor.h>
-
-
-namespace DB
-{
-
-/** Pre-aggregates data from ports, holding in RAM only one or more (up to merging_threads) blocks from each source.
- * This saves RAM in case of using two-level aggregation, where in each source there will be up to 256 blocks with parts of the result.
- *
- * Aggregate functions in blocks should not be finalized so that their states can be combined.
- *
- * Used to solve two tasks:
- *
- * 1. External aggregation with data flush to disk.
- * Partially aggregated data (previously divided into 256 buckets) is flushed to some number of files on the disk.
- * We need to read them and merge them by buckets - keeping only a few buckets from each file in RAM simultaneously.
- *
- * 2. Merge aggregation results for distributed query processing.
+#include <Processors/IProcessor.h>
+#include <Interpreters/Aggregator.h>
+#include <Processors/ISimpleTransform.h>
+#include <Processors/Transforms/AggregatingTransform.h>
+#include <Processors/ResizeProcessor.h>
+
+
+namespace DB
+{
+
+/** Pre-aggregates data from ports, holding in RAM only one or more (up to merging_threads) blocks from each source.
+ * This saves RAM in case of using two-level aggregation, where in each source there will be up to 256 blocks with parts of the result.
+ *
+ * Aggregate functions in blocks should not be finalized so that their states can be combined.
+ *
+ * Used to solve two tasks:
+ *
+ * 1. External aggregation with data flush to disk.
+ * Partially aggregated data (previously divided into 256 buckets) is flushed to some number of files on the disk.
+ * We need to read them and merge them by buckets - keeping only a few buckets from each file in RAM simultaneously.
+ *
+ * 2. Merge aggregation results for distributed query processing.
 * Partially aggregated data arrives from different servers, which may or may not have been split into 256 buckets,
- * and these buckets are passed to us by the network from each server in sequence, one by one.
- * These also have to be read and merged bucket by bucket.
- *
- * How it works:
- *
- * There are a number of sources. They give out blocks with partially aggregated data.
- * Each source can return one of the following block sequences:
- * 1. "unsplit" block with bucket_num = -1;
+ * and these buckets are passed to us by the network from each server in sequence, one by one.
+ * These also have to be read and merged bucket by bucket.
+ *
+ * How it works:
+ *
+ * There are a number of sources. They give out blocks with partially aggregated data.
+ * Each source can return one of the following block sequences:
+ * 1. "unsplit" block with bucket_num = -1;
 * 2. "split" (two_level) blocks with bucket_num from 0 to 255;
- * In both cases, there may also be a block of "overflows" with bucket_num = -1 and is_overflows = true;
- *
+ * In both cases, there may also be a block of "overflows" with bucket_num = -1 and is_overflows = true;
+ *
 * We start from the convention that split blocks are always passed in the order of bucket_num.
- * That is, if a < b, then the bucket_num = a block goes before bucket_num = b.
- * This is needed for a memory-efficient merge
- * - so that the blocks do not have to be read up front, but can be consumed in increasing bucket_num order.
- *
- * Note that not every bucket_num from the range 0..255 has to be present.
- * The overflow block can appear in any position relative to the other blocks (but there can be at most one).
- *
- * It is necessary to combine these sequences of blocks and return the result as a sequence with the same properties.
+ * That is, if a < b, then the bucket_num = a block goes before bucket_num = b.
+ * This is needed for a memory-efficient merge
+ * - so that the blocks do not have to be read up front, but can be consumed in increasing bucket_num order.
+ *
+ * Note that not every bucket_num from the range 0..255 has to be present.
+ * The overflow block can appear in any position relative to the other blocks (but there can be at most one).
+ *
+ * It is necessary to combine these sequences of blocks and return the result as a sequence with the same properties.
 * That is, at the output, if there are "split" blocks in the sequence, then they should go in the order of bucket_num.
- *
- * The merge can be performed using several (merging_threads) threads.
- * For this, the set of blocks for the next bucket_num should be received sequentially,
- * and then, when we have several received sets, they can be merged in parallel.
- *
- * While receiving the next blocks from different sources,
- * data from sources can also be read in several threads (reading_threads)
- * for optimal performance in the presence of a fast network or disks (from where these blocks are read).
- */
-
-/// Has several inputs and single output.
-/// Reads chunks with partially aggregated data from its inputs, groups them by bucket number
-/// and writes the data for a single bucket as a single chunk.
-class GroupingAggregatedTransform : public IProcessor
-{
-public:
- GroupingAggregatedTransform(const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_);
- String getName() const override { return "GroupingAggregatedTransform"; }
-
- /// Special setting: for the case when a single source can return several chunks with the same bucket.
- void allowSeveralChunksForSingleBucketPerSource() { expect_several_chunks_for_single_bucket_per_source = true; }
-
-protected:
- Status prepare() override;
- void work() override;
-
-private:
- size_t num_inputs;
- AggregatingTransformParamsPtr params;
-
- std::vector<Int32> last_bucket_number; /// Last bucket read from each input.
- std::map<Int32, Chunks> chunks_map; /// bucket -> chunks
- Chunks overflow_chunks;
- Chunks single_level_chunks;
- Int32 current_bucket = 0; /// Currently processing bucket.
- Int32 next_bucket_to_push = 0; /// Always <= current_bucket.
- bool has_two_level = false;
-
- bool all_inputs_finished = false;
- bool read_from_all_inputs = false;
- std::vector<bool> read_from_input;
-
- bool expect_several_chunks_for_single_bucket_per_source = false;
-
- /// Add a chunk read from an input to chunks_map, overflow_chunks or single_level_chunks according to its chunk info.
- void addChunk(Chunk chunk, size_t input);
- /// Read the first chunk from every input. This is needed to detect whether any source uses two-level aggregation.
- void readFromAllInputs();
- /// Push chunks if all inputs have single-level data.
- bool tryPushSingleLevelData();
- /// Push chunks from a ready bucket if there is one.
- bool tryPushTwoLevelData();
- /// Push overflow chunks if there are any.
- bool tryPushOverflowData();
- /// Push chunks from a bucket to the output port.
- void pushData(Chunks chunks, Int32 bucket, bool is_overflows);
-};
-
-/// Merge aggregated data from a single bucket.
-class MergingAggregatedBucketTransform : public ISimpleTransform
-{
-public:
- explicit MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params);
- String getName() const override { return "MergingAggregatedBucketTransform"; }
-
-protected:
- void transform(Chunk & chunk) override;
-
-private:
- AggregatingTransformParamsPtr params;
-};
-
-/// Has several inputs and single output.
-/// Reads merged buckets with aggregated data from its inputs, sorts them by bucket number and writes them to the output.
-/// Precondition: inputs return chunks with increasing bucket numbers, with at most one chunk per bucket.
-class SortingAggregatedTransform : public IProcessor
-{
-public:
- SortingAggregatedTransform(size_t num_inputs, AggregatingTransformParamsPtr params);
- String getName() const override { return "SortingAggregatedTransform"; }
- Status prepare() override;
-
-private:
- size_t num_inputs;
- AggregatingTransformParamsPtr params;
- std::vector<Int32> last_bucket_number;
- std::vector<bool> is_input_finished;
- std::map<Int32, Chunk> chunks;
- Chunk overflow_chunk;
-
- bool tryPushChunk();
- void addChunk(Chunk chunk, size_t from_input);
-};
-
+ *
+ * The merge can be performed using several (merging_threads) threads.
+ * For this, the set of blocks for the next bucket_num should be received sequentially,
+ * and then, when we have several received sets, they can be merged in parallel.
+ *
+ * While receiving the next blocks from different sources,
+ * data from sources can also be read in several threads (reading_threads)
+ * for optimal performance in the presence of a fast network or disks (from where these blocks are read).
+ */
+
+/// Has several inputs and single output.
+/// Reads chunks with partially aggregated data from its inputs, groups them by bucket number
+/// and writes the data for a single bucket as a single chunk.
+class GroupingAggregatedTransform : public IProcessor
+{
+public:
+ GroupingAggregatedTransform(const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_);
+ String getName() const override { return "GroupingAggregatedTransform"; }
+
+ /// Special setting: for the case when a single source can return several chunks with the same bucket.
+ void allowSeveralChunksForSingleBucketPerSource() { expect_several_chunks_for_single_bucket_per_source = true; }
+
+protected:
+ Status prepare() override;
+ void work() override;
+
+private:
+ size_t num_inputs;
+ AggregatingTransformParamsPtr params;
+
+ std::vector<Int32> last_bucket_number; /// Last bucket read from each input.
+ std::map<Int32, Chunks> chunks_map; /// bucket -> chunks
+ Chunks overflow_chunks;
+ Chunks single_level_chunks;
+ Int32 current_bucket = 0; /// Currently processing bucket.
+ Int32 next_bucket_to_push = 0; /// Always <= current_bucket.
+ bool has_two_level = false;
+
+ bool all_inputs_finished = false;
+ bool read_from_all_inputs = false;
+ std::vector<bool> read_from_input;
+
+ bool expect_several_chunks_for_single_bucket_per_source = false;
+
+ /// Add a chunk read from an input to chunks_map, overflow_chunks or single_level_chunks according to its chunk info.
+ void addChunk(Chunk chunk, size_t input);
+ /// Read the first chunk from every input. This is needed to detect whether any source uses two-level aggregation.
+ void readFromAllInputs();
+ /// Push chunks if all inputs have single-level data.
+ bool tryPushSingleLevelData();
+ /// Push chunks from a ready bucket if there is one.
+ bool tryPushTwoLevelData();
+ /// Push overflow chunks if there are any.
+ bool tryPushOverflowData();
+ /// Push chunks from a bucket to the output port.
+ void pushData(Chunks chunks, Int32 bucket, bool is_overflows);
+};
+
+/// Merge aggregated data from a single bucket.
+class MergingAggregatedBucketTransform : public ISimpleTransform
+{
+public:
+ explicit MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params);
+ String getName() const override { return "MergingAggregatedBucketTransform"; }
+
+protected:
+ void transform(Chunk & chunk) override;
+
+private:
+ AggregatingTransformParamsPtr params;
+};
+
+/// Has several inputs and single output.
+/// Reads merged buckets with aggregated data from its inputs, sorts them by bucket number and writes them to the output.
+/// Precondition: inputs return chunks with increasing bucket numbers, with at most one chunk per bucket.
+class SortingAggregatedTransform : public IProcessor
+{
+public:
+ SortingAggregatedTransform(size_t num_inputs, AggregatingTransformParamsPtr params);
+ String getName() const override { return "SortingAggregatedTransform"; }
+ Status prepare() override;
+
+private:
+ size_t num_inputs;
+ AggregatingTransformParamsPtr params;
+ std::vector<Int32> last_bucket_number;
+ std::vector<bool> is_input_finished;
+ std::map<Int32, Chunk> chunks;
+ Chunk overflow_chunk;
+
+ bool tryPushChunk();
+ void addChunk(Chunk chunk, size_t from_input);
+};
+
 class Pipe;
 
 /// Adds processors to the pipe which perform memory-efficient merging of partially aggregated data from several sources.
 void addMergingAggregatedMemoryEfficientTransform(
 Pipe & pipe,
- AggregatingTransformParamsPtr params,
- size_t num_merging_processors);
-
-}
-
+ AggregatingTransformParamsPtr params,
+ size_t num_merging_processors);
+
+}
+
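Before the next header, a self-contained illustration of the block-sequence convention documented above: each source yields bucket numbers in increasing order, so a memory-efficient merge only ever needs the current front bucket of each source in RAM (toy integers stand in for blocks; all names are illustrative):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    /// Three sources, each the sorted list of bucket numbers it will emit.
    std::vector<std::vector<int>> sources = {{0, 3, 7}, {3, 5}, {0, 5, 7}};
    std::vector<std::size_t> pos(sources.size(), 0);

    /// Repeatedly take the smallest front bucket and merge all fronts equal to it.
    while (true)
    {
        int min_bucket = 256; /// valid buckets are 0..255, so 256 is a sentinel
        for (std::size_t i = 0; i < sources.size(); ++i)
            if (pos[i] < sources[i].size())
                min_bucket = std::min(min_bucket, sources[i][pos[i]]);
        if (min_bucket == 256)
            break;

        std::cout << "merge bucket " << min_bucket << " from sources:";
        for (std::size_t i = 0; i < sources.size(); ++i)
        {
            if (pos[i] < sources[i].size() && sources[i][pos[i]] == min_bucket)
            {
                std::cout << ' ' << i;
                ++pos[i];
            }
        }
        std::cout << '\n';
    }
}
```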
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
index 8abbc413c4..b08818a2ba 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
@@ -1,20 +1,20 @@
-#pragma once
-
-#include <Interpreters/PreparedSets.h>
-#include <Interpreters/DatabaseAndTableWithAlias.h>
-#include <Core/SortDescription.h>
-#include <Core/Names.h>
+#pragma once
+
+#include <Interpreters/PreparedSets.h>
+#include <Interpreters/DatabaseAndTableWithAlias.h>
+#include <Core/SortDescription.h>
+#include <Core/Names.h>
 #include <Storages/ProjectionsDescription.h>
 #include <Interpreters/AggregateDescription.h>
-#include <memory>
-
-namespace DB
-{
-
-class ExpressionActions;
-using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
-
+#include <memory>
+
+namespace DB
+{
+
+class ExpressionActions;
+using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+
 class ActionsDAG;
 using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
 
@@ -42,14 +42,14 @@ using ClusterPtr = std::shared_ptr<Cluster>;
 struct MergeTreeDataSelectAnalysisResult;
 using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr<MergeTreeDataSelectAnalysisResult>;
 
-struct PrewhereInfo
-{
- /// Actions which are executed in order to alias columns are used for prewhere actions.
+struct PrewhereInfo
+{
+ /// Actions which are executed in order to alias columns are used for prewhere actions.
 ActionsDAGPtr alias_actions;
 /// Actions for row level security filter. Applied separately before prewhere_actions.
 /// These actions are separate because the prewhere condition should not be executed over filtered rows.
 ActionsDAGPtr row_level_filter;
- /// Actions which are executed on block in order to get filter column for prewhere step.
+ /// Actions which are executed on block in order to get filter column for prewhere step.
 ActionsDAGPtr prewhere_actions;
 String row_level_column_name;
 String prewhere_column_name;
@@ -63,13 +63,13 @@ struct PrewhereInfo
 std::string dump() const;
 };
 
-/// Helper struct to store all the information about the filter expression.
+struct FilterInfo
+{
 ExpressionActionsPtr alias_actions;
 ExpressionActionsPtr actions;
- String column_name;
- bool do_remove_column = false;
+ String column_name;
+ bool do_remove_column = false;
 };
 
 /// Same as FilterInfo, but with ActionsDAG.
@@ -80,25 +80,25 @@ struct FilterDAGInfo
 bool do_remove_column = false;
 
 std::string dump() const;
-};
-
-struct InputOrderInfo
-{
- SortDescription order_key_prefix_descr;
- int direction;
+};
+
+struct InputOrderInfo
+{
+ SortDescription order_key_prefix_descr;
+ int direction;
 UInt64 limit;
-
 InputOrderInfo(const SortDescription & order_key_prefix_descr_, int direction_, UInt64 limit_)
 : order_key_prefix_descr(order_key_prefix_descr_), direction(direction_), limit(limit_) {}
-
- bool operator ==(const InputOrderInfo & other) const
- {
- return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction;
- }
-
- bool operator !=(const InputOrderInfo & other) const { return !(*this == other); }
-};
-
+
+ bool operator ==(const InputOrderInfo & other) const
+ {
+ return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction;
+ }
+
+ bool operator !=(const InputOrderInfo & other) const { return !(*this == other); }
+};
+
 class IMergeTreeDataPart;
 
 using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
@@ -125,15 +125,15 @@ struct ProjectionCandidate
 MergeTreeDataSelectAnalysisResultPtr merge_tree_normal_select_result_ptr;
 };
 
-/** Query along with some additional data,
- * that can be used during query processing
- * inside storage engines.
- */
-struct SelectQueryInfo
-{
- ASTPtr query;
- ASTPtr view_query; /// Optimized VIEW query
-
+/** Query along with some additional data,
+ * that can be used during query processing
+ * inside storage engines.
+ */
+struct SelectQueryInfo
+{
+ ASTPtr query;
+ ASTPtr view_query; /// Optimized VIEW query
+
 /// Cluster for the query.
 ClusterPtr cluster;
 /// Optimized cluster for the query.
@@ -143,16 +143,16 @@ struct SelectQueryInfo
 ClusterPtr optimized_cluster;
 
 TreeRewriterResultPtr syntax_analyzer_result;
-
- PrewhereInfoPtr prewhere_info;
-
- ReadInOrderOptimizerPtr order_optimizer;
+
+ PrewhereInfoPtr prewhere_info;
+
+ ReadInOrderOptimizerPtr order_optimizer;
 /// Can be modified while reading from storage
 InputOrderInfoPtr input_order_info;
-
- /// Prepared sets are used for indexes by the storage engine.
- /// Example: x IN (1, 2, 3)
- PreparedSets sets;
+
+ /// Prepared sets are used for indexes by the storage engine.
+ /// Example: x IN (1, 2, 3)
+ PreparedSets sets;
 
 /// Cached value of ExpressionAnalysisResult::has_window
 bool has_window = false;
@@ -166,6 +166,6 @@ struct SelectQueryInfo
 bool merge_tree_empty_result = false;
 Block minmax_count_projection_block;
 MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr;
-};
-
-}
+};
+
+}
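A closing note on PrewhereInfo's ordering contract: the comments above state that row_level_filter is applied strictly before prewhere_actions, so the prewhere condition never sees filtered-out rows. A conceptual sketch of that ordering in plain C++ (not ClickHouse API; all names are illustrative):

```cpp
#include <functional>
#include <vector>

/// The row-level security filter runs first; the prewhere condition is then
/// evaluated only over the rows that survived it. The short-circuit &&
/// mirrors "applied separately before prewhere_actions".
std::vector<int> applyPrewhere(const std::vector<int> & rows,
                               const std::function<bool(int)> & row_level_filter,
                               const std::function<bool(int)> & prewhere_condition)
{
    std::vector<int> passed;
    for (int row : rows)
        if (row_level_filter(row) && prewhere_condition(row))
            passed.push_back(row);
    return passed;
}
```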