aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorarcadia-devtools <arcadia-devtools@yandex-team.ru>2022-02-10 16:48:02 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:48:02 +0300
commit659131349be7796c633c453b4d8b4fa20b8c6ee9 (patch)
tree67ae2197ea6054b7c46901db060a27fa94377631
parenta8b9b8cf5b7405ae170a24f8e1fc27efd8b0849b (diff)
downloadydb-659131349be7796c633c453b4d8b4fa20b8c6ee9.tar.gz
Restoring authorship annotation for <arcadia-devtools@yandex-team.ru>. Commit 1 of 2.
-rw-r--r--build/external_resources/ymake/ya.make.inc12
-rw-r--r--build/platform/test_tool/host.ya.make.inc30
-rw-r--r--build/plugins/java.py12
-rw-r--r--build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource6
-rw-r--r--build/prebuilt/contrib/tools/protoc_std/ya.make.resource6
-rw-r--r--build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource6
-rw-r--r--build/rules/flake8/migrations.yaml2428
-rw-r--r--build/ya.conf.json2
-rw-r--r--build/ymake.core.conf2
-rw-r--r--contrib/libs/libc_compat/include/readpassphrase/readpassphrase.h52
-rw-r--r--contrib/libs/libc_compat/readpassphrase.c360
-rw-r--r--library/cpp/lfalloc/lf_allocX64.h2
-rw-r--r--library/cpp/threading/local_executor/tbb_local_executor.cpp106
-rw-r--r--library/cpp/threading/local_executor/tbb_local_executor.h98
-rw-r--r--library/cpp/threading/local_executor/ya.make2
-rw-r--r--util/charset/ya.make2
-rw-r--r--util/datetime/cputimer.cpp2
-rw-r--r--util/system/info.cpp6
-rw-r--r--util/thread/lfqueue.h10
-rwxr-xr-xya.bat2
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h32
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp50
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h18
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp4
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp20
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp180
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h70
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp10
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h12
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h1078
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h678
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h148
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp140
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h56
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp632
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h144
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h210
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h846
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h374
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h824
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h132
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h658
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h64
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h470
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp50
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h22
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h44
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp44
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h30
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp222
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h68
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h624
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp396
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h88
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h142
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp636
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h58
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp1110
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h474
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h74
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp322
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h786
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h74
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp90
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h22
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp314
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h424
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp58
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp14
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h8
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp86
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h26
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp60
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp94
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h44
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp60
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h12
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp140
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp172
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp68
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp286
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp624
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h344
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h558
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h274
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp112
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h126
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp472
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h288
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp76
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp44
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp230
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h80
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp356
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h106
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h72
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp560
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h204
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp204
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h74
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp64
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h434
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp76
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h124
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h50
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp68
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp26
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp150
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h80
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp138
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h50
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp68
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp190
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h116
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp22
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h44
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp248
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h98
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp58
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h46
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp152
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h68
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h500
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h2402
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp266
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp1046
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h454
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h482
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h74
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp138
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h138
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp56
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h20
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp222
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h150
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp108
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h116
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h154
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp164
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp230
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h102
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp736
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h164
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h56
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp690
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h376
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h106
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp182
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp146
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h98
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h158
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h88
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h98
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp88
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp170
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h48
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp86
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp188
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h96
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h34
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp72
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h42
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h34
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h34
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp100
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h84
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp318
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h154
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp148
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h106
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h92
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp730
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp1176
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h118
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp108
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h34
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp88
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h40
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp204
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h50
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp108
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h32
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp378
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h70
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp112
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h38
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp144
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h38
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp190
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h34
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp166
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h42
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp250
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h54
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp164
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp446
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h84
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h64
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp78
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h34
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp42
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h64
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp272
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h42
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp258
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h32
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp172
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp60
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp154
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h52
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp132
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h140
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h198
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp86
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp228
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h26
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp92
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h72
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp330
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp128
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h62
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp876
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h102
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp418
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h108
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp756
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h100
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp360
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h92
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp168
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h84
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp88
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp220
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h122
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp68
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h56
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp632
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h132
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp176
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp52
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp842
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h268
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h36
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp400
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h144
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h42
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp222
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h150
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp1156
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h238
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp246
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h60
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp1016
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h278
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h118
273 files changed, 27240 insertions, 27240 deletions
diff --git a/build/external_resources/ymake/ya.make.inc b/build/external_resources/ymake/ya.make.inc
index b88f36c4db..9c73abab34 100644
--- a/build/external_resources/ymake/ya.make.inc
+++ b/build/external_resources/ymake/ya.make.inc
@@ -1,13 +1,13 @@
IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64 OR HOST_OS_DARWIN AND HOST_ARCH_ARM64 OR HOST_OS_LINUX AND HOST_ARCH_PPC64LE OR HOST_OS_LINUX AND HOST_ARCH_X86_64 OR HOST_OS_WINDOWS AND HOST_ARCH_X86_64)
-ELSE()
+ELSE()
MESSAGE(FATAL_ERROR Unsupported host platform for YMAKE)
ENDIF()
DECLARE_EXTERNAL_HOST_RESOURCES_BUNDLE(
YMAKE
- sbr:2763560807 FOR DARWIN
- sbr:2763561138 FOR DARWIN-ARM64
- sbr:2763560653 FOR LINUX-PPC64LE
- sbr:2763560979 FOR LINUX
- sbr:2763560492 FOR WIN32
+ sbr:2763560807 FOR DARWIN
+ sbr:2763561138 FOR DARWIN-ARM64
+ sbr:2763560653 FOR LINUX-PPC64LE
+ sbr:2763560979 FOR LINUX
+ sbr:2763560492 FOR WIN32
)
diff --git a/build/platform/test_tool/host.ya.make.inc b/build/platform/test_tool/host.ya.make.inc
index c25f2b1326..300a4df446 100644
--- a/build/platform/test_tool/host.ya.make.inc
+++ b/build/platform/test_tool/host.ya.make.inc
@@ -1,16 +1,16 @@
-IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984950)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990673)
-ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984688)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990185)
-ELSEIF (HOST_OS_LINUX AND HOST_ARCH_PPC64LE)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984559)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990014)
-ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764985330)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990852)
-ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984404)
- DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764989842)
+IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984950)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990673)
+ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984688)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990185)
+ELSEIF (HOST_OS_LINUX AND HOST_ARCH_PPC64LE)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984559)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990014)
+ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764985330)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764990852)
+ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:2764984404)
+ DECLARE_EXTERNAL_RESOURCE(TEST_TOOL3_HOST sbr:2764989842)
ENDIF()
diff --git a/build/plugins/java.py b/build/plugins/java.py
index 16fc126734..da751537d1 100644
--- a/build/plugins/java.py
+++ b/build/plugins/java.py
@@ -40,18 +40,18 @@ def on_run_jbuild_program(unit, *args):
flat, kv = common.sort_by_keywords({'IN': -1, 'IN_DIR': -1, 'OUT': -1, 'OUT_DIR': -1, 'CWD': 1, 'CLASSPATH': -1, 'CP_USE_COMMAND_FILE': 1, 'ADD_SRCS_TO_CLASSPATH': 0}, args)
depends = kv.get('CLASSPATH', []) + kv.get('JAR', [])
- fake_out = None
+ fake_out = None
if depends:
# XXX: hack to force ymake to build dependencies
- fake_out = "fake.out.{}".format(hash(tuple(args)))
- unit.on_run_java(['TOOL'] + depends + ["OUT", fake_out])
+ fake_out = "fake.out.{}".format(hash(tuple(args)))
+ unit.on_run_java(['TOOL'] + depends + ["OUT", fake_out])
if not kv.get('CP_USE_COMMAND_FILE'):
args += ['CP_USE_COMMAND_FILE', unit.get(['JAVA_PROGRAM_CP_USE_COMMAND_FILE']) or 'yes']
- if fake_out is not None:
- args += ['FAKE_OUT', fake_out]
-
+ if fake_out is not None:
+ args += ['FAKE_OUT', fake_out]
+
prev = unit.get(['RUN_JAVA_PROGRAM_VALUE']) or ''
new_val = (prev + ' ' + base64.b64encode(json.dumps(list(args), encoding='utf-8'))).strip()
unit.set(['RUN_JAVA_PROGRAM_VALUE', new_val])
diff --git a/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource b/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource
index 8edf2e5d4a..9ae9c68baa 100644
--- a/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource
+++ b/build/prebuilt/contrib/python/mypy-protobuf/bin/protoc-gen-mypy/ya.make.resource
@@ -1,9 +1,9 @@
IF (OS_DARWIN AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 2433625017)
+ SET(SANDBOX_RESOURCE_ID 2433625017)
ELSEIF (OS_LINUX AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 2433625425)
+ SET(SANDBOX_RESOURCE_ID 2433625425)
ELSEIF (OS_WINDOWS AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 2433624379)
+ SET(SANDBOX_RESOURCE_ID 2433624379)
ELSE()
SET(SANDBOX_RESOURCE_ID)
ENDIF()
diff --git a/build/prebuilt/contrib/tools/protoc_std/ya.make.resource b/build/prebuilt/contrib/tools/protoc_std/ya.make.resource
index 738c7da7ec..650f87a0cc 100644
--- a/build/prebuilt/contrib/tools/protoc_std/ya.make.resource
+++ b/build/prebuilt/contrib/tools/protoc_std/ya.make.resource
@@ -1,9 +1,9 @@
IF (OS_DARWIN AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 1714771857)
+ SET(SANDBOX_RESOURCE_ID 1714771857)
ELSEIF (OS_LINUX AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 1714772118)
+ SET(SANDBOX_RESOURCE_ID 1714772118)
ELSEIF (OS_WINDOWS AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 1714771351)
+ SET(SANDBOX_RESOURCE_ID 1714771351)
ELSE()
SET(SANDBOX_RESOURCE_ID)
ENDIF()
diff --git a/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource b/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource
index 05fcd0cbd5..34148b1537 100644
--- a/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource
+++ b/build/prebuilt/vendor/github.com/golang/protobuf/protoc-gen-go/ya.make.resource
@@ -1,9 +1,9 @@
IF (OS_DARWIN AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 2297961019)
+ SET(SANDBOX_RESOURCE_ID 2297961019)
ELSEIF (OS_LINUX AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 2297961241)
+ SET(SANDBOX_RESOURCE_ID 2297961241)
ELSEIF (OS_WINDOWS AND ARCH_X86_64)
- SET(SANDBOX_RESOURCE_ID 2297960716)
+ SET(SANDBOX_RESOURCE_ID 2297960716)
ELSE()
SET(SANDBOX_RESOURCE_ID)
ENDIF()
diff --git a/build/rules/flake8/migrations.yaml b/build/rules/flake8/migrations.yaml
index 6e54bf2e62..1fbca469ad 100644
--- a/build/rules/flake8/migrations.yaml
+++ b/build/rules/flake8/migrations.yaml
@@ -1,464 +1,464 @@
migrations:
- W605:
- ignore:
- - W605
- prefixes:
- - addappter/backend/testing
- - addappter/backend/testing/fixtures/configuration/ios
- - adfox/amacs/tests/functional/tests_amacs/bugs/medium
- - adfox/amacs/tests/functional/tests_amacs/dynamic_monetization/v2
- - adfox/amacs/tests/functional/tests_amacs/targeting/targeting_logic_tree_puids
- - adfox/amacs/tests/functional/utils
- - adfox/amacs/tests/functional/utils/db
- - adfox/amacs/tests/functional/utils/tools
- - ads/bsyeti/servants/bot
- - ads/libs/py_autobudget
- - ads/libs/py_bid_correction
- - ads/libs/py_cliutils
- - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/mapreducelib
- - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs
- - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools
- - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabutils
- - ads/libs/py_ml_factors/factor
- - ads/libs/py_ml_factors/matrixnet
- - ads/libs/py_test_mapreduce
- - ads/ml_engine/learn/result/flow_generate_extended_fstr
- - ads/ml_engine/learn/result/local_extended_fstr
- - ads/ml_engine/learn/result/local_extended_fstr/lib
- - ads/ml_engine/lib
- - ads/ml_monitoring/alerts/ut
- - ads/nirvana/automl/lib/batch_processor
- - ads/nirvana/difacto/online_loss_processor
- - ads/nirvana/graph_retrier/lib
- - ads/nirvana/online_learning/move_dmlc_dumps/lib
- - ads/nirvana/online_learning/pipeline_launcher/task_utils/lib
- - ads/nirvana/sequential_learning
- - ads/nirvana/tools/apc_check
- - ads/quality/apc/gmg/generate_workflow
- - ads/quality/apc/gmg/make_lm_pool
- - ads/quality/apc/gmg/make_lm_pool/lib
- - ads/quality/apc/prgmgv3/workflow_constructor/lib
- - ads/quality/bid_correction/lib
- - ads/quality/dssm/lib
- - ads/quality/dssm/prgmg/make_pool
- - ads/quality/dssm/search/201708/make_pool
- - ads/quality/dssm/synonyms/search/tools/join_fields
- - ads/quality/max_positions
- - ads/quality/search_lm_conv/lib
- - ads/quality/tools/adj_calc
- - ads/sandbox_scripts/bmcategory_queryage_coeffs/lib
- - ads/sandbox_scripts/build_tag_rules_table
- - ads/sandbox_scripts/clean_mapreduce/ut
- - ads/sandbox_scripts/join_yabar
- - ads/tools/mranalyze
- - ads/tools/yt_operations_analyzer
- - ads/watchman/contrib/apispec-patched
- - ads/watchman/contrib/apispec-patched/apispec
- - ads/watchman/experiments/lib
- - advq/offline_phits/monitoring
- - alice/boltalka/generative/tfnn/preprocess
- - alice/boltalka/generative/training/data/nn/filtered_twitter
- - alice/boltalka/generative/training/data/nn/util
- - alice/boltalka/generative/training/data/nn/util/dict
- - alice/boltalka/tools/dssm_preprocessing/preprocessing/lib
- - alice/nlu/py_libs/tokenizer/ut/py2
- - alice/nlu/py_libs/tokenizer/ut/py3
- - alice/nlu/tools/paraphrase_finder
- - alice/paskills/nirvana_inflector
- - alice/paskills/recipe_utils/lib
- - alice/tests/difftest/request_miner
- - antirobot/tools/daily_routine/lib
- - april/web/bas/bm
- - april/web/bas/bm/utils
- - april/web/bas/collector
- - aurora/aurora/core
- - aurora/aurora/core/services
- - aurora/aurora/core/toloka
- - aurora/xpath/api/utils
- - aurora/xpath/applier
- - balancer/test/functional/admin/admin
- - balancer/test/functional/regexp_host
- - balancer/test/util
- - balancer/test/util/dnsfake
- - billing/apikeys/apikeys
- - billing/apikeys/apikeys/butils_port
- - billing/apikeys/apikeys/butils_port/application
- - billing/dcs/dcs
- - billing/dcs/dcs/temporary/butils
- - billing/dcs/dcs/temporary/butils/application
- - cloud/bootstrap/db/src/admin
- - cloud/iam/codegen/python/codegen
- - cloud/netinfra/rknfilter/yc_rkn_s3tools
- - commerce/adv_backend
- - commerce/adv_backend/backend/management/commands/migrator
- - commerce/adv_backend/backend/validators
- - contrib/nginx/tests/tap
- - cv/imageproc/ocr/tools/nirvana/blocks_dataset/extract_pdf_boxes
- - cv/imageproc/ocr/tools/nirvana/blocks_dataset/g_blocks
- - cv/imageproc/ocr/tools/nirvana/blocks_dataset/rotate_pdf
- - cv/imageproc/ocr/tools/nirvana/confidence_factors/format_utils
- - cv/imageproc/ocr/tools/nirvana/recaptcha/generate_captcha_images
- - cv/imageproc/ocr/tools/statistic/auto_tests/ocr_test_lib
- - cv/imageproc/ocr/tools/statistic/ocr_intent_statistic
- - cv/imageproc/ocr/tools/statistic/rectify_evaluation/document_edges_statistic
- - cv/imageproc/ocr/tools/statistic/rectify_evaluation/ocr_statistic
- - cv/imageproc/ocr/tools/statistic/rectify_evaluation/yt_runner
- - cv/imageproc/ocr/tools/toloka_labeling/add_main_areas_to_labelling
- - cv/imageproc/ocr/tools/toloka_labeling/add_ocr_line_recognition
- - cv/imageproc/ocr/tools/toloka_labeling/convert
- - cv/imageproc/ocr/tools/toloka_labeling/convert_toloka_labeling_to_asessors_labeling
- - cv/imageproc/ocr/tools/toloka_labeling/get_labeling
- - cv/imageproc/ocr/tools/toloka_labeling/labeling
- - cv/imageproc/ocr/tools/toloka_labeling/labeling_assessors
- - cv/imageproc/ocr/tools/toloka_labeling/labeling_captcha
- - cv/imageproc/ocr/tools/toloka_labeling/paint_good
- - cv/imageproc/ocr/tools/toloka_labeling/studier/generate_blocks_images
- - cv/imageproc/ocr/tools/toloka_labeling/studier/get_queries_info
- - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/convert_box_labeling
- - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/extact_ocr_with_gt_blocks
- - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_bleu_statistic
- - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_etalon_ocr_result
- - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_translate_orig_from_ocr_labelling
- - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/match_blocks
- - cv/imgclassifiers/framework/gpu_utilization
- - cv/imgclassifiers/framework/nirvana/runners/train/statistics_parser
- - datacloud/score_api/validators
- - devtools/adept
- - devtools/distbuild/deploy/servants
- - devtools/dummy_arcadia/test/test_cache_invalidation
- - devtools/import_contrib/projects/phonenumber
- - devtools/qafw/qyp
- - devtools/qafw/rtc_deploy/lib
- - devtools/qafw/selenium/lib
- - devtools/signer/signer/utils
- - devtools/unitybot
- - devtools/unitybot/responsibility
- - devtools/ya/test
- - devtools/ya/test/tests/lib
- - dict/moria/nirvana/context/begemot_thesaurus_rule
- - dict/moria/nirvana/lib/util
- - dict/mt/alice/scenarios/prepare_data
- - dict/mt/analytics/metrics/quality/mt/stupids/lib
- - dict/mt/analytics/sentence_breaking/toloka_binary/lib
- - dict/mt/eval/eval_viewer/lib/spec/mt
- - dict/mt/eval/lib/metrics/bleu
- - dict/mt/g2p/rule_based
- - dict/mt/make/libs/common
- - dict/mt/make/libs/eval
- - dict/mt/make/libs/tfnn
- - dict/mt/make/libs/translate_mtd
- - dict/mt/make/modules/corpus_generation/common
- - dict/mt/make/tools/lm_diff
- - dict/mt/make/tools/make_cap_model/score_caps
- - dict/mt/make/tools/opus_merge
- - dict/mt/make/tools/tfnn/convert_mtd_to_tfnn
- - dict/mt/make/tools/yt_transform/tests
- - dict/mt/mtdict/make/definitions/modules/extracts/remove_etym
- - dict/mt/scripts/testsets/crowdsource/globalvoices/find_parallel_pages/extract_page_info
- - dict/mt/tools/log_converter/rem_tool
- - dict/mt/tools/tmx_to_plain_text
- - dict/ontodb/cardsparser/lib
- - dict/ontodb/daily/merge_cache
- - dict/ontodb/daily/validator
- - dict/ontodb/images/lib
- - dict/ontodb/isa/subtitle/add_subtitles
- - dict/ontodb/lists/toloka/parsing
- - dict/ontodb/onto_lib/card_to_url
- - dict/ontodb/onto_lib/card_utils
- - dict/ontodb/onto_lib/sources/discogs
- - dict/ontodb/onto_lib/sources/fantlab
- - dict/ontodb/onto_lib/sources/freebase
- - dict/ontodb/onto_lib/sources/freebase/mediators
- - dict/ontodb/onto_lib/sources/google_play
- - dict/ontodb/onto_lib/sources/itunes
- - dict/ontodb/onto_lib/sources/kinopoisk
- - dict/ontodb/onto_lib/sources/labirint
- - dict/ontodb/onto_lib/sources/musicbrainz
- - dict/ontodb/onto_lib/sources/transfermarkt
- - dict/ontodb/onto_lib/sources/www
- - dict/ontodb/ontodb_log/ctr
- - dict/ontodb/report/common
- - dict/ontodb/report/film_stuff
- - dict/ontodb/schema
- - dict/ontodb/tools/entity_lists/build_watson_sources/config
- - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmkomedia
- - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmzor
- - dict/ontodb/tools/entity_lists/build_watson_sources/src/imdb
- - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinolinehd
- - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinotime
- - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinovolt
- - dict/ontodb/tools/entity_lists/is_series_film_query
- - dict/ontodb/tools/entity_lists/mine_titles
- - dict/ontodb/tools/entity_lists/parser/config
- - dict/ontodb/tools/entity_lists/parser/src/adme
- - dict/ontodb/tools/entity_lists/parser/src/afisha
- - dict/ontodb/tools/entity_lists/parser/src/allbestmovies
- - dict/ontodb/tools/entity_lists/parser/src/cinemacc
- - dict/ontodb/tools/entity_lists/parser/src/cinetoday
- - dict/ontodb/tools/entity_lists/parser/src/cobrafilm
- - dict/ontodb/tools/entity_lists/parser/src/detifilm
- - dict/ontodb/tools/entity_lists/parser/src/dostfilms
- - dict/ontodb/tools/entity_lists/parser/src/fasttorrentsu
- - dict/ontodb/tools/entity_lists/parser/src/filmanias
- - dict/ontodb/tools/entity_lists/parser/src/filmhd1080
- - dict/ontodb/tools/entity_lists/parser/src/filmkomedia
- - dict/ontodb/tools/entity_lists/parser/src/filmov1000
- - dict/ontodb/tools/entity_lists/parser/src/filmpro
- - dict/ontodb/tools/entity_lists/parser/src/filmuzhasov
- - dict/ontodb/tools/entity_lists/parser/src/filmzor
- - dict/ontodb/tools/entity_lists/parser/src/hdkinoclub
- - dict/ontodb/tools/entity_lists/parser/src/iceagemult
- - dict/ontodb/tools/entity_lists/parser/src/imdb
- - dict/ontodb/tools/entity_lists/parser/src/ivi
- - dict/ontodb/tools/entity_lists/parser/src/kinohabr
- - dict/ontodb/tools/entity_lists/parser/src/kinohorror
- - dict/ontodb/tools/entity_lists/parser/src/kinolinehd
- - dict/ontodb/tools/entity_lists/parser/src/kinomliff
- - dict/ontodb/tools/entity_lists/parser/src/kinoonlinetop
- - dict/ontodb/tools/entity_lists/parser/src/kinopod
- - dict/ontodb/tools/entity_lists/parser/src/kinopoisk
- - dict/ontodb/tools/entity_lists/parser/src/kinorip
- - dict/ontodb/tools/entity_lists/parser/src/kinosky
- - dict/ontodb/tools/entity_lists/parser/src/kinotime
- - dict/ontodb/tools/entity_lists/parser/src/kinotop
- - dict/ontodb/tools/entity_lists/parser/src/kinovolt
- - dict/ontodb/tools/entity_lists/parser/src/luchshiespiski
- - dict/ontodb/tools/entity_lists/parser/src/megogo
- - dict/ontodb/tools/entity_lists/parser/src/multikstv
- - dict/ontodb/tools/entity_lists/parser/src/multyasha
- - dict/ontodb/tools/entity_lists/parser/src/newfilmpro
- - dict/ontodb/tools/entity_lists/parser/src/okino
- - dict/ontodb/tools/entity_lists/parser/src/okomediya
- - dict/ontodb/tools/entity_lists/parser/src/onlinekinohd
- - dict/ontodb/tools/entity_lists/parser/src/parkhorror
- - dict/ontodb/tools/entity_lists/parser/src/prostotop
- - dict/ontodb/tools/entity_lists/parser/src/rosmovies
- - dict/ontodb/tools/entity_lists/parser/src/rserial
- - dict/ontodb/tools/entity_lists/parser/src/shikimori
- - dict/ontodb/tools/entity_lists/parser/src/strahzona
- - dict/ontodb/tools/entity_lists/parser/src/tabfilm
- - dict/ontodb/tools/entity_lists/parser/src/thecinemaclub
- - dict/ontodb/tools/entity_lists/parser/src/tlum
- - dict/ontodb/tools/entity_lists/parser/src/topspiski
- - dict/ontodb/tools/entity_lists/parser/src/vmirefilmov
- - dict/ontodb/tools/entity_lists/parser/src/vokrugtv
- - dict/ontodb/tools/entity_lists/parser/src/westernfilm
- - dict/ontodb/tools/entity_lists/relev
- - dict/ontodb/tools/entity_lists/sticky
- - dict/ontodb/tools/fields_diff/lib
- - dict/ontodb/tools/ontodb_viewer
- - dict/ontodb/tools/ontodbfixes/import_fixes/lib
- - dict/ontodb/tools/ontodbfixes/viewer
- - dict/ontodb/tools/url_answer/lib
- - dict/ontodb/user_logs/serp_clicks/lib
- - dict/ontodb/user_logs/wiki_spy_clicks
- - dict/ontodb/utils
- - dict/ontodb/utils/add_clicks
- - dict/ontodb/utils/build_helpers
- - dict/ontodb/utils/import_json_timelines/lib
- - dict/ontodb/utils/map_card_data
- - dict/ontodb/utils/monitoring
- - dict/ontodb/utils/music
- - dict/ontodb/utils/norm_ontoids_in_gzt
- - dict/ontodb/utils/norm_urls
- - dict/ontodb/utils/string_utils
- - dict/ontodb/utils/support_words
- - dict/ontodb/utils/update_links
- - dict/ontodb/wikicommon
- - dict/ontodb/wikicommon/get_defin
- - dict/ontodb/wikicommon/infobox
- - dict/ontodb/wikicommon/link_to_ontoid
- - dict/ontodb/wikicommon/on_add_short_defin
- - dict/ontodb/wikicommon/on_build_card
- - dict/ontodb/wikicommon/resource_files/wiki_fields
- - dict/ontodb/wikicommon/text_mine_film_participants
- - dict/ontodb/wikicommon/text_mine_interesting_facts
- - dict/ontodb/wikicommon/text_mine_projects
- - dict/ontodb/wikicommon/text_mine_sport_team_participants
- - dict/ontodb/wikicommon/wiki
- - dict/ontodb/wikicommon/wiki_syntax
- - dict/tools/find_synonym
- - disk/admin/monitors/common
- - disk/admin/robot_switcher
- - dj/tools/viewer/custom/entity
+ W605:
+ ignore:
+ - W605
+ prefixes:
+ - addappter/backend/testing
+ - addappter/backend/testing/fixtures/configuration/ios
+ - adfox/amacs/tests/functional/tests_amacs/bugs/medium
+ - adfox/amacs/tests/functional/tests_amacs/dynamic_monetization/v2
+ - adfox/amacs/tests/functional/tests_amacs/targeting/targeting_logic_tree_puids
+ - adfox/amacs/tests/functional/utils
+ - adfox/amacs/tests/functional/utils/db
+ - adfox/amacs/tests/functional/utils/tools
+ - ads/bsyeti/servants/bot
+ - ads/libs/py_autobudget
+ - ads/libs/py_bid_correction
+ - ads/libs/py_cliutils
+ - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/mapreducelib
+ - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs
+ - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools
+ - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabutils
+ - ads/libs/py_ml_factors/factor
+ - ads/libs/py_ml_factors/matrixnet
+ - ads/libs/py_test_mapreduce
+ - ads/ml_engine/learn/result/flow_generate_extended_fstr
+ - ads/ml_engine/learn/result/local_extended_fstr
+ - ads/ml_engine/learn/result/local_extended_fstr/lib
+ - ads/ml_engine/lib
+ - ads/ml_monitoring/alerts/ut
+ - ads/nirvana/automl/lib/batch_processor
+ - ads/nirvana/difacto/online_loss_processor
+ - ads/nirvana/graph_retrier/lib
+ - ads/nirvana/online_learning/move_dmlc_dumps/lib
+ - ads/nirvana/online_learning/pipeline_launcher/task_utils/lib
+ - ads/nirvana/sequential_learning
+ - ads/nirvana/tools/apc_check
+ - ads/quality/apc/gmg/generate_workflow
+ - ads/quality/apc/gmg/make_lm_pool
+ - ads/quality/apc/gmg/make_lm_pool/lib
+ - ads/quality/apc/prgmgv3/workflow_constructor/lib
+ - ads/quality/bid_correction/lib
+ - ads/quality/dssm/lib
+ - ads/quality/dssm/prgmg/make_pool
+ - ads/quality/dssm/search/201708/make_pool
+ - ads/quality/dssm/synonyms/search/tools/join_fields
+ - ads/quality/max_positions
+ - ads/quality/search_lm_conv/lib
+ - ads/quality/tools/adj_calc
+ - ads/sandbox_scripts/bmcategory_queryage_coeffs/lib
+ - ads/sandbox_scripts/build_tag_rules_table
+ - ads/sandbox_scripts/clean_mapreduce/ut
+ - ads/sandbox_scripts/join_yabar
+ - ads/tools/mranalyze
+ - ads/tools/yt_operations_analyzer
+ - ads/watchman/contrib/apispec-patched
+ - ads/watchman/contrib/apispec-patched/apispec
+ - ads/watchman/experiments/lib
+ - advq/offline_phits/monitoring
+ - alice/boltalka/generative/tfnn/preprocess
+ - alice/boltalka/generative/training/data/nn/filtered_twitter
+ - alice/boltalka/generative/training/data/nn/util
+ - alice/boltalka/generative/training/data/nn/util/dict
+ - alice/boltalka/tools/dssm_preprocessing/preprocessing/lib
+ - alice/nlu/py_libs/tokenizer/ut/py2
+ - alice/nlu/py_libs/tokenizer/ut/py3
+ - alice/nlu/tools/paraphrase_finder
+ - alice/paskills/nirvana_inflector
+ - alice/paskills/recipe_utils/lib
+ - alice/tests/difftest/request_miner
+ - antirobot/tools/daily_routine/lib
+ - april/web/bas/bm
+ - april/web/bas/bm/utils
+ - april/web/bas/collector
+ - aurora/aurora/core
+ - aurora/aurora/core/services
+ - aurora/aurora/core/toloka
+ - aurora/xpath/api/utils
+ - aurora/xpath/applier
+ - balancer/test/functional/admin/admin
+ - balancer/test/functional/regexp_host
+ - balancer/test/util
+ - balancer/test/util/dnsfake
+ - billing/apikeys/apikeys
+ - billing/apikeys/apikeys/butils_port
+ - billing/apikeys/apikeys/butils_port/application
+ - billing/dcs/dcs
+ - billing/dcs/dcs/temporary/butils
+ - billing/dcs/dcs/temporary/butils/application
+ - cloud/bootstrap/db/src/admin
+ - cloud/iam/codegen/python/codegen
+ - cloud/netinfra/rknfilter/yc_rkn_s3tools
+ - commerce/adv_backend
+ - commerce/adv_backend/backend/management/commands/migrator
+ - commerce/adv_backend/backend/validators
+ - contrib/nginx/tests/tap
+ - cv/imageproc/ocr/tools/nirvana/blocks_dataset/extract_pdf_boxes
+ - cv/imageproc/ocr/tools/nirvana/blocks_dataset/g_blocks
+ - cv/imageproc/ocr/tools/nirvana/blocks_dataset/rotate_pdf
+ - cv/imageproc/ocr/tools/nirvana/confidence_factors/format_utils
+ - cv/imageproc/ocr/tools/nirvana/recaptcha/generate_captcha_images
+ - cv/imageproc/ocr/tools/statistic/auto_tests/ocr_test_lib
+ - cv/imageproc/ocr/tools/statistic/ocr_intent_statistic
+ - cv/imageproc/ocr/tools/statistic/rectify_evaluation/document_edges_statistic
+ - cv/imageproc/ocr/tools/statistic/rectify_evaluation/ocr_statistic
+ - cv/imageproc/ocr/tools/statistic/rectify_evaluation/yt_runner
+ - cv/imageproc/ocr/tools/toloka_labeling/add_main_areas_to_labelling
+ - cv/imageproc/ocr/tools/toloka_labeling/add_ocr_line_recognition
+ - cv/imageproc/ocr/tools/toloka_labeling/convert
+ - cv/imageproc/ocr/tools/toloka_labeling/convert_toloka_labeling_to_asessors_labeling
+ - cv/imageproc/ocr/tools/toloka_labeling/get_labeling
+ - cv/imageproc/ocr/tools/toloka_labeling/labeling
+ - cv/imageproc/ocr/tools/toloka_labeling/labeling_assessors
+ - cv/imageproc/ocr/tools/toloka_labeling/labeling_captcha
+ - cv/imageproc/ocr/tools/toloka_labeling/paint_good
+ - cv/imageproc/ocr/tools/toloka_labeling/studier/generate_blocks_images
+ - cv/imageproc/ocr/tools/toloka_labeling/studier/get_queries_info
+ - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/convert_box_labeling
+ - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/extact_ocr_with_gt_blocks
+ - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_bleu_statistic
+ - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_etalon_ocr_result
+ - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/get_translate_orig_from_ocr_labelling
+ - cv/imageproc/ocr/tools/toloka_labeling/translate_e2e_metric/match_blocks
+ - cv/imgclassifiers/framework/gpu_utilization
+ - cv/imgclassifiers/framework/nirvana/runners/train/statistics_parser
+ - datacloud/score_api/validators
+ - devtools/adept
+ - devtools/distbuild/deploy/servants
+ - devtools/dummy_arcadia/test/test_cache_invalidation
+ - devtools/import_contrib/projects/phonenumber
+ - devtools/qafw/qyp
+ - devtools/qafw/rtc_deploy/lib
+ - devtools/qafw/selenium/lib
+ - devtools/signer/signer/utils
+ - devtools/unitybot
+ - devtools/unitybot/responsibility
+ - devtools/ya/test
+ - devtools/ya/test/tests/lib
+ - dict/moria/nirvana/context/begemot_thesaurus_rule
+ - dict/moria/nirvana/lib/util
+ - dict/mt/alice/scenarios/prepare_data
+ - dict/mt/analytics/metrics/quality/mt/stupids/lib
+ - dict/mt/analytics/sentence_breaking/toloka_binary/lib
+ - dict/mt/eval/eval_viewer/lib/spec/mt
+ - dict/mt/eval/lib/metrics/bleu
+ - dict/mt/g2p/rule_based
+ - dict/mt/make/libs/common
+ - dict/mt/make/libs/eval
+ - dict/mt/make/libs/tfnn
+ - dict/mt/make/libs/translate_mtd
+ - dict/mt/make/modules/corpus_generation/common
+ - dict/mt/make/tools/lm_diff
+ - dict/mt/make/tools/make_cap_model/score_caps
+ - dict/mt/make/tools/opus_merge
+ - dict/mt/make/tools/tfnn/convert_mtd_to_tfnn
+ - dict/mt/make/tools/yt_transform/tests
+ - dict/mt/mtdict/make/definitions/modules/extracts/remove_etym
+ - dict/mt/scripts/testsets/crowdsource/globalvoices/find_parallel_pages/extract_page_info
+ - dict/mt/tools/log_converter/rem_tool
+ - dict/mt/tools/tmx_to_plain_text
+ - dict/ontodb/cardsparser/lib
+ - dict/ontodb/daily/merge_cache
+ - dict/ontodb/daily/validator
+ - dict/ontodb/images/lib
+ - dict/ontodb/isa/subtitle/add_subtitles
+ - dict/ontodb/lists/toloka/parsing
+ - dict/ontodb/onto_lib/card_to_url
+ - dict/ontodb/onto_lib/card_utils
+ - dict/ontodb/onto_lib/sources/discogs
+ - dict/ontodb/onto_lib/sources/fantlab
+ - dict/ontodb/onto_lib/sources/freebase
+ - dict/ontodb/onto_lib/sources/freebase/mediators
+ - dict/ontodb/onto_lib/sources/google_play
+ - dict/ontodb/onto_lib/sources/itunes
+ - dict/ontodb/onto_lib/sources/kinopoisk
+ - dict/ontodb/onto_lib/sources/labirint
+ - dict/ontodb/onto_lib/sources/musicbrainz
+ - dict/ontodb/onto_lib/sources/transfermarkt
+ - dict/ontodb/onto_lib/sources/www
+ - dict/ontodb/ontodb_log/ctr
+ - dict/ontodb/report/common
+ - dict/ontodb/report/film_stuff
+ - dict/ontodb/schema
+ - dict/ontodb/tools/entity_lists/build_watson_sources/config
+ - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmkomedia
+ - dict/ontodb/tools/entity_lists/build_watson_sources/src/filmzor
+ - dict/ontodb/tools/entity_lists/build_watson_sources/src/imdb
+ - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinolinehd
+ - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinotime
+ - dict/ontodb/tools/entity_lists/build_watson_sources/src/kinovolt
+ - dict/ontodb/tools/entity_lists/is_series_film_query
+ - dict/ontodb/tools/entity_lists/mine_titles
+ - dict/ontodb/tools/entity_lists/parser/config
+ - dict/ontodb/tools/entity_lists/parser/src/adme
+ - dict/ontodb/tools/entity_lists/parser/src/afisha
+ - dict/ontodb/tools/entity_lists/parser/src/allbestmovies
+ - dict/ontodb/tools/entity_lists/parser/src/cinemacc
+ - dict/ontodb/tools/entity_lists/parser/src/cinetoday
+ - dict/ontodb/tools/entity_lists/parser/src/cobrafilm
+ - dict/ontodb/tools/entity_lists/parser/src/detifilm
+ - dict/ontodb/tools/entity_lists/parser/src/dostfilms
+ - dict/ontodb/tools/entity_lists/parser/src/fasttorrentsu
+ - dict/ontodb/tools/entity_lists/parser/src/filmanias
+ - dict/ontodb/tools/entity_lists/parser/src/filmhd1080
+ - dict/ontodb/tools/entity_lists/parser/src/filmkomedia
+ - dict/ontodb/tools/entity_lists/parser/src/filmov1000
+ - dict/ontodb/tools/entity_lists/parser/src/filmpro
+ - dict/ontodb/tools/entity_lists/parser/src/filmuzhasov
+ - dict/ontodb/tools/entity_lists/parser/src/filmzor
+ - dict/ontodb/tools/entity_lists/parser/src/hdkinoclub
+ - dict/ontodb/tools/entity_lists/parser/src/iceagemult
+ - dict/ontodb/tools/entity_lists/parser/src/imdb
+ - dict/ontodb/tools/entity_lists/parser/src/ivi
+ - dict/ontodb/tools/entity_lists/parser/src/kinohabr
+ - dict/ontodb/tools/entity_lists/parser/src/kinohorror
+ - dict/ontodb/tools/entity_lists/parser/src/kinolinehd
+ - dict/ontodb/tools/entity_lists/parser/src/kinomliff
+ - dict/ontodb/tools/entity_lists/parser/src/kinoonlinetop
+ - dict/ontodb/tools/entity_lists/parser/src/kinopod
+ - dict/ontodb/tools/entity_lists/parser/src/kinopoisk
+ - dict/ontodb/tools/entity_lists/parser/src/kinorip
+ - dict/ontodb/tools/entity_lists/parser/src/kinosky
+ - dict/ontodb/tools/entity_lists/parser/src/kinotime
+ - dict/ontodb/tools/entity_lists/parser/src/kinotop
+ - dict/ontodb/tools/entity_lists/parser/src/kinovolt
+ - dict/ontodb/tools/entity_lists/parser/src/luchshiespiski
+ - dict/ontodb/tools/entity_lists/parser/src/megogo
+ - dict/ontodb/tools/entity_lists/parser/src/multikstv
+ - dict/ontodb/tools/entity_lists/parser/src/multyasha
+ - dict/ontodb/tools/entity_lists/parser/src/newfilmpro
+ - dict/ontodb/tools/entity_lists/parser/src/okino
+ - dict/ontodb/tools/entity_lists/parser/src/okomediya
+ - dict/ontodb/tools/entity_lists/parser/src/onlinekinohd
+ - dict/ontodb/tools/entity_lists/parser/src/parkhorror
+ - dict/ontodb/tools/entity_lists/parser/src/prostotop
+ - dict/ontodb/tools/entity_lists/parser/src/rosmovies
+ - dict/ontodb/tools/entity_lists/parser/src/rserial
+ - dict/ontodb/tools/entity_lists/parser/src/shikimori
+ - dict/ontodb/tools/entity_lists/parser/src/strahzona
+ - dict/ontodb/tools/entity_lists/parser/src/tabfilm
+ - dict/ontodb/tools/entity_lists/parser/src/thecinemaclub
+ - dict/ontodb/tools/entity_lists/parser/src/tlum
+ - dict/ontodb/tools/entity_lists/parser/src/topspiski
+ - dict/ontodb/tools/entity_lists/parser/src/vmirefilmov
+ - dict/ontodb/tools/entity_lists/parser/src/vokrugtv
+ - dict/ontodb/tools/entity_lists/parser/src/westernfilm
+ - dict/ontodb/tools/entity_lists/relev
+ - dict/ontodb/tools/entity_lists/sticky
+ - dict/ontodb/tools/fields_diff/lib
+ - dict/ontodb/tools/ontodb_viewer
+ - dict/ontodb/tools/ontodbfixes/import_fixes/lib
+ - dict/ontodb/tools/ontodbfixes/viewer
+ - dict/ontodb/tools/url_answer/lib
+ - dict/ontodb/user_logs/serp_clicks/lib
+ - dict/ontodb/user_logs/wiki_spy_clicks
+ - dict/ontodb/utils
+ - dict/ontodb/utils/add_clicks
+ - dict/ontodb/utils/build_helpers
+ - dict/ontodb/utils/import_json_timelines/lib
+ - dict/ontodb/utils/map_card_data
+ - dict/ontodb/utils/monitoring
+ - dict/ontodb/utils/music
+ - dict/ontodb/utils/norm_ontoids_in_gzt
+ - dict/ontodb/utils/norm_urls
+ - dict/ontodb/utils/string_utils
+ - dict/ontodb/utils/support_words
+ - dict/ontodb/utils/update_links
+ - dict/ontodb/wikicommon
+ - dict/ontodb/wikicommon/get_defin
+ - dict/ontodb/wikicommon/infobox
+ - dict/ontodb/wikicommon/link_to_ontoid
+ - dict/ontodb/wikicommon/on_add_short_defin
+ - dict/ontodb/wikicommon/on_build_card
+ - dict/ontodb/wikicommon/resource_files/wiki_fields
+ - dict/ontodb/wikicommon/text_mine_film_participants
+ - dict/ontodb/wikicommon/text_mine_interesting_facts
+ - dict/ontodb/wikicommon/text_mine_projects
+ - dict/ontodb/wikicommon/text_mine_sport_team_participants
+ - dict/ontodb/wikicommon/wiki
+ - dict/ontodb/wikicommon/wiki_syntax
+ - dict/tools/find_synonym
+ - disk/admin/monitors/common
+ - disk/admin/robot_switcher
+ - dj/tools/viewer/custom/entity
- education/lib/nirvana/operations/yt_nodes_deep_diff
- - education/schoolbook/analytics/adhoc/ANALITICSEDU-515
- - education/schoolbook/analytics/adhoc/ANALITICSEDU-687
- - entity/ontodb/tasks/backup_yt_tables/lib
- - entity/ontodb/tasks/clean_old_data
- - entity/ontodb/tasks/import_museums/lib
- - entity/ontodb/tasks/import_yam/lib
- - entity/ontodb/tasks/send_table_checker_sensors/lib
- - entity/ontodb/tasks/watson_converter/lib
- - entity/ontodb/util/wiki
- - entity/quality/helix
- - entity/quality/metrics/film_lists/combine_attributes_for_stupids_marking
- - entity/quality/metrics/not_film_lists/combine_attributes_for_stupids_marking
- - entity/quality/metrics/passport
- - entity/ugc/db/python/offline_processing
- - entity/ugc/quality/reviews_ranking/ml/lib/trueskill
- - entity/ugc/tests/lib
- - entity/ugc/tools/bell/replay_requests
- - entity/ugc/tools/comments/prepare_business_replies_for_cmnt
- - entity/ugc/tools/tank/user_poll_ammo
- - entity/ugc/tools/viewer_server/lib
- - entity/ugc/tools/viewer_server/lib/core
- - extsearch/audio/yamrec/query_browser
- - extsearch/collections/tools/mmeta2metrics_serp
- - extsearch/collections/tools/nirvana/collections_indexer
- - extsearch/collections/tools/upper2metrics_serp
- - extsearch/geo/conveyors/annotations/filtrate_banned_v2
- - extsearch/geo/conveyors/annotations/metro_extractor/get_metro_to_ll
- - extsearch/geo/conveyors/experimental/annotations/fast_annotations/collection_keywords_extractor
- - extsearch/geo/tools/similar_orgs/read_user_session
- - extsearch/geo/tools/special_features/mining_result_merger
- - extsearch/images/library/ytscraper
- - extsearch/images/money/scripts/robot
- - extsearch/images/money/scripts/robot/util
- - extsearch/images/money/tools/commercial_serps_downloader
- - extsearch/images/robot/index/testlib/index_process_description
- - extsearch/images/robot/library/pycm
- - extsearch/images/robot/scripts/cm/semidup2
- - extsearch/images/robot/tools/index_snapshot_cleaner
- - extsearch/images/robot/tools/indexrotate
- - extsearch/images/robot/tools/robot_losses/python/rotor_download_images
- - extsearch/video/python/yql
- - extsearch/video/quality/series/base/builder/common
- - extsearch/video/quality/series/base/builder/wrappers
- - extsearch/video/robot/cm/deletes/cmpy/playerdata
- - extsearch/video/robot/cm/library
- - extsearch/video/robot/hostsdb/tool/config/add_rules
- - extsearch/video/robot/rt_transcoder/metarobot/tests
- - extsearch/video/robot/rt_transcoder/transcoder/tests
- - haas/cmis/hwr_allocation
- - haas/cmis/hwr_allocation/allocation_methods
- - health/articles/articles_pipeline
- - health/articles/articles_pipeline/actions
- - health/articles/articles_pipeline/lib
- - health/articles/articles_pipeline/lib/util
- - health/common_libs/utils
- - health/yamd/health_import
- - health/yamd/health_import/data_tests
- - health/yamd/libs/cross_links
- - health/yamd/libs/utils
- - health/yamd/libs/utils/parsers
- - infra/awacs/vendor/awacs/pire/ut
- - infra/callisto/controllers/viewer/lib2
- - infra/cqudp/src
- - infra/cqudp/src/eggs
- - infra/deploy/tools/yd_migrate/lib
- - infra/gencfg-gui
- - infra/heartbeat/src
- - infra/heartbeat/src/daemon
- - infra/kernel/tools/coroner
- - infra/netconfig/utils/pinger/lib
- - infra/porto/api_py
- - infra/portoshell
- - infra/portoshell/src
- - infra/qyp/vmctl/src
- - infra/reconf_juggler/tools/jdiff/tests
- - infra/rtc/janitor
- - infra/scripts/eventlog_uploader/script
- - infra/yp_drcp/lib
- - infra/yp_dru/bin
- - irt/bmgen/market_data
- - irt/bmgen/synonyms
- - keyboard/analytics/toloka/nirvana/blocks
- - keyboard/analytics/toloka/nirvana/swipe
- - keyboard/analytics/toloka/nirvana/typing
- - keyboard/dict/nirvana/blocks
- - library/python/testing/filter
- - library/python/testing/gtest
- - locdoc/doc_tools/yoda/friends/okapi
- - locdoc/doc_tools/yoda/friends/okapi/operations
- - logbroker/tools/manual/alter_sqs_tables
- - logbroker/tools/manual/create_logbroker_account/lib
- - logbroker/tools/manual/load_test/load
- - mail/contrib/ccs-caldavtester
- - mail/contrib/ccs-caldavtester/src
- - mail/contrib/ccs-caldavtester/verifiers
- - mail/contrib/tatsu/test
- - mail/contrib/tatsu/test/grammar
- - mail/python/theatre/app
- - mail/tools/ews_call/ews_call
- - mail/tools/safely_delete_stids/lib
- - mail/yasm/lib
- - mail/yasm/lib/calendar/alerts
- - mail/yasm/lib/calendar/panels
- - mapreduce/yt/tools/du-yt
- - maps/analyzer/pylibs/watchman_api/lib
- - maps/automotive/proxy/config_generator
- - maps/automotive/remote_access/autotests/tests/data_types
- - maps/b2bgeo/mvrp_solver/backend/tests_lib
- - maps/carparks/regression/renderer/generate_ammo
- - maps/carparks/tools/route_lost_viewer/bin
- - maps/infra/monitoring/sla_calculator/core/services
- - maps/jams/renderer2/common/ecstatic/lib
- - maps/mobile/server/tools/cache_deprecator
- - maps/mobile/tools/android-manifest
- - maps/pylibs/dataset_collector
- - maps/pylibs/monitoring/lib
- - maps/pylibs/monitoring/tests
- - maps/renderer/tilesgen/tools/lib
- - maps/renderer/tools/download_release
- - maps/routing/router/regression/gen-stopwatch-ammo
- - maps/routing/router/scripts/gen-ammo
- - maps/routing/versus/lib
- - maps/tools/matcher_quality/routes_to_geoms
- - maps/tools/package_releaser/lib
- - maps/tools/tanker-build/translate_messages
- - maps/wikimap/mapspro/libs/python/pymod
- - maps/wikimap/mapspro/libs/python/pymod/yandex/maps/wiki
- - maps/wikimap/stat/libs/common/tests/lib
- - maps/wikimap/stat/libs/common/tests/lib/dates_ut
- - market/contrib/python/yaconf_v0.1.2
- - market/contrib/python/yaconf_v0.1.2/yaconf
- - market/idx/streams/yatf
- - market/library/cms_promo/py_utils
- - market/mstat/ch-cache/lib
- - market/mstat/ch-cache/lib/database
- - market/reductor/configure/lib
- - market/reductor/www
- - market/sre/library/python/maaslib
- - market/sre/services/balancer_api/lib
- - market/tools/report_stats/lib
- - market/yamarec/log-parsers/bin
- - market/yamarec/log-parsers/yamarec_log_parsers
- - market/yamarec/metarouter/tests
- - market/yamarec/metarouter/yamarec_metarouter
- - market/yamarec/performance/utils
- - mds/s3/s3_mds_proxy
- - mds/s3/s3_mds_proxy/s3mds/helpers/config
- - mds/s3/s3_mds_proxy/s3mds/idm/roles
- - mds/s3/s3_mds_proxy/s3mds/xml
- - media/media_support/media_support
- - media/media_support/media_support/chats
- - metrika/admin/maas/lib/core/common
- - metrika/pylib/iptruler
- - metrika/pylib/utils
- - ml/nirvana/nope
- - ml/tensorflow/ytensorflow
- - ml/tensorflow/ytensorflow/ytensorflow/inference
- - ml/tensorflow/ytensorflow/ytensorflow/quantization
- - ml/tensorflow/ytensorflow/ytensorflow/train/hooks
- - modadvert/libs/connectors
- - modadvert/libs/lyncher/factor_providers/evil_misprints/it
- - modadvert/libs/lyncher/factor_providers/misprints
- - modadvert/libs/lyncher/ut/rules
- - modadvert/libs/utils/common
- - modadvert/libs/utils/common/ut
- - modadvert/programs/abuse/bs_abuse_log_processor
- - modadvert/programs/direct_proxy/libs/handlers
- - modadvert/programs/saas_indexer/libs
- - modadvert/programs/transfer_manager/libs
- - modadvert/programs/update_flags_offline/tables_manager/libs
- - mssngr/router/tools/state_cache_updater_v2
- - opensource/sync/bin/arc2git
- - opensource/sync/bin/git2git
- - orgvisits/library/python/test_tools/yql/lib
- - orgvisits/library/python/text_tools
- - orgvisits/library/python/yt_jobs/tests
- - orgvisits/metrics/ugc_feedback/ctr
+ - education/schoolbook/analytics/adhoc/ANALITICSEDU-515
+ - education/schoolbook/analytics/adhoc/ANALITICSEDU-687
+ - entity/ontodb/tasks/backup_yt_tables/lib
+ - entity/ontodb/tasks/clean_old_data
+ - entity/ontodb/tasks/import_museums/lib
+ - entity/ontodb/tasks/import_yam/lib
+ - entity/ontodb/tasks/send_table_checker_sensors/lib
+ - entity/ontodb/tasks/watson_converter/lib
+ - entity/ontodb/util/wiki
+ - entity/quality/helix
+ - entity/quality/metrics/film_lists/combine_attributes_for_stupids_marking
+ - entity/quality/metrics/not_film_lists/combine_attributes_for_stupids_marking
+ - entity/quality/metrics/passport
+ - entity/ugc/db/python/offline_processing
+ - entity/ugc/quality/reviews_ranking/ml/lib/trueskill
+ - entity/ugc/tests/lib
+ - entity/ugc/tools/bell/replay_requests
+ - entity/ugc/tools/comments/prepare_business_replies_for_cmnt
+ - entity/ugc/tools/tank/user_poll_ammo
+ - entity/ugc/tools/viewer_server/lib
+ - entity/ugc/tools/viewer_server/lib/core
+ - extsearch/audio/yamrec/query_browser
+ - extsearch/collections/tools/mmeta2metrics_serp
+ - extsearch/collections/tools/nirvana/collections_indexer
+ - extsearch/collections/tools/upper2metrics_serp
+ - extsearch/geo/conveyors/annotations/filtrate_banned_v2
+ - extsearch/geo/conveyors/annotations/metro_extractor/get_metro_to_ll
+ - extsearch/geo/conveyors/experimental/annotations/fast_annotations/collection_keywords_extractor
+ - extsearch/geo/tools/similar_orgs/read_user_session
+ - extsearch/geo/tools/special_features/mining_result_merger
+ - extsearch/images/library/ytscraper
+ - extsearch/images/money/scripts/robot
+ - extsearch/images/money/scripts/robot/util
+ - extsearch/images/money/tools/commercial_serps_downloader
+ - extsearch/images/robot/index/testlib/index_process_description
+ - extsearch/images/robot/library/pycm
+ - extsearch/images/robot/scripts/cm/semidup2
+ - extsearch/images/robot/tools/index_snapshot_cleaner
+ - extsearch/images/robot/tools/indexrotate
+ - extsearch/images/robot/tools/robot_losses/python/rotor_download_images
+ - extsearch/video/python/yql
+ - extsearch/video/quality/series/base/builder/common
+ - extsearch/video/quality/series/base/builder/wrappers
+ - extsearch/video/robot/cm/deletes/cmpy/playerdata
+ - extsearch/video/robot/cm/library
+ - extsearch/video/robot/hostsdb/tool/config/add_rules
+ - extsearch/video/robot/rt_transcoder/metarobot/tests
+ - extsearch/video/robot/rt_transcoder/transcoder/tests
+ - haas/cmis/hwr_allocation
+ - haas/cmis/hwr_allocation/allocation_methods
+ - health/articles/articles_pipeline
+ - health/articles/articles_pipeline/actions
+ - health/articles/articles_pipeline/lib
+ - health/articles/articles_pipeline/lib/util
+ - health/common_libs/utils
+ - health/yamd/health_import
+ - health/yamd/health_import/data_tests
+ - health/yamd/libs/cross_links
+ - health/yamd/libs/utils
+ - health/yamd/libs/utils/parsers
+ - infra/awacs/vendor/awacs/pire/ut
+ - infra/callisto/controllers/viewer/lib2
+ - infra/cqudp/src
+ - infra/cqudp/src/eggs
+ - infra/deploy/tools/yd_migrate/lib
+ - infra/gencfg-gui
+ - infra/heartbeat/src
+ - infra/heartbeat/src/daemon
+ - infra/kernel/tools/coroner
+ - infra/netconfig/utils/pinger/lib
+ - infra/porto/api_py
+ - infra/portoshell
+ - infra/portoshell/src
+ - infra/qyp/vmctl/src
+ - infra/reconf_juggler/tools/jdiff/tests
+ - infra/rtc/janitor
+ - infra/scripts/eventlog_uploader/script
+ - infra/yp_drcp/lib
+ - infra/yp_dru/bin
+ - irt/bmgen/market_data
+ - irt/bmgen/synonyms
+ - keyboard/analytics/toloka/nirvana/blocks
+ - keyboard/analytics/toloka/nirvana/swipe
+ - keyboard/analytics/toloka/nirvana/typing
+ - keyboard/dict/nirvana/blocks
+ - library/python/testing/filter
+ - library/python/testing/gtest
+ - locdoc/doc_tools/yoda/friends/okapi
+ - locdoc/doc_tools/yoda/friends/okapi/operations
+ - logbroker/tools/manual/alter_sqs_tables
+ - logbroker/tools/manual/create_logbroker_account/lib
+ - logbroker/tools/manual/load_test/load
+ - mail/contrib/ccs-caldavtester
+ - mail/contrib/ccs-caldavtester/src
+ - mail/contrib/ccs-caldavtester/verifiers
+ - mail/contrib/tatsu/test
+ - mail/contrib/tatsu/test/grammar
+ - mail/python/theatre/app
+ - mail/tools/ews_call/ews_call
+ - mail/tools/safely_delete_stids/lib
+ - mail/yasm/lib
+ - mail/yasm/lib/calendar/alerts
+ - mail/yasm/lib/calendar/panels
+ - mapreduce/yt/tools/du-yt
+ - maps/analyzer/pylibs/watchman_api/lib
+ - maps/automotive/proxy/config_generator
+ - maps/automotive/remote_access/autotests/tests/data_types
+ - maps/b2bgeo/mvrp_solver/backend/tests_lib
+ - maps/carparks/regression/renderer/generate_ammo
+ - maps/carparks/tools/route_lost_viewer/bin
+ - maps/infra/monitoring/sla_calculator/core/services
+ - maps/jams/renderer2/common/ecstatic/lib
+ - maps/mobile/server/tools/cache_deprecator
+ - maps/mobile/tools/android-manifest
+ - maps/pylibs/dataset_collector
+ - maps/pylibs/monitoring/lib
+ - maps/pylibs/monitoring/tests
+ - maps/renderer/tilesgen/tools/lib
+ - maps/renderer/tools/download_release
+ - maps/routing/router/regression/gen-stopwatch-ammo
+ - maps/routing/router/scripts/gen-ammo
+ - maps/routing/versus/lib
+ - maps/tools/matcher_quality/routes_to_geoms
+ - maps/tools/package_releaser/lib
+ - maps/tools/tanker-build/translate_messages
+ - maps/wikimap/mapspro/libs/python/pymod
+ - maps/wikimap/mapspro/libs/python/pymod/yandex/maps/wiki
+ - maps/wikimap/stat/libs/common/tests/lib
+ - maps/wikimap/stat/libs/common/tests/lib/dates_ut
+ - market/contrib/python/yaconf_v0.1.2
+ - market/contrib/python/yaconf_v0.1.2/yaconf
+ - market/idx/streams/yatf
+ - market/library/cms_promo/py_utils
+ - market/mstat/ch-cache/lib
+ - market/mstat/ch-cache/lib/database
+ - market/reductor/configure/lib
+ - market/reductor/www
+ - market/sre/library/python/maaslib
+ - market/sre/services/balancer_api/lib
+ - market/tools/report_stats/lib
+ - market/yamarec/log-parsers/bin
+ - market/yamarec/log-parsers/yamarec_log_parsers
+ - market/yamarec/metarouter/tests
+ - market/yamarec/metarouter/yamarec_metarouter
+ - market/yamarec/performance/utils
+ - mds/s3/s3_mds_proxy
+ - mds/s3/s3_mds_proxy/s3mds/helpers/config
+ - mds/s3/s3_mds_proxy/s3mds/idm/roles
+ - mds/s3/s3_mds_proxy/s3mds/xml
+ - media/media_support/media_support
+ - media/media_support/media_support/chats
+ - metrika/admin/maas/lib/core/common
+ - metrika/pylib/iptruler
+ - metrika/pylib/utils
+ - ml/nirvana/nope
+ - ml/tensorflow/ytensorflow
+ - ml/tensorflow/ytensorflow/ytensorflow/inference
+ - ml/tensorflow/ytensorflow/ytensorflow/quantization
+ - ml/tensorflow/ytensorflow/ytensorflow/train/hooks
+ - modadvert/libs/connectors
+ - modadvert/libs/lyncher/factor_providers/evil_misprints/it
+ - modadvert/libs/lyncher/factor_providers/misprints
+ - modadvert/libs/lyncher/ut/rules
+ - modadvert/libs/utils/common
+ - modadvert/libs/utils/common/ut
+ - modadvert/programs/abuse/bs_abuse_log_processor
+ - modadvert/programs/direct_proxy/libs/handlers
+ - modadvert/programs/saas_indexer/libs
+ - modadvert/programs/transfer_manager/libs
+ - modadvert/programs/update_flags_offline/tables_manager/libs
+ - mssngr/router/tools/state_cache_updater_v2
+ - opensource/sync/bin/arc2git
+ - opensource/sync/bin/git2git
+ - orgvisits/library/python/test_tools/yql/lib
+ - orgvisits/library/python/text_tools
+ - orgvisits/library/python/yt_jobs/tests
+ - orgvisits/metrics/ugc_feedback/ctr
- passport/backend/adm_api/views/meltingpot
- passport/backend/core
- passport/backend/core/builders/frodo
@@ -471,765 +471,765 @@ migrations:
- passport/backend/core/historydb/tests
- passport/backend/core/suggest
- passport/backend/core/types
- - portal/tools/morda-release/server
- - quality/ab_testing/abt_resources_lib/loaders/tests/long_metrics
- - quality/ab_testing/cofe/bin/cofe_ctl
- - quality/ab_testing/cofe/projects/disk
- - quality/ab_testing/cofe/projects/disk/utils
- - quality/ab_testing/cofe/projects/geo/geoadv
- - quality/ab_testing/scripts/kati
- - quality/ab_testing/scripts/kati/lib
- - quality/antifraud/scripts/prod/traffic_chains
- - quality/functionality/chats/feedback/src/feedback/core
- - quality/functionality/chats/feedback/src/feedback/core/settings
- - quality/functionality/chats/floyd/src/floyd/core
- - quality/functionality/chats/floyd/src/floyd/core/settings
- - quality/functionality/entity_search/factqueries/instructions/prepare_facts
- - quality/functionality/entity_search/factqueries/tools/extract_fact/scripts/get_encyc_queries
- - quality/functionality/facts/common/goldensets/actuality/gen_population_tasks
- - quality/functionality/facts/recheck/calc_actuality_factors
- - quality/functionality/parsepl/libs/parsers/tests
- - quality/functionality/parsepl/nirvana/build_market_parsers/src
- - quality/functionality/parsepl/toloka/configs_for_parsers
- - quality/functionality/parsepl/toloka/configs_for_parsers/ut
- - quality/functionality/rtx/scripts/social-serp/nano_squeeze
- - quality/functionality/scripts/nirvana/jobs/casper/helpers/ytreader
- - quality/functionality/snippets/social_bna
- - quality/functionality/snippets/top_hosts/plugins/calculator888
- - quality/functionality/turbo/analytics/ecommerce/lib/page_classifier
- - quality/functionality/turbo/auto_cleanup/lib
- - quality/functionality/turbo/rss/parser_job/tests/small
- - quality/functionality/turbo/tools/infinity/tools/wmc
- - quality/functionality/turbo/tools/rediff
- - quality/functionality/turbo/tools/tdm
- - quality/functionality/turbo/yandex_pogoda
- - quality/functionality/unstructured/yt_concatenator/tests
- - quality/nirvana_tools/conveyor_operations/asgen/config
- - quality/nirvana_tools/conveyor_operations/eval_feature/eval_feature_viewer
- - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/append_formula_factors
- - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/get_cache_from_appended_features
- - quality/query_expansions/proc/covfefe/makefile_update
- - quality/query_expansions/tools/tests/test_dummy_process
- - quality/relev_tools/dsat/find_words_with_absent_forms
- - quality/relev_tools/lboost_ops/nirvana/operations/main/tests
- - quality/trailer/suggest/services/maps/conveyors/doc_features/chronostat
- - quality/trailer/suggest/services/maps/conveyors/pool/make_pointwise_pool
- - quality/trailer/suggest/services/maps/tools/retrieval_test
- - quality/trailer/suggest_dict/suggest_framework/tools
- - quality/user_sessions/market/custom_statistics
- - quality/user_sessions/market/custom_statistics/cust/abtypes/refuses
- - quality/userdata/scripts
- - quality/userdata/scripts/state_validation
- - quality/webfresh/learn/half_hour/build_union_prs
- - quality/webfresh/libraries/prs
- - quality/webfresh/libraries/prs/tests
- - quality/webfresh/metrics/aggregate_serps
- - quality/webfresh/metrics/bad_urls_stats
- - quality/webfresh/metrics/build_formulas_config
- - quality/yaqlib/yaqlint
- - regulargeo/tools
- - rnd_toolbox/deckard
- - rnd_toolbox/deckard/storage
- - rnd_toolbox/hwlib
- - robot/favicon/python
- - robot/jupiter/library/python/sample
- - robot/jupiter/scripts
- - robot/jupiter/viewers/galileo
- - robot/kwyt/scripts/sampling_data
- - robot/lemur/scripts/common
- - robot/metrics/forums_sbr/bin/get_forum_urls
- - robot/metrics/forums_sbr/bin/get_urls_sample
- - robot/metrics/forums_sbr/bin/parse_zora_result
- - robot/metrics/fresh_sbr/mk_fresh_serp
- - robot/metrics/rotor_missed_words_metric/bin/gemini_canonize
- - robot/metrics/speed_sbr/remove_fresh_hosts
- - robot/quality/robotrank/mk_learn_pool/lib
- - robot/salmon_agent/counters
- - robot/salmon_agent/utils
- - rt-research/broadmatching/mr/IRT-1517
- - rt-research/broadmatching/scripts/dyn-smart-banners/update_dyn_trashfilter
- - rt-research/broadmatching/scripts/pylib
- - rt-research/broadmatching/scripts/pylib/bm
- - rt-research/broadmatching/scripts/yt/catalogia_mapper
- - rt-research/broadmatching/scripts/yt/cdict_generator
- - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners
- - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners/generate_filtered_links
- - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources
- - rt-research/multik/deploy/deploy
- - saas/tools/devops/lib
- - saas/tools/devops/lib23/tests/py2
- - saas/tools/devops/lib23/tests/py23
- - samogon/libs/sandbox
- - sandbox/common/projects_handler
- - sandbox/projects/BuildBegemotLightTestConfig
- - sandbox/projects/BuildNewsPackage
- - sandbox/projects/BuildSportProxyData
- - sandbox/projects/BuildYobject
- - sandbox/projects/CheckFreshDocuments
- - sandbox/projects/CompareNewsdResponses
- - sandbox/projects/CompareYmakeDump
- - sandbox/projects/ConvertVideo
- - sandbox/projects/DeployVideoMmetaShard
- - sandbox/projects/GetAdvquickDatabase
- - sandbox/projects/GetFusionMiddlesearchResponses
- - sandbox/projects/GetPokazometerDatabase
- - sandbox/projects/IexBuildPackages
- - sandbox/projects/IexImportPatterns
- - sandbox/projects/LandingConstructor
- - sandbox/projects/LandingConstructor/Stat/Jobs
- - sandbox/projects/MediaLib
- - sandbox/projects/MediaLib/shardmap
- - sandbox/projects/MixQueriesExperimentsRegions
- - sandbox/projects/PersonalPoiGenerator
- - sandbox/projects/PersonalPoiGenerator/PoisDumper
- - sandbox/projects/ReleaseBalancerConfigGenerator
- - sandbox/projects/ReleaseConfigGeneratorService
- - sandbox/projects/ReleaseMediaShardmaps
- - sandbox/projects/ReportDataRuntime
- - sandbox/projects/ReportDataRuntimeItem
- - sandbox/projects/ReportDataRuntimeRT
- - sandbox/projects/ReportDataRuntimeTags
- - sandbox/projects/ReportRuleTestFull
- - sandbox/projects/RunNewsLoadtest
- - sandbox/projects/SOC/YtProxyAnalyzeCommandParams
- - sandbox/projects/SOC/YtRawMasterLogAnalysis
- - sandbox/projects/SpawnTestConfigGenerator
- - sandbox/projects/TaxiSecurity
- - sandbox/projects/TaxiSecurity/BadLogsAnalyzer
- - sandbox/projects/TaxiSecurity/YodaxAnalyzer
- - sandbox/projects/TestFrontMetricsLogs
- - sandbox/projects/TestFrontMetricsLogs/modules
- - sandbox/projects/TestReportPerformance
- - sandbox/projects/TickenatorBatchProcessing
- - sandbox/projects/TickenatorBatchProcessing/YasmScreenshoter
- - sandbox/projects/Travel/tasks/tools
- - sandbox/projects/Ufo
- - sandbox/projects/Ufo/CI
- - sandbox/projects/UpdateConfigGeneratorDb
- - sandbox/projects/UpdateMapsWizardPpoData
- - sandbox/projects/UpdateTestenvNewsdResources
- - sandbox/projects/UrlsByShowCounters/executable/lib
- - sandbox/projects/VpsAmmo
- - sandbox/projects/YabsDebuilder
- - sandbox/projects/YabsDebuilderDev
- - sandbox/projects/YabsServerStatPerformance
- - sandbox/projects/adfox/adfox_ui/testpalm/testrunCreate
- - sandbox/projects/alice_evo
- - sandbox/projects/alice_evo/AliceEvoIntegrationTestsWrapper
- - sandbox/projects/antirobot
- - sandbox/projects/antirobot/AsnNames
- - sandbox/projects/antirobot/LoadTesting
- - sandbox/projects/autobudget/autobudget_lib
- - sandbox/projects/avia/avia_statistics/update_alternative_routes_prices
- - sandbox/projects/avia/avia_statistics/update_flights
- - sandbox/projects/avia/avia_statistics/update_median_prices
- - sandbox/projects/avia/avia_statistics/update_popular_months
- - sandbox/projects/avia/avia_statistics/update_return_ticket_prices
- - sandbox/projects/avia/avia_statistics/update_route_crosslinks
- - sandbox/projects/avia/flight_status_registrar/FlightStatsRegistrar
- - sandbox/projects/avia/flight_status_registrar/OagFlightsRegistrar
- - sandbox/projects/avia/flight_status_registrar/VariFlightRegistrar
- - sandbox/projects/avia/log_unknown_fare_codes
- - sandbox/projects/avia/travel_avia_dump_resource/task
- - sandbox/projects/bsyeti
- - sandbox/projects/canvas
- - sandbox/projects/canvas/video_constructor_utils
- - sandbox/projects/cloud/yfm
- - sandbox/projects/common/compare_upper_results
- - sandbox/projects/common/fusion
- - sandbox/projects/common/gencfg
- - sandbox/projects/common/market_report
- - sandbox/projects/common/mobilesearch
- - sandbox/projects/common/mobilesearch/startrek_client
- - sandbox/projects/common/yabs
- - sandbox/projects/gencfg
- - sandbox/projects/gencfg/BuildConfigGenerator2
- - sandbox/projects/gencfg/GencfgMonitoringCharts
- - sandbox/projects/gencfg/ReleaseConfigGenerator2
- - sandbox/projects/gencfg/workflow
- - sandbox/projects/health/acceptance_begemot_graph
- - sandbox/projects/laas
- - sandbox/projects/laas/CollectTestGeobases
- - sandbox/projects/logs/HashedSessionsDiff
- - sandbox/projects/logs/TestRalibPerfomance
- - sandbox/projects/market/infra/helpers
- - sandbox/projects/market/infra/helpers/changes_helper
- - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks
- - sandbox/projects/media
- - sandbox/projects/media/admins/mysqlcopydb
- - sandbox/projects/media/kp-front-nginx/config-validation
- - sandbox/projects/media_crm/tasks
- - sandbox/projects/media_crm/tasks/media_crm_deploy
- - sandbox/projects/metrika/mobile/sdk/helpers
- - sandbox/projects/mssngr/rtc
- - sandbox/projects/mssngr/runtime/MssngrRouterLoadTest
- - sandbox/projects/music
- - sandbox/projects/music/MusicExportYdbToYt
- - sandbox/projects/music/ReleaseMusic
- - sandbox/projects/music/deployment/helpers
- - sandbox/projects/news
- - sandbox/projects/news/CompareNewsAnnotatorResponses
- - sandbox/projects/news/UpdateRTHubAdvWidgetResources
- - sandbox/projects/ofd/backend/ofd_backend_package_build
- - sandbox/projects/ofd/backend/ofd_backend_run_tests
- - sandbox/projects/ofd/notifier/ofd_notifier_package_build
- - sandbox/projects/ofd/runtime/ofd_runtime_package_build
- - sandbox/projects/ofd/runtime/ofd_runtime_run_tests
- - sandbox/projects/ofd/tasks/ofd_tasks_package_build
- - sandbox/projects/porto/BuildPortoLayer
- - sandbox/projects/porto/BuildPortoLayerTmp
- - sandbox/projects/qafw
- - sandbox/projects/qafw/ansible
- - sandbox/projects/reconf
- - sandbox/projects/sandbox_ci/sandbox_ci_compare_load_test
- - sandbox/projects/sandbox_ci/task
- - sandbox/projects/tests
- - sandbox/projects/turbo
- - sandbox/projects/turbo/CompareTurboResponses
- - sandbox/projects/turbo/SampleForTurbo
- - sandbox/projects/vh
- - sandbox/projects/vh/faas/FaasConvertVideoVodTest
- - sandbox/projects/vh/frontend/count_diff
- - sandbox/projects/vh/frontend/generate_requests_from_yt_logs
- - sandbox/projects/vins
- - sandbox/projects/vins/AliceBegemotMegamindPerfTest
- - sandbox/projects/vins/BuildVinsCustomEntity
- - sandbox/projects/vins/MegamindPerfTest
- - sandbox/projects/vins/VinsPerfTest
- - sandbox/projects/websearch/CheckPrechargeAfterMemoryMap
- - sandbox/projects/yane/ParseYanswerFactLogs
- - sandbox/sdk2
- - sandbox/sdk2/vcs
- - sandbox/serviceapi
- - sandbox/serviceapi/handlers
- - sandbox/yasandbox/database/clickhouse
- - skynet/kernel
- - skynet/kernel/util/sys/user
- - skynet/library
- - skynet/library/tasks
- - smart_devices/crash_analytics/tools/minidump_analyzer
- - smart_devices/tools/launcher2/tests/restarts
- - smm/lib/models/sklearn
- - sprav/mining/botanik_miner
- - statbox/qb2
- - statbox/qb2/qb2_extensions/api/v1/extractors
- - statbox/qb2/qb2_extensions/api/v1/extractors/pool
- - statbox/statkey/jam/jobs/cubes/desktop_installs/v2
- - strm/generate/lib/generate
- - sup/stat/find_pushes
- - talents/nlu/scripts/train/geoname
- - testenv/jobs/rtyserver
- - tools/mkdocs_builder/lib
- - tools/mkdocs_builder/mkdocs_yandex
- - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex
- - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex/ext/markdown
- - tools/releaser/src
- - tools/ygetparam
- - travel/avia/library/python/common
- - travel/avia/library/python/common/tests
- - travel/avia/library/python/common/tests/lib
- - travel/avia/library/python/common/utils
- - travel/avia/library/python/geosearch
- - travel/avia/library/python/geosearch/views
- - travel/avia/library/python/route_search
- - travel/avia/library/python/route_search/by_number
- - travel/rasp/bus/admin/utils
- - travel/rasp/bus/admin/utils/points
- - travel/rasp/library/python/common/tests
- - travel/rasp/library/python/common/tests/data_api/billing
- - travel/rasp/library/python/common/tests/data_api/dzv
- - travel/rasp/library/python/geosearch
- - travel/rasp/library/python/geosearch/views
- - travel/rasp/library/python/route_search
- - travel/rasp/library/python/route_search/by_number
- - travel/rasp/train_api
- - travel/rasp/train_api/scripts
- - travel/rasp/train_api/tests
- - travel/rasp/train_api/tests/tariffs/train/views
- - travel/rasp/train_api/tests/tariffs/train/wizard
- - travel/rasp/train_api/tests/train_purchase
- - travel/rasp/train_api/tests/train_purchase/tasks
- - travel/rasp/train_api/train_partners/base/train_details
- - vcs/svn/hooks/check_arc_commit
- - vh/telegram/sqs2media
- - vh/telegram/sqs2media/handlers
- - voicetech/asr/tools/asr_analyzer/lib
- - voicetech/common/voicetable/bin/filter_text
- - voicetech/common/voicetable/checks/bin/general_voicetable_check
- - voicetech/infra/gdpr_proxy/service
- - voicetech/tts/vh/utils
- - weather/workers/warnings/general
- - yabs/analytics/anomaly_analyzer/src
- - yabs/chat_bot/bot
- - yabs/event-utils
- - yabs/outdoor/libs/confirmed_booking
- - yabs/python-libs/common
- - yabs/qa/b2b_utils/bsserver_b2b/engine/bs_utils
- - yabs/qa/b2b_utils/bsserver_b2b/engine/mongo_utils
- - yabs/qa/b2b_utils/bsserver_b2b/engine/run
- - yabs/qa/b2b_utils/bsserver_b2b/engine/validate_scripts
- - yabs/qa/yabs_b2b_tank/qabs/b2b
- - yabs/sbyt/testing/core
- - yabs/server/cs/pylibs/partner_interface_monitor
- - yabs/server/cs/pylibs/settings
- - yabs/server/infra/bstrbufbuf/plugin
- - yabs/server/infra/trivial_cron
- - yabs/server/libs/py_markdown_strings
- - yabs/server/test/ft/BSDEV-73064
- - yabs/server/test/ft/BSSERVER-11503
- - yabs/server/test/ft/BSSERVER-14195
- - yabs/server/test/ft/BSSERVER-2122
- - yabs/server/test/ft/BSSERVER-2158
- - yabs/server/test/ft/BSSERVER-2454
- - yabs/server/test/ft/BSSERVER-2976
- - yabs/server/test/ft/BSSERVER-3895
- - yabs/server/test/ft/BSSERVER-9233
- - yabs/server/test/ft/NANPU-817
- - yabs/server/test/ft/checks
- - yabs/server/test/pylibs/qxl
- - yabs/server/test/pylibs/simulator
- - yabs/server/test/qabs_bsserver_pytest
- - yabs/server/test/tools/oneshot_tester/lib
- - yabs/stat/dropstat2/api/lib
- - yabs/stat/infra/clickhouse/repair_master_report
- - yabs/utils/autosupbs/pylibs/tasks_generator
- - yabs/utils/autosupbs/tests/tasks_generator
- - yabs/utils/yabs-mysql-binlog-audit/lib
- - yabs/vh/cms-pgaas/cms_common
- - yabs/vh/cms-pgaas/cms_common/biz
- - yabs/vh/cms-pgaas/content_importer/evsproduction/pattern_based
- - yabs/vh/cms-pgaas/content_ksiva_api/lib
- - yabs/vh/cms-pgaas/feed_miner
- - yabs/vh/cms-pgaas/feed_miner/downloader_middlewares
- - yabs/vh/frontend/test/vh_pytest
- - yabs/web-bins/export-stat/pcode_experiments/dill
- - yabs/web-bins/export-stat/pcode_experiments/issue
- - yaphone/advisor/project
- - yaphone/localization_admin/src
- - yaphone/localization_admin/src/models
- - yaphone/localization_admin/src/models/details
- - yaphone/localization_admin/src/models/support_info
- - yp/scheduler_simulator/simtool
- - yql/tools/docs/custom_mkdocs
- - yql/tools/docs/wiki2markdown
- - yql/tools/mrjob/test
- - yql/tools/qplayer
- - yweb/antimalware/mitb/mitb_monitor/lib
- - yweb/antiporno/analyze_yt_ops
- - yweb/antiporno/cp_conv/prepare_suggestive_cp_img_basket
- - yweb/antiporno/cp_conv/text_classif/bin
- - yweb/antiporno/nav/lib
- - yweb/antiporno/pyutil/url
- - yweb/antiporno/queries_manual_markup/lib
- - yweb/antiporno/query_analyzer/porn_query_config_parser
- - yweb/antiporno/site_reachability/lib/reachability_common
- - yweb/antiporno/top_queries_cleanup/yql/test
- - yweb/antispam/clean_web/tools/run_cm_targets
- - yweb/antispam/mascot/scripts/tcinet
- - yweb/antispam/seo_masks/py/static_impl/ut
- - yweb/antispam/tools/yql
- - yweb/antispam/webspam/collections/vw_model_applier/bin
- - yweb/antispam/webspam/export/tries/tools/upper_tries
- - yweb/antispam/ytgr/viewer
- - yweb/blender/newsletter/unused_formulas
- - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils
- - yweb/blender/scripts/nirvana/jobs/calc_workflow_num_with_filters
- - yweb/blender/scripts/nirvana/jobs/join_features
- - yweb/blender/scripts/nirvana/jobs/train_sbs_model/utils
- - yweb/blogs/parsers/test
- - yweb/freshness/scripts/sport_pushes
- - yweb/freshness/scripts/sport_wizard
- - yweb/freshness/scripts/svn
- - yweb/freshness/scripts/trends/trendbot_tg/tg_handler
- - yweb/freshness/scripts/trends/trendbot_tg/tg_handler/foreground
- - yweb/news/hosts_differ
- - yweb/news/runtime_scripts/event_to_infra
- - yweb/news/tests
- - yweb/news/tests/export
- - yweb/news/tests/utils
- - yweb/robot/limbo
- - yweb/robot/limbo/imports
- - yweb/robot/metrics/pmusca/lib
- - yweb/sitelinks/astrolabe/build_bna/candidates/filter_by_region
- - yweb/sitelinks/scripts/sitelinks
- - yweb/structhtml/richsnippets/scripts/build_foto_recipes/prepare_to_deploy
- - yweb/verticals/scripts/sport_chats
- - yweb/verticals/scripts/sport_zen_updater/add_parsed_zen_urls
- - yweb/video/vparsrobot/v2/tests
- - yweb/video/vparsrobot/v2/tests-large
- - yweb/webdaemons/clickdaemon/tools/create_ammo_from_tcpdump
- - yweb/yasap/answers_nirvana/make_ammos
- - yweb/yasap/answers_quality/plagiarism/prepare_scraper_queries
- - yweb/yasap/pdb/backend/offline_views/history_calculator
- - yweb/yasap/pdb/food/normalizer
- - yweb/yasap/pdb/nirvana/gathered_boards_delta
- - yweb/younglings/tasks/YOUNGLINGS-516
- - zootopia/analytics/drive/source/drive/operations/support/registrations/reg_quality
- - zootopia/hub/vds
- - zootopia/hub/vds/onetime/orgthief/orgthief/parsers
- - zootopia/hub/vds/scripts
- - zootopia/hub/vds/services/velobike
- F841:
- ignore:
- - F841
- prefixes:
- - addappter/web/api
- - addappter/web/api/views
- - addappter/web/api/views/api
- - addappter/web/api/views/frontend
- - addappter/web/common
- - addappter/web/common/events
- - addappter/web/libs
- - addappter/web/libs/marshmallow
- - addappter/web/libs/walrus
- - addappter/web/testing/fixtures
- - adfox/infra/amacs_config/lib
- - ads/autobudget/metrics/equivalency_monitoring
- - ads/autobudget/metrics/example_monitoring
- - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools
- - ads/ml_engine/learn/result/local_extended_fstr/lib
- - ads/pytorch/lib/online_learning/production/processors/tsar_processor/lib
- - ads/quality/apc_check_py
- - ads/quality/embedding/tsar_tensor/ft_pool/lib
- - ads/quality/ltp/libs/build_pool
- - ads/sandbox_scripts/ggmonitor
- - alice/analytics/wer/g2p
- - alice/hollywood/tests/perf_test
- - alice/megamind/tests/library
- - alice/paskills/granet_server/tests
- - alice/uniproxy/bin/send-digest
- - alice/uniproxy/bin/uniproxy
- - alice/uniproxy/bin/uniproxy-delivery
- - alice/uniproxy/bin/uniproxy-subway
- - alice/uniproxy/bin/uniproxy-unistat
- - alice/uniproxy/bin/yabio-storage
- - alice/uniproxy/library/testing
- - alice/uniproxy/library/testing/mocks
- - alice/uniproxy/tools/balancer_top
- - apphost/conf/tests/blackbox
- - april/badb
- - april/badb/badb/db/mysql
- - april/web/bas/ca
- - april/web/bas/ca/forms
- - april/web/bas/collector
- - aurora/scripts/parsers/zoon_ru
- - balancer/test/functional/regexp_host
- - balancer/test/functional/regexp_path
- - billing/apikeys/apikeys
- - billing/apikeys/apikeys/mapper
- - billing/bcl/bcl
- - billing/refs/refs
- - cloud/marketplace/queue/yc_marketplace_queue
- - cloud/netinfra/rknfilter/yc_rkn_common
- - cloud/netinfra/rknfilter/yc_rkn_config_node
- - cmnt/tools/regression/request_classes
- - cv/imageproc/ocr/tools/database_extraction/nirvana/imgaug/src/augmenters
- - cv/short2long/training/yt_calc_factors
- - datacloud/log_reader/lib
- - devtools/local_cache/ac/tests/perfac
- - dict/bert/make/lib
- - dict/bert/make/lib/models
- - dict/bert/make/lib/tasks
- - dict/mt/analytics/sentence_breaking/translate_human_eval_comparison
- - dict/mt/g2p/asr/graph
- - dict/ontodb/onto_lib/sources/wikidata
- - dict/ontodb/proto/direct/ut
- - dict/ontodb/utils/export_src_codes
- - direct/infra/direct_zkcli
- - direct/infra/dt-dump-b2yt-data
- - district/logbroker_consumers/lib
- - district/logbroker_consumers/lib/yt
- - dj/tools/rthub_profiles/acceptance/acceptance_tool
- - dj/tools/viewer/custom/entity
- - edadeal/analytics/scripts/CashbackReport/CashbackReportLib
- - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib
- - education/lib/common
- - education/schoolbook/analytics/adhoc/ANALITICSEDU-687
- - entity/ontodb/tasks/vloggers
- - entity/recommender/nirvana/operations/abt_experiments_launcher
- - entity/ugc/nirvana/ugcdb/support_hidden_import/lib/ut
- - entity/ugc/tools/update_photos
- - extsearch/audio/generative/py/uploader
- - extsearch/images/tools/nirvana/download_serps
- - extsearch/video/robot/cm/library/ut
- - extsearch/video/robot/cm/transcoder/cmpy/vh_index_dups_matcher
- - extsearch/video/robot/crawling/player_testing/services/live_proxy
- - extsearch/video/robot/previews/hitman/lost_preview_status
- - extsearch/video/robot/tools/library/python
- - extsearch/video/transcoder/per_title/handler
- - extsearch/video/transcoder/per_title/vmaf
- - geosuggest/conveyors/learn_pool/lst_weights/lib
- - haas/cmis/hwr_preorders
- - haas/graphite_sync/get_report
- - infra/deploy_queue_controller/lib
- - infra/dist/dmover/bin/dmover
- - infra/dist/dmover/lib
- - infra/dist/dmover/lib/internal
- - infra/dist/dmover/tests
- - infra/host-cpu-metrics
- - infra/host-cpu-metrics/host_metrics
- - infra/porto/api_py
- - infra/qyp/vmproxy/tests
- - infra/shawshank/tests
- - infra/skybit
- - infra/skybit/src
- - infra/yp_dns/tools/handle_duplicate_records/lib
- - infra/yp_quota_distributor/lib
- - keyboard/dict/nirvana/config_generator/blocks
- - keyboard/dict/synthetic_ngrams/synthetic_ngrams_builder
- - lbs/metrics/lbs_binbase_diff
- - library/python/bstr
- - logbroker/tools/manual/collect_capacity/base_2020
- - logbroker/tools/manual/collect_capacity/collect_pre
- - logbroker/tools/manual/collect_capacity/set_capacity_pre
- - logbroker/tools/manual/collect_capacity/topic_list
- - logbroker/tools/manual/create_logbroker_account/lib
- - logbroker/tools/startrek/st
- - mail/python/fake_mulcagate
- - mail/python/theatre/app
- - mail/python/theatre/app/log_helpers
- - mail/tools/sql_execute_per_shard/lib
- - maps/analytics/legacy/nile/statadhoc-8703-site-api-report
- - maps/automotive/carwashes/tests/src
- - maps/automotive/qa/metrics/common/ut
- - maps/automotive/tools/statistics_auto/pylib/track_match_finder
- - maps/infra/apiteka/config_uploader/tests
- - maps/infra/sedem/cli/tests/release
- - maps/infra/sedem/cli/tests/release/utils
- - maps/tools/matcher_quality/routes_to_geoms
- - maps_adv/common/shared_mock/tests
- - market/mobile_validator/mt/env
- - market/mstat/ch-cache/tests
- - market/sre/services/cema-proxy/lib
- - market/sre/services/cema/lib
- - market/sre/services/cema/lib/classes
- - market/yamarec/yamarec/yamarec1
- - market/yamarec/yamarec/yamarec1/tasks
- - metrika/admin/brb/server/lib
- - metrika/admin/maas/bin/backend
- - metrika/admin/maas/bin/monitoring/maas_instances_memory
- - metrika/admin/maas/lib/core/common
- - metrika/admin/maas/lib/core/daemon
- - metrika/admin/maas/lib/core/service
- - metrika/admin/python/duty/bot/lib
- - metrika/admin/python/scripts/jrun
- - metrika/tasklets/conductor/impl
- - milab/lib/i2tclient/python
- - ml/tensorflow/tfnn/tests
- - mlp/mail/aspam/experiments/MLP_231
- - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound
- - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats
- - modadvert/libs/connectors/loggers
- - modadvert/libs/http
- - modadvert/libs/laas/workers/domain_threats
- - modadvert/libs/laas/workers/features_from_href
- - modadvert/libs/utils/dictutils/ut
- - modadvert/programs/cv_app/libs
- - modadvert/tools/accept_campaigns
- - mssngr/botfarm/src/bot
- - music/analytics/jam-sox/music_lib/financial_reports/tests/unit
- - music/tools/download-info
+ - portal/tools/morda-release/server
+ - quality/ab_testing/abt_resources_lib/loaders/tests/long_metrics
+ - quality/ab_testing/cofe/bin/cofe_ctl
+ - quality/ab_testing/cofe/projects/disk
+ - quality/ab_testing/cofe/projects/disk/utils
+ - quality/ab_testing/cofe/projects/geo/geoadv
+ - quality/ab_testing/scripts/kati
+ - quality/ab_testing/scripts/kati/lib
+ - quality/antifraud/scripts/prod/traffic_chains
+ - quality/functionality/chats/feedback/src/feedback/core
+ - quality/functionality/chats/feedback/src/feedback/core/settings
+ - quality/functionality/chats/floyd/src/floyd/core
+ - quality/functionality/chats/floyd/src/floyd/core/settings
+ - quality/functionality/entity_search/factqueries/instructions/prepare_facts
+ - quality/functionality/entity_search/factqueries/tools/extract_fact/scripts/get_encyc_queries
+ - quality/functionality/facts/common/goldensets/actuality/gen_population_tasks
+ - quality/functionality/facts/recheck/calc_actuality_factors
+ - quality/functionality/parsepl/libs/parsers/tests
+ - quality/functionality/parsepl/nirvana/build_market_parsers/src
+ - quality/functionality/parsepl/toloka/configs_for_parsers
+ - quality/functionality/parsepl/toloka/configs_for_parsers/ut
+ - quality/functionality/rtx/scripts/social-serp/nano_squeeze
+ - quality/functionality/scripts/nirvana/jobs/casper/helpers/ytreader
+ - quality/functionality/snippets/social_bna
+ - quality/functionality/snippets/top_hosts/plugins/calculator888
+ - quality/functionality/turbo/analytics/ecommerce/lib/page_classifier
+ - quality/functionality/turbo/auto_cleanup/lib
+ - quality/functionality/turbo/rss/parser_job/tests/small
+ - quality/functionality/turbo/tools/infinity/tools/wmc
+ - quality/functionality/turbo/tools/rediff
+ - quality/functionality/turbo/tools/tdm
+ - quality/functionality/turbo/yandex_pogoda
+ - quality/functionality/unstructured/yt_concatenator/tests
+ - quality/nirvana_tools/conveyor_operations/asgen/config
+ - quality/nirvana_tools/conveyor_operations/eval_feature/eval_feature_viewer
+ - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/append_formula_factors
+ - quality/nirvana_tools/conveyor_operations/meta_formula_bfmf_bundle/get_cache_from_appended_features
+ - quality/query_expansions/proc/covfefe/makefile_update
+ - quality/query_expansions/tools/tests/test_dummy_process
+ - quality/relev_tools/dsat/find_words_with_absent_forms
+ - quality/relev_tools/lboost_ops/nirvana/operations/main/tests
+ - quality/trailer/suggest/services/maps/conveyors/doc_features/chronostat
+ - quality/trailer/suggest/services/maps/conveyors/pool/make_pointwise_pool
+ - quality/trailer/suggest/services/maps/tools/retrieval_test
+ - quality/trailer/suggest_dict/suggest_framework/tools
+ - quality/user_sessions/market/custom_statistics
+ - quality/user_sessions/market/custom_statistics/cust/abtypes/refuses
+ - quality/userdata/scripts
+ - quality/userdata/scripts/state_validation
+ - quality/webfresh/learn/half_hour/build_union_prs
+ - quality/webfresh/libraries/prs
+ - quality/webfresh/libraries/prs/tests
+ - quality/webfresh/metrics/aggregate_serps
+ - quality/webfresh/metrics/bad_urls_stats
+ - quality/webfresh/metrics/build_formulas_config
+ - quality/yaqlib/yaqlint
+ - regulargeo/tools
+ - rnd_toolbox/deckard
+ - rnd_toolbox/deckard/storage
+ - rnd_toolbox/hwlib
+ - robot/favicon/python
+ - robot/jupiter/library/python/sample
+ - robot/jupiter/scripts
+ - robot/jupiter/viewers/galileo
+ - robot/kwyt/scripts/sampling_data
+ - robot/lemur/scripts/common
+ - robot/metrics/forums_sbr/bin/get_forum_urls
+ - robot/metrics/forums_sbr/bin/get_urls_sample
+ - robot/metrics/forums_sbr/bin/parse_zora_result
+ - robot/metrics/fresh_sbr/mk_fresh_serp
+ - robot/metrics/rotor_missed_words_metric/bin/gemini_canonize
+ - robot/metrics/speed_sbr/remove_fresh_hosts
+ - robot/quality/robotrank/mk_learn_pool/lib
+ - robot/salmon_agent/counters
+ - robot/salmon_agent/utils
+ - rt-research/broadmatching/mr/IRT-1517
+ - rt-research/broadmatching/scripts/dyn-smart-banners/update_dyn_trashfilter
+ - rt-research/broadmatching/scripts/pylib
+ - rt-research/broadmatching/scripts/pylib/bm
+ - rt-research/broadmatching/scripts/yt/catalogia_mapper
+ - rt-research/broadmatching/scripts/yt/cdict_generator
+ - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners
+ - rt-research/broadmatching/scripts/yt/dyn-sources/generate_dse_banners/generate_filtered_links
+ - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources
+ - rt-research/multik/deploy/deploy
+ - saas/tools/devops/lib
+ - saas/tools/devops/lib23/tests/py2
+ - saas/tools/devops/lib23/tests/py23
+ - samogon/libs/sandbox
+ - sandbox/common/projects_handler
+ - sandbox/projects/BuildBegemotLightTestConfig
+ - sandbox/projects/BuildNewsPackage
+ - sandbox/projects/BuildSportProxyData
+ - sandbox/projects/BuildYobject
+ - sandbox/projects/CheckFreshDocuments
+ - sandbox/projects/CompareNewsdResponses
+ - sandbox/projects/CompareYmakeDump
+ - sandbox/projects/ConvertVideo
+ - sandbox/projects/DeployVideoMmetaShard
+ - sandbox/projects/GetAdvquickDatabase
+ - sandbox/projects/GetFusionMiddlesearchResponses
+ - sandbox/projects/GetPokazometerDatabase
+ - sandbox/projects/IexBuildPackages
+ - sandbox/projects/IexImportPatterns
+ - sandbox/projects/LandingConstructor
+ - sandbox/projects/LandingConstructor/Stat/Jobs
+ - sandbox/projects/MediaLib
+ - sandbox/projects/MediaLib/shardmap
+ - sandbox/projects/MixQueriesExperimentsRegions
+ - sandbox/projects/PersonalPoiGenerator
+ - sandbox/projects/PersonalPoiGenerator/PoisDumper
+ - sandbox/projects/ReleaseBalancerConfigGenerator
+ - sandbox/projects/ReleaseConfigGeneratorService
+ - sandbox/projects/ReleaseMediaShardmaps
+ - sandbox/projects/ReportDataRuntime
+ - sandbox/projects/ReportDataRuntimeItem
+ - sandbox/projects/ReportDataRuntimeRT
+ - sandbox/projects/ReportDataRuntimeTags
+ - sandbox/projects/ReportRuleTestFull
+ - sandbox/projects/RunNewsLoadtest
+ - sandbox/projects/SOC/YtProxyAnalyzeCommandParams
+ - sandbox/projects/SOC/YtRawMasterLogAnalysis
+ - sandbox/projects/SpawnTestConfigGenerator
+ - sandbox/projects/TaxiSecurity
+ - sandbox/projects/TaxiSecurity/BadLogsAnalyzer
+ - sandbox/projects/TaxiSecurity/YodaxAnalyzer
+ - sandbox/projects/TestFrontMetricsLogs
+ - sandbox/projects/TestFrontMetricsLogs/modules
+ - sandbox/projects/TestReportPerformance
+ - sandbox/projects/TickenatorBatchProcessing
+ - sandbox/projects/TickenatorBatchProcessing/YasmScreenshoter
+ - sandbox/projects/Travel/tasks/tools
+ - sandbox/projects/Ufo
+ - sandbox/projects/Ufo/CI
+ - sandbox/projects/UpdateConfigGeneratorDb
+ - sandbox/projects/UpdateMapsWizardPpoData
+ - sandbox/projects/UpdateTestenvNewsdResources
+ - sandbox/projects/UrlsByShowCounters/executable/lib
+ - sandbox/projects/VpsAmmo
+ - sandbox/projects/YabsDebuilder
+ - sandbox/projects/YabsDebuilderDev
+ - sandbox/projects/YabsServerStatPerformance
+ - sandbox/projects/adfox/adfox_ui/testpalm/testrunCreate
+ - sandbox/projects/alice_evo
+ - sandbox/projects/alice_evo/AliceEvoIntegrationTestsWrapper
+ - sandbox/projects/antirobot
+ - sandbox/projects/antirobot/AsnNames
+ - sandbox/projects/antirobot/LoadTesting
+ - sandbox/projects/autobudget/autobudget_lib
+ - sandbox/projects/avia/avia_statistics/update_alternative_routes_prices
+ - sandbox/projects/avia/avia_statistics/update_flights
+ - sandbox/projects/avia/avia_statistics/update_median_prices
+ - sandbox/projects/avia/avia_statistics/update_popular_months
+ - sandbox/projects/avia/avia_statistics/update_return_ticket_prices
+ - sandbox/projects/avia/avia_statistics/update_route_crosslinks
+ - sandbox/projects/avia/flight_status_registrar/FlightStatsRegistrar
+ - sandbox/projects/avia/flight_status_registrar/OagFlightsRegistrar
+ - sandbox/projects/avia/flight_status_registrar/VariFlightRegistrar
+ - sandbox/projects/avia/log_unknown_fare_codes
+ - sandbox/projects/avia/travel_avia_dump_resource/task
+ - sandbox/projects/bsyeti
+ - sandbox/projects/canvas
+ - sandbox/projects/canvas/video_constructor_utils
+ - sandbox/projects/cloud/yfm
+ - sandbox/projects/common/compare_upper_results
+ - sandbox/projects/common/fusion
+ - sandbox/projects/common/gencfg
+ - sandbox/projects/common/market_report
+ - sandbox/projects/common/mobilesearch
+ - sandbox/projects/common/mobilesearch/startrek_client
+ - sandbox/projects/common/yabs
+ - sandbox/projects/gencfg
+ - sandbox/projects/gencfg/BuildConfigGenerator2
+ - sandbox/projects/gencfg/GencfgMonitoringCharts
+ - sandbox/projects/gencfg/ReleaseConfigGenerator2
+ - sandbox/projects/gencfg/workflow
+ - sandbox/projects/health/acceptance_begemot_graph
+ - sandbox/projects/laas
+ - sandbox/projects/laas/CollectTestGeobases
+ - sandbox/projects/logs/HashedSessionsDiff
+ - sandbox/projects/logs/TestRalibPerfomance
+ - sandbox/projects/market/infra/helpers
+ - sandbox/projects/market/infra/helpers/changes_helper
+ - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks
+ - sandbox/projects/media
+ - sandbox/projects/media/admins/mysqlcopydb
+ - sandbox/projects/media/kp-front-nginx/config-validation
+ - sandbox/projects/media_crm/tasks
+ - sandbox/projects/media_crm/tasks/media_crm_deploy
+ - sandbox/projects/metrika/mobile/sdk/helpers
+ - sandbox/projects/mssngr/rtc
+ - sandbox/projects/mssngr/runtime/MssngrRouterLoadTest
+ - sandbox/projects/music
+ - sandbox/projects/music/MusicExportYdbToYt
+ - sandbox/projects/music/ReleaseMusic
+ - sandbox/projects/music/deployment/helpers
+ - sandbox/projects/news
+ - sandbox/projects/news/CompareNewsAnnotatorResponses
+ - sandbox/projects/news/UpdateRTHubAdvWidgetResources
+ - sandbox/projects/ofd/backend/ofd_backend_package_build
+ - sandbox/projects/ofd/backend/ofd_backend_run_tests
+ - sandbox/projects/ofd/notifier/ofd_notifier_package_build
+ - sandbox/projects/ofd/runtime/ofd_runtime_package_build
+ - sandbox/projects/ofd/runtime/ofd_runtime_run_tests
+ - sandbox/projects/ofd/tasks/ofd_tasks_package_build
+ - sandbox/projects/porto/BuildPortoLayer
+ - sandbox/projects/porto/BuildPortoLayerTmp
+ - sandbox/projects/qafw
+ - sandbox/projects/qafw/ansible
+ - sandbox/projects/reconf
+ - sandbox/projects/sandbox_ci/sandbox_ci_compare_load_test
+ - sandbox/projects/sandbox_ci/task
+ - sandbox/projects/tests
+ - sandbox/projects/turbo
+ - sandbox/projects/turbo/CompareTurboResponses
+ - sandbox/projects/turbo/SampleForTurbo
+ - sandbox/projects/vh
+ - sandbox/projects/vh/faas/FaasConvertVideoVodTest
+ - sandbox/projects/vh/frontend/count_diff
+ - sandbox/projects/vh/frontend/generate_requests_from_yt_logs
+ - sandbox/projects/vins
+ - sandbox/projects/vins/AliceBegemotMegamindPerfTest
+ - sandbox/projects/vins/BuildVinsCustomEntity
+ - sandbox/projects/vins/MegamindPerfTest
+ - sandbox/projects/vins/VinsPerfTest
+ - sandbox/projects/websearch/CheckPrechargeAfterMemoryMap
+ - sandbox/projects/yane/ParseYanswerFactLogs
+ - sandbox/sdk2
+ - sandbox/sdk2/vcs
+ - sandbox/serviceapi
+ - sandbox/serviceapi/handlers
+ - sandbox/yasandbox/database/clickhouse
+ - skynet/kernel
+ - skynet/kernel/util/sys/user
+ - skynet/library
+ - skynet/library/tasks
+ - smart_devices/crash_analytics/tools/minidump_analyzer
+ - smart_devices/tools/launcher2/tests/restarts
+ - smm/lib/models/sklearn
+ - sprav/mining/botanik_miner
+ - statbox/qb2
+ - statbox/qb2/qb2_extensions/api/v1/extractors
+ - statbox/qb2/qb2_extensions/api/v1/extractors/pool
+ - statbox/statkey/jam/jobs/cubes/desktop_installs/v2
+ - strm/generate/lib/generate
+ - sup/stat/find_pushes
+ - talents/nlu/scripts/train/geoname
+ - testenv/jobs/rtyserver
+ - tools/mkdocs_builder/lib
+ - tools/mkdocs_builder/mkdocs_yandex
+ - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex
+ - tools/mkdocs_builder/mkdocs_yandex/mkdocs_yandex/ext/markdown
+ - tools/releaser/src
+ - tools/ygetparam
+ - travel/avia/library/python/common
+ - travel/avia/library/python/common/tests
+ - travel/avia/library/python/common/tests/lib
+ - travel/avia/library/python/common/utils
+ - travel/avia/library/python/geosearch
+ - travel/avia/library/python/geosearch/views
+ - travel/avia/library/python/route_search
+ - travel/avia/library/python/route_search/by_number
+ - travel/rasp/bus/admin/utils
+ - travel/rasp/bus/admin/utils/points
+ - travel/rasp/library/python/common/tests
+ - travel/rasp/library/python/common/tests/data_api/billing
+ - travel/rasp/library/python/common/tests/data_api/dzv
+ - travel/rasp/library/python/geosearch
+ - travel/rasp/library/python/geosearch/views
+ - travel/rasp/library/python/route_search
+ - travel/rasp/library/python/route_search/by_number
+ - travel/rasp/train_api
+ - travel/rasp/train_api/scripts
+ - travel/rasp/train_api/tests
+ - travel/rasp/train_api/tests/tariffs/train/views
+ - travel/rasp/train_api/tests/tariffs/train/wizard
+ - travel/rasp/train_api/tests/train_purchase
+ - travel/rasp/train_api/tests/train_purchase/tasks
+ - travel/rasp/train_api/train_partners/base/train_details
+ - vcs/svn/hooks/check_arc_commit
+ - vh/telegram/sqs2media
+ - vh/telegram/sqs2media/handlers
+ - voicetech/asr/tools/asr_analyzer/lib
+ - voicetech/common/voicetable/bin/filter_text
+ - voicetech/common/voicetable/checks/bin/general_voicetable_check
+ - voicetech/infra/gdpr_proxy/service
+ - voicetech/tts/vh/utils
+ - weather/workers/warnings/general
+ - yabs/analytics/anomaly_analyzer/src
+ - yabs/chat_bot/bot
+ - yabs/event-utils
+ - yabs/outdoor/libs/confirmed_booking
+ - yabs/python-libs/common
+ - yabs/qa/b2b_utils/bsserver_b2b/engine/bs_utils
+ - yabs/qa/b2b_utils/bsserver_b2b/engine/mongo_utils
+ - yabs/qa/b2b_utils/bsserver_b2b/engine/run
+ - yabs/qa/b2b_utils/bsserver_b2b/engine/validate_scripts
+ - yabs/qa/yabs_b2b_tank/qabs/b2b
+ - yabs/sbyt/testing/core
+ - yabs/server/cs/pylibs/partner_interface_monitor
+ - yabs/server/cs/pylibs/settings
+ - yabs/server/infra/bstrbufbuf/plugin
+ - yabs/server/infra/trivial_cron
+ - yabs/server/libs/py_markdown_strings
+ - yabs/server/test/ft/BSDEV-73064
+ - yabs/server/test/ft/BSSERVER-11503
+ - yabs/server/test/ft/BSSERVER-14195
+ - yabs/server/test/ft/BSSERVER-2122
+ - yabs/server/test/ft/BSSERVER-2158
+ - yabs/server/test/ft/BSSERVER-2454
+ - yabs/server/test/ft/BSSERVER-2976
+ - yabs/server/test/ft/BSSERVER-3895
+ - yabs/server/test/ft/BSSERVER-9233
+ - yabs/server/test/ft/NANPU-817
+ - yabs/server/test/ft/checks
+ - yabs/server/test/pylibs/qxl
+ - yabs/server/test/pylibs/simulator
+ - yabs/server/test/qabs_bsserver_pytest
+ - yabs/server/test/tools/oneshot_tester/lib
+ - yabs/stat/dropstat2/api/lib
+ - yabs/stat/infra/clickhouse/repair_master_report
+ - yabs/utils/autosupbs/pylibs/tasks_generator
+ - yabs/utils/autosupbs/tests/tasks_generator
+ - yabs/utils/yabs-mysql-binlog-audit/lib
+ - yabs/vh/cms-pgaas/cms_common
+ - yabs/vh/cms-pgaas/cms_common/biz
+ - yabs/vh/cms-pgaas/content_importer/evsproduction/pattern_based
+ - yabs/vh/cms-pgaas/content_ksiva_api/lib
+ - yabs/vh/cms-pgaas/feed_miner
+ - yabs/vh/cms-pgaas/feed_miner/downloader_middlewares
+ - yabs/vh/frontend/test/vh_pytest
+ - yabs/web-bins/export-stat/pcode_experiments/dill
+ - yabs/web-bins/export-stat/pcode_experiments/issue
+ - yaphone/advisor/project
+ - yaphone/localization_admin/src
+ - yaphone/localization_admin/src/models
+ - yaphone/localization_admin/src/models/details
+ - yaphone/localization_admin/src/models/support_info
+ - yp/scheduler_simulator/simtool
+ - yql/tools/docs/custom_mkdocs
+ - yql/tools/docs/wiki2markdown
+ - yql/tools/mrjob/test
+ - yql/tools/qplayer
+ - yweb/antimalware/mitb/mitb_monitor/lib
+ - yweb/antiporno/analyze_yt_ops
+ - yweb/antiporno/cp_conv/prepare_suggestive_cp_img_basket
+ - yweb/antiporno/cp_conv/text_classif/bin
+ - yweb/antiporno/nav/lib
+ - yweb/antiporno/pyutil/url
+ - yweb/antiporno/queries_manual_markup/lib
+ - yweb/antiporno/query_analyzer/porn_query_config_parser
+ - yweb/antiporno/site_reachability/lib/reachability_common
+ - yweb/antiporno/top_queries_cleanup/yql/test
+ - yweb/antispam/clean_web/tools/run_cm_targets
+ - yweb/antispam/mascot/scripts/tcinet
+ - yweb/antispam/seo_masks/py/static_impl/ut
+ - yweb/antispam/tools/yql
+ - yweb/antispam/webspam/collections/vw_model_applier/bin
+ - yweb/antispam/webspam/export/tries/tools/upper_tries
+ - yweb/antispam/ytgr/viewer
+ - yweb/blender/newsletter/unused_formulas
+ - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils
+ - yweb/blender/scripts/nirvana/jobs/calc_workflow_num_with_filters
+ - yweb/blender/scripts/nirvana/jobs/join_features
+ - yweb/blender/scripts/nirvana/jobs/train_sbs_model/utils
+ - yweb/blogs/parsers/test
+ - yweb/freshness/scripts/sport_pushes
+ - yweb/freshness/scripts/sport_wizard
+ - yweb/freshness/scripts/svn
+ - yweb/freshness/scripts/trends/trendbot_tg/tg_handler
+ - yweb/freshness/scripts/trends/trendbot_tg/tg_handler/foreground
+ - yweb/news/hosts_differ
+ - yweb/news/runtime_scripts/event_to_infra
+ - yweb/news/tests
+ - yweb/news/tests/export
+ - yweb/news/tests/utils
+ - yweb/robot/limbo
+ - yweb/robot/limbo/imports
+ - yweb/robot/metrics/pmusca/lib
+ - yweb/sitelinks/astrolabe/build_bna/candidates/filter_by_region
+ - yweb/sitelinks/scripts/sitelinks
+ - yweb/structhtml/richsnippets/scripts/build_foto_recipes/prepare_to_deploy
+ - yweb/verticals/scripts/sport_chats
+ - yweb/verticals/scripts/sport_zen_updater/add_parsed_zen_urls
+ - yweb/video/vparsrobot/v2/tests
+ - yweb/video/vparsrobot/v2/tests-large
+ - yweb/webdaemons/clickdaemon/tools/create_ammo_from_tcpdump
+ - yweb/yasap/answers_nirvana/make_ammos
+ - yweb/yasap/answers_quality/plagiarism/prepare_scraper_queries
+ - yweb/yasap/pdb/backend/offline_views/history_calculator
+ - yweb/yasap/pdb/food/normalizer
+ - yweb/yasap/pdb/nirvana/gathered_boards_delta
+ - yweb/younglings/tasks/YOUNGLINGS-516
+ - zootopia/analytics/drive/source/drive/operations/support/registrations/reg_quality
+ - zootopia/hub/vds
+ - zootopia/hub/vds/onetime/orgthief/orgthief/parsers
+ - zootopia/hub/vds/scripts
+ - zootopia/hub/vds/services/velobike
+ F841:
+ ignore:
+ - F841
+ prefixes:
+ - addappter/web/api
+ - addappter/web/api/views
+ - addappter/web/api/views/api
+ - addappter/web/api/views/frontend
+ - addappter/web/common
+ - addappter/web/common/events
+ - addappter/web/libs
+ - addappter/web/libs/marshmallow
+ - addappter/web/libs/walrus
+ - addappter/web/testing/fixtures
+ - adfox/infra/amacs_config/lib
+ - ads/autobudget/metrics/equivalency_monitoring
+ - ads/autobudget/metrics/example_monitoring
+ - ads/libs/py_mapreduce/yabs-mapreduce-modules/py-modules/yabs/tabtools
+ - ads/ml_engine/learn/result/local_extended_fstr/lib
+ - ads/pytorch/lib/online_learning/production/processors/tsar_processor/lib
+ - ads/quality/apc_check_py
+ - ads/quality/embedding/tsar_tensor/ft_pool/lib
+ - ads/quality/ltp/libs/build_pool
+ - ads/sandbox_scripts/ggmonitor
+ - alice/analytics/wer/g2p
+ - alice/hollywood/tests/perf_test
+ - alice/megamind/tests/library
+ - alice/paskills/granet_server/tests
+ - alice/uniproxy/bin/send-digest
+ - alice/uniproxy/bin/uniproxy
+ - alice/uniproxy/bin/uniproxy-delivery
+ - alice/uniproxy/bin/uniproxy-subway
+ - alice/uniproxy/bin/uniproxy-unistat
+ - alice/uniproxy/bin/yabio-storage
+ - alice/uniproxy/library/testing
+ - alice/uniproxy/library/testing/mocks
+ - alice/uniproxy/tools/balancer_top
+ - apphost/conf/tests/blackbox
+ - april/badb
+ - april/badb/badb/db/mysql
+ - april/web/bas/ca
+ - april/web/bas/ca/forms
+ - april/web/bas/collector
+ - aurora/scripts/parsers/zoon_ru
+ - balancer/test/functional/regexp_host
+ - balancer/test/functional/regexp_path
+ - billing/apikeys/apikeys
+ - billing/apikeys/apikeys/mapper
+ - billing/bcl/bcl
+ - billing/refs/refs
+ - cloud/marketplace/queue/yc_marketplace_queue
+ - cloud/netinfra/rknfilter/yc_rkn_common
+ - cloud/netinfra/rknfilter/yc_rkn_config_node
+ - cmnt/tools/regression/request_classes
+ - cv/imageproc/ocr/tools/database_extraction/nirvana/imgaug/src/augmenters
+ - cv/short2long/training/yt_calc_factors
+ - datacloud/log_reader/lib
+ - devtools/local_cache/ac/tests/perfac
+ - dict/bert/make/lib
+ - dict/bert/make/lib/models
+ - dict/bert/make/lib/tasks
+ - dict/mt/analytics/sentence_breaking/translate_human_eval_comparison
+ - dict/mt/g2p/asr/graph
+ - dict/ontodb/onto_lib/sources/wikidata
+ - dict/ontodb/proto/direct/ut
+ - dict/ontodb/utils/export_src_codes
+ - direct/infra/direct_zkcli
+ - direct/infra/dt-dump-b2yt-data
+ - district/logbroker_consumers/lib
+ - district/logbroker_consumers/lib/yt
+ - dj/tools/rthub_profiles/acceptance/acceptance_tool
+ - dj/tools/viewer/custom/entity
+ - edadeal/analytics/scripts/CashbackReport/CashbackReportLib
+ - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib
+ - education/lib/common
+ - education/schoolbook/analytics/adhoc/ANALITICSEDU-687
+ - entity/ontodb/tasks/vloggers
+ - entity/recommender/nirvana/operations/abt_experiments_launcher
+ - entity/ugc/nirvana/ugcdb/support_hidden_import/lib/ut
+ - entity/ugc/tools/update_photos
+ - extsearch/audio/generative/py/uploader
+ - extsearch/images/tools/nirvana/download_serps
+ - extsearch/video/robot/cm/library/ut
+ - extsearch/video/robot/cm/transcoder/cmpy/vh_index_dups_matcher
+ - extsearch/video/robot/crawling/player_testing/services/live_proxy
+ - extsearch/video/robot/previews/hitman/lost_preview_status
+ - extsearch/video/robot/tools/library/python
+ - extsearch/video/transcoder/per_title/handler
+ - extsearch/video/transcoder/per_title/vmaf
+ - geosuggest/conveyors/learn_pool/lst_weights/lib
+ - haas/cmis/hwr_preorders
+ - haas/graphite_sync/get_report
+ - infra/deploy_queue_controller/lib
+ - infra/dist/dmover/bin/dmover
+ - infra/dist/dmover/lib
+ - infra/dist/dmover/lib/internal
+ - infra/dist/dmover/tests
+ - infra/host-cpu-metrics
+ - infra/host-cpu-metrics/host_metrics
+ - infra/porto/api_py
+ - infra/qyp/vmproxy/tests
+ - infra/shawshank/tests
+ - infra/skybit
+ - infra/skybit/src
+ - infra/yp_dns/tools/handle_duplicate_records/lib
+ - infra/yp_quota_distributor/lib
+ - keyboard/dict/nirvana/config_generator/blocks
+ - keyboard/dict/synthetic_ngrams/synthetic_ngrams_builder
+ - lbs/metrics/lbs_binbase_diff
+ - library/python/bstr
+ - logbroker/tools/manual/collect_capacity/base_2020
+ - logbroker/tools/manual/collect_capacity/collect_pre
+ - logbroker/tools/manual/collect_capacity/set_capacity_pre
+ - logbroker/tools/manual/collect_capacity/topic_list
+ - logbroker/tools/manual/create_logbroker_account/lib
+ - logbroker/tools/startrek/st
+ - mail/python/fake_mulcagate
+ - mail/python/theatre/app
+ - mail/python/theatre/app/log_helpers
+ - mail/tools/sql_execute_per_shard/lib
+ - maps/analytics/legacy/nile/statadhoc-8703-site-api-report
+ - maps/automotive/carwashes/tests/src
+ - maps/automotive/qa/metrics/common/ut
+ - maps/automotive/tools/statistics_auto/pylib/track_match_finder
+ - maps/infra/apiteka/config_uploader/tests
+ - maps/infra/sedem/cli/tests/release
+ - maps/infra/sedem/cli/tests/release/utils
+ - maps/tools/matcher_quality/routes_to_geoms
+ - maps_adv/common/shared_mock/tests
+ - market/mobile_validator/mt/env
+ - market/mstat/ch-cache/tests
+ - market/sre/services/cema-proxy/lib
+ - market/sre/services/cema/lib
+ - market/sre/services/cema/lib/classes
+ - market/yamarec/yamarec/yamarec1
+ - market/yamarec/yamarec/yamarec1/tasks
+ - metrika/admin/brb/server/lib
+ - metrika/admin/maas/bin/backend
+ - metrika/admin/maas/bin/monitoring/maas_instances_memory
+ - metrika/admin/maas/lib/core/common
+ - metrika/admin/maas/lib/core/daemon
+ - metrika/admin/maas/lib/core/service
+ - metrika/admin/python/duty/bot/lib
+ - metrika/admin/python/scripts/jrun
+ - metrika/tasklets/conductor/impl
+ - milab/lib/i2tclient/python
+ - ml/tensorflow/tfnn/tests
+ - mlp/mail/aspam/experiments/MLP_231
+ - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound
+ - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats
+ - modadvert/libs/connectors/loggers
+ - modadvert/libs/http
+ - modadvert/libs/laas/workers/domain_threats
+ - modadvert/libs/laas/workers/features_from_href
+ - modadvert/libs/utils/dictutils/ut
+ - modadvert/programs/cv_app/libs
+ - modadvert/tools/accept_campaigns
+ - mssngr/botfarm/src/bot
+ - music/analytics/jam-sox/music_lib/financial_reports/tests/unit
+ - music/tools/download-info
- passport/backend/library/distprim
- passport/backend/library/distprim/threading
- passport/backend/meltingpot/utils
- - plus/gift/gift
- - plus/gift/gift/admin
- - quality/ab_testing/cofe/projects/zalogin
- - quality/functionality/chats/common/utils
- - quality/functionality/chats/feedback/src/feedback/api
- - quality/functionality/chats/feedback/src/feedback/api/v1
- - quality/functionality/rtx/server/plugin
- - quality/functionality/snippets/rkn_filter
- - quality/functionality/turbo/analytics/quality/cms_report
- - quality/functionality/turbo/analytics/quality/samplers/cms_sampler
- - quality/functionality/turbo/autoparser/flags_postprocess/tests/medium
- - quality/functionality/turbo/tools/pq_delete_docs
- - quality/functionality/turbo/tools/tdm
- - quality/logs/mousetrack_lib/python/tests
- - quality/relev_tools/choice_screen/create_stove
- - quality/trailer/suggest/toloka_processor/report_yt
- - quality/webfresh/metrics/serpsets/raw_serpset_patcher/tests
- - rnd_toolbox/deckard
- - rnd_toolbox/nyamm
- - robot/library/yuppie
- - robot/library/yuppie/modules
- - robot/metrics/speed_sbr/nirvana
- - robot/quality/nirvana/refererrank/premetric_map
- - robot/quality/samovar_conveyour/tools/get_samples
- - robot/quality/sendlink_conveyour/tools/max_rank_acceptance
- - robot/research/eval_nirvana_graph
- - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources
- - rt-research/multik/tools/jupyter
- - rtmapreduce/tests/recipes/rtmr_processing_recipe
- - saas/tools/devops/check_backup
- - saas/tools/devops/lib23
- - saas/tools/refresh/import_rtyserver
- - sandbox/common/upload
- - sandbox/projects/alice_evo
- - sandbox/projects/avia/mysql_sync_testing_with_prod
- - sandbox/projects/balancer/load/BalancerLoadCompare
- - sandbox/projects/bitbucket/GBGAdaptor
- - sandbox/projects/devops
- - sandbox/projects/devops/HardwareGencfgGroups
- - sandbox/projects/dj/DjCompileConfig
- - sandbox/projects/geoadv
- - sandbox/projects/geoadv/ReleasePrices
- - sandbox/projects/geosearch/snippets
- - sandbox/projects/geosearch/snippets/AddrsSnippetsTask
- - sandbox/projects/hollywood
- - sandbox/projects/hollywood/fast_data/BuildAndDeployHollywoodFastData
- - sandbox/projects/logbroker/tasks/BuildSeveralResources
- - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks
- - sandbox/projects/music/deployment/MusicRestoreMdb/YandexCloudDatabase
- - sandbox/projects/music/deployment/helpers
- - sandbox/projects/news
- - sandbox/projects/news/RunNewsComplexLoadtests
- - sandbox/projects/news/runtime_tests
- - sandbox/projects/news/runtime_tests/CompareNewsApphostServiceResponses
- - sandbox/projects/saas/backups/DetachServiceIndex2
- - sandbox/projects/sport_wizard/DeploySportProxyData
- - sandbox/projects/video
- - sandbox/projects/video/priemka/VideoRobotPriemkaSimple
- - sandbox/projects/vqe/measure_performance
- - sandbox/projects/yt
- - sandbox/projects/yt/ci_tasks/packages/YtBuildCronPackage
- - sandbox/projects/yt/layers_tasks/YtBuildSpravLayerTask
- - search/metrics/monitoring/core
- - search/mon/rviewer/db
- - search/mon/rviewer/modules
- - search/mon/trainer/libs
- - search/mon/uchenki/app
- - search/mon/uchenki/app/controllers/api
- - search/mon/wabbajack/bin/icscr
- - search/mon/wabbajack/libs/client
- - search/mon/wabbajack/libs/client/parsers
- - search/scraper/parser_platform/parsers
- - search/scraper_over_yt/scripts
- - skynet/kernel
- - skynet/kernel/util/tests
- - smart_devices/tools/launcher2/tests/restarts
- - statbox/statkey/jam/jobs
- - statbox/statkey/jam/jobs/cubes/superapp/autofill/v2
- - statbox/statkey/jam/jobs/cubes/superapp/bi_turboappweb_turbo_counter/v2
- - statbox/statkey/jam/jobs/cubes/superapp/cohorts_daily/v2
- - strm/generate/lib/generate
- - taxi/graph/packages/taxigraphd
- - testenv/core/web_server
- - travel/avia/shared_flights/tasks/schedules_dumper
- - travel/hotels/suggest/builder
- - travel/hotels/suggest/metrics_builder
- - travel/rasp/content/rzdParser
- - travel/rasp/train_api
- - travel/rasp/train_api/middleware
- - vcs/manage_contrib
- - vh/lib/sqs_watcher
- - vh/recommender/tools/delayed_view_stats
- - voicetech/asr/markup/select
- - voicetech/asr/tools/run_normalizer/prepare_data_for_mt_normalizer
- - voicetech/infra/uniproxy/tests/session
- - voicetech/infra/voice_ext_mon/bin
- - voicetech/spotter/selection_for_annotation/bin
- - voicetech/spotter/train/lib
- - weather/workers/warnings/push
- - yabs/analytics/anomaly_analyzer/src
- - yabs/analytics/new_traffic_generation/src
- - yabs/analytics/traffic_generation/z_2_barnavig_click_counter
- - yabs/analytics/traffic_generation/z_4_spylog_visits_counter
- - yabs/analytics/traffic_generation/z_5_appmetr_counter
- - yabs/analytics/traffic_generation/z_6_metrika_visits_counter
- - yabs/analytics/traffic_generation/z_7_chevent_scc_counter
- - yabs/analytics/traffic_generation/z_9_3_metr_yabro_coeff
- - yabs/analytics/traffic_generation/z_9_4_all_visits
- - yabs/analytics/yt_cleaner
- - yabs/autobudget/pylibs/tasks
- - yabs/awaps_pvl/pvl/logic
- - yabs/outdoor/py_schedule/logic
- - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_rsya_only
- - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_search_only
- - yabs/qa/oneshots/sergtaim/BSSERVER-14259/AddKeywordWithNewIdToYT-test
- - yabs/server/cs/pylibs/dash_board
- - yabs/server/cs/pylibs/full_graph_plot
- - yabs/server/cs/pylibs/gantt_chart/lib
- - yabs/server/cs/pylibs/settings
- - yabs/server/test/ft/BSSERVER-13708
- - yabs/vh/cms-pgaas/sport_api_importer/sport_api_importer_lib
- - yabs/vh/frontend/test/ft/HOME-43539
- - yweb/antispam/cid/analytics/pushes/crypta_lal/args
- - yweb/antispam/cid/analytics/pushes/push_sender/args
- - yweb/antispam/cid/analytics/pushes/push_sender/cli
- - yweb/antispam/cid/support/auto_answer/args
- - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils
- - yweb/freshness/scripts/sport_pushes
- - yweb/news/dupsd/tests
- - yweb/news/zen/video_export
- - yweb/sitelinks/astrolabe/migration/bna_to_mysql
- - yweb/webscripts/video/duplicates/nirvana_scripts/knn/filter_knn_source
- - yweb/yasap/pdb/tools/backup/create_backup
- - yweb/yasap/znatoki/znatoki_ctl/lib
- - yweb/yasap/znatoki/znatoki_ctl/lib/commands
- - zootopia/analytics/ml/features/geo_features/idle_duration_features
- - zootopia/analytics/ml/join_features/lib
- - zootopia/analytics/ml/util/logs_context_manager/lib
- F403:
- ignore:
- - F403
- prefixes:
- - adfox/infra/registrator
- - adfox/infra/registrator/lib
- - ads/libs/py_dssm_lib/dssm_applier
- - ads/libs/py_ml_factors/matrixnet
- - ads/libs/py_vw_lib
- - ads/nirvana/xfactor-yt
- - ads/tools/mx_feature_remapper
- - ads/watchman/contrib/flask-restplus-patched
- - ads/watchman/contrib/flask-restplus-patched/flask_restplus_patched
- - alice/analytics/utils/marty/run_nirvana_instance
- - cloud/ai/speechkit/stt/tests/data_pipeline/join
- - cv/short2long/nirvana/lib
- - devtools/signer/signer
- - dict/ontodb/cmpy/lib
- - dict/ontodb/isa/libshorttext/converter/stemmer
- - dict/ontodb/report/common
- - dict/ontodb/utils/export_src_codes
- - edadeal/analytics/scripts/CashbackReport/CashbackReportLib
- - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib
- - extsearch/geo/recommender/runtime/config/generation
- - mail/freezing_tests/active_users_aggregation
- - maps/analyzer/tools/online_jams/pylib
- - market/seo/offer_base
- - mediapers/feature_machine/nirvana/script_runner
- - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound
- - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/outbound
- - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats
+ - plus/gift/gift
+ - plus/gift/gift/admin
+ - quality/ab_testing/cofe/projects/zalogin
+ - quality/functionality/chats/common/utils
+ - quality/functionality/chats/feedback/src/feedback/api
+ - quality/functionality/chats/feedback/src/feedback/api/v1
+ - quality/functionality/rtx/server/plugin
+ - quality/functionality/snippets/rkn_filter
+ - quality/functionality/turbo/analytics/quality/cms_report
+ - quality/functionality/turbo/analytics/quality/samplers/cms_sampler
+ - quality/functionality/turbo/autoparser/flags_postprocess/tests/medium
+ - quality/functionality/turbo/tools/pq_delete_docs
+ - quality/functionality/turbo/tools/tdm
+ - quality/logs/mousetrack_lib/python/tests
+ - quality/relev_tools/choice_screen/create_stove
+ - quality/trailer/suggest/toloka_processor/report_yt
+ - quality/webfresh/metrics/serpsets/raw_serpset_patcher/tests
+ - rnd_toolbox/deckard
+ - rnd_toolbox/nyamm
+ - robot/library/yuppie
+ - robot/library/yuppie/modules
+ - robot/metrics/speed_sbr/nirvana
+ - robot/quality/nirvana/refererrank/premetric_map
+ - robot/quality/samovar_conveyour/tools/get_samples
+ - robot/quality/sendlink_conveyour/tools/max_rank_acceptance
+ - robot/research/eval_nirvana_graph
+ - rt-research/broadmatching/scripts/yt/dyn-sources/yt_prepare_dyn_sources
+ - rt-research/multik/tools/jupyter
+ - rtmapreduce/tests/recipes/rtmr_processing_recipe
+ - saas/tools/devops/check_backup
+ - saas/tools/devops/lib23
+ - saas/tools/refresh/import_rtyserver
+ - sandbox/common/upload
+ - sandbox/projects/alice_evo
+ - sandbox/projects/avia/mysql_sync_testing_with_prod
+ - sandbox/projects/balancer/load/BalancerLoadCompare
+ - sandbox/projects/bitbucket/GBGAdaptor
+ - sandbox/projects/devops
+ - sandbox/projects/devops/HardwareGencfgGroups
+ - sandbox/projects/dj/DjCompileConfig
+ - sandbox/projects/geoadv
+ - sandbox/projects/geoadv/ReleasePrices
+ - sandbox/projects/geosearch/snippets
+ - sandbox/projects/geosearch/snippets/AddrsSnippetsTask
+ - sandbox/projects/hollywood
+ - sandbox/projects/hollywood/fast_data/BuildAndDeployHollywoodFastData
+ - sandbox/projects/logbroker/tasks/BuildSeveralResources
+ - sandbox/projects/masstransit/MapsMasstransitImportVehicleTasks
+ - sandbox/projects/music/deployment/MusicRestoreMdb/YandexCloudDatabase
+ - sandbox/projects/music/deployment/helpers
+ - sandbox/projects/news
+ - sandbox/projects/news/RunNewsComplexLoadtests
+ - sandbox/projects/news/runtime_tests
+ - sandbox/projects/news/runtime_tests/CompareNewsApphostServiceResponses
+ - sandbox/projects/saas/backups/DetachServiceIndex2
+ - sandbox/projects/sport_wizard/DeploySportProxyData
+ - sandbox/projects/video
+ - sandbox/projects/video/priemka/VideoRobotPriemkaSimple
+ - sandbox/projects/vqe/measure_performance
+ - sandbox/projects/yt
+ - sandbox/projects/yt/ci_tasks/packages/YtBuildCronPackage
+ - sandbox/projects/yt/layers_tasks/YtBuildSpravLayerTask
+ - search/metrics/monitoring/core
+ - search/mon/rviewer/db
+ - search/mon/rviewer/modules
+ - search/mon/trainer/libs
+ - search/mon/uchenki/app
+ - search/mon/uchenki/app/controllers/api
+ - search/mon/wabbajack/bin/icscr
+ - search/mon/wabbajack/libs/client
+ - search/mon/wabbajack/libs/client/parsers
+ - search/scraper/parser_platform/parsers
+ - search/scraper_over_yt/scripts
+ - skynet/kernel
+ - skynet/kernel/util/tests
+ - smart_devices/tools/launcher2/tests/restarts
+ - statbox/statkey/jam/jobs
+ - statbox/statkey/jam/jobs/cubes/superapp/autofill/v2
+ - statbox/statkey/jam/jobs/cubes/superapp/bi_turboappweb_turbo_counter/v2
+ - statbox/statkey/jam/jobs/cubes/superapp/cohorts_daily/v2
+ - strm/generate/lib/generate
+ - taxi/graph/packages/taxigraphd
+ - testenv/core/web_server
+ - travel/avia/shared_flights/tasks/schedules_dumper
+ - travel/hotels/suggest/builder
+ - travel/hotels/suggest/metrics_builder
+ - travel/rasp/content/rzdParser
+ - travel/rasp/train_api
+ - travel/rasp/train_api/middleware
+ - vcs/manage_contrib
+ - vh/lib/sqs_watcher
+ - vh/recommender/tools/delayed_view_stats
+ - voicetech/asr/markup/select
+ - voicetech/asr/tools/run_normalizer/prepare_data_for_mt_normalizer
+ - voicetech/infra/uniproxy/tests/session
+ - voicetech/infra/voice_ext_mon/bin
+ - voicetech/spotter/selection_for_annotation/bin
+ - voicetech/spotter/train/lib
+ - weather/workers/warnings/push
+ - yabs/analytics/anomaly_analyzer/src
+ - yabs/analytics/new_traffic_generation/src
+ - yabs/analytics/traffic_generation/z_2_barnavig_click_counter
+ - yabs/analytics/traffic_generation/z_4_spylog_visits_counter
+ - yabs/analytics/traffic_generation/z_5_appmetr_counter
+ - yabs/analytics/traffic_generation/z_6_metrika_visits_counter
+ - yabs/analytics/traffic_generation/z_7_chevent_scc_counter
+ - yabs/analytics/traffic_generation/z_9_3_metr_yabro_coeff
+ - yabs/analytics/traffic_generation/z_9_4_all_visits
+ - yabs/analytics/yt_cleaner
+ - yabs/autobudget/pylibs/tasks
+ - yabs/awaps_pvl/pvl/logic
+ - yabs/outdoor/py_schedule/logic
+ - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_rsya_only
+ - yabs/qa/oneshots/astkachev/BSSERVER-14230/noload_search_only
+ - yabs/qa/oneshots/sergtaim/BSSERVER-14259/AddKeywordWithNewIdToYT-test
+ - yabs/server/cs/pylibs/dash_board
+ - yabs/server/cs/pylibs/full_graph_plot
+ - yabs/server/cs/pylibs/gantt_chart/lib
+ - yabs/server/cs/pylibs/settings
+ - yabs/server/test/ft/BSSERVER-13708
+ - yabs/vh/cms-pgaas/sport_api_importer/sport_api_importer_lib
+ - yabs/vh/frontend/test/ft/HOME-43539
+ - yweb/antispam/cid/analytics/pushes/crypta_lal/args
+ - yweb/antispam/cid/analytics/pushes/push_sender/args
+ - yweb/antispam/cid/analytics/pushes/push_sender/cli
+ - yweb/antispam/cid/support/auto_answer/args
+ - yweb/blender/scripts/blender_viewer/conveyor_experiments/utils
+ - yweb/freshness/scripts/sport_pushes
+ - yweb/news/dupsd/tests
+ - yweb/news/zen/video_export
+ - yweb/sitelinks/astrolabe/migration/bna_to_mysql
+ - yweb/webscripts/video/duplicates/nirvana_scripts/knn/filter_knn_source
+ - yweb/yasap/pdb/tools/backup/create_backup
+ - yweb/yasap/znatoki/znatoki_ctl/lib
+ - yweb/yasap/znatoki/znatoki_ctl/lib/commands
+ - zootopia/analytics/ml/features/geo_features/idle_duration_features
+ - zootopia/analytics/ml/join_features/lib
+ - zootopia/analytics/ml/util/logs_context_manager/lib
+ F403:
+ ignore:
+ - F403
+ prefixes:
+ - adfox/infra/registrator
+ - adfox/infra/registrator/lib
+ - ads/libs/py_dssm_lib/dssm_applier
+ - ads/libs/py_ml_factors/matrixnet
+ - ads/libs/py_vw_lib
+ - ads/nirvana/xfactor-yt
+ - ads/tools/mx_feature_remapper
+ - ads/watchman/contrib/flask-restplus-patched
+ - ads/watchman/contrib/flask-restplus-patched/flask_restplus_patched
+ - alice/analytics/utils/marty/run_nirvana_instance
+ - cloud/ai/speechkit/stt/tests/data_pipeline/join
+ - cv/short2long/nirvana/lib
+ - devtools/signer/signer
+ - dict/ontodb/cmpy/lib
+ - dict/ontodb/isa/libshorttext/converter/stemmer
+ - dict/ontodb/report/common
+ - dict/ontodb/utils/export_src_codes
+ - edadeal/analytics/scripts/CashbackReport/CashbackReportLib
+ - edadeal/analytics/scripts/CashbackReport/CashbackReportLib/CashbackReportLib
+ - extsearch/geo/recommender/runtime/config/generation
+ - mail/freezing_tests/active_users_aggregation
+ - maps/analyzer/tools/online_jams/pylib
+ - market/seo/offer_base
+ - mediapers/feature_machine/nirvana/script_runner
+ - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/inbound
+ - mlp/mail/aspam/nirvana_operations/conveyor/build_prod_graph/outbound
+ - mlp/mail/aspam/nirvana_operations/conveyor/calc_dsats
- passport/backend/api/tests/views/bundle/change_avatar
- - quality/ab_testing/cofe/projects/alice/sesame/v1_1
- - quality/neural_net/bert/bert/utils/distributed
- - quality/neural_net/bert/bert/utils/fs
- - sandbox/projects/tycoon/TycoonAdverts
- - search/base_search/tools/explain_l1
- - search/mon/rviewer/app
- - search/mon/rviewer/db
- - search/mon/rviewer/modules
- - search/mon/rviewer/modules/clients
- - search/mon/rviewer/modules/orchestrator
- - travel/avia/avia_api
- - travel/avia/avia_api/avia/settings
- - travel/avia/avia_api/avia/v1/schemas
- - travel/avia/avia_api/tests
- - voicetech/asr/cloud_engine/cli/nirvana/aggregate_kenlm_outputs
- - voicetech/asr/cloud_engine/cli/nirvana/aggregate_subword_lm_outputs
- - voicetech/asr/cloud_engine/cli/nirvana/build_lm
- - voicetech/asr/cloud_engine/cli/nirvana/make_kenlm_lingware
- - voicetech/asr/cloud_engine/cli/nirvana/make_subword_lm_lingware
- - voicetech/asr/cloud_engine/cli/nirvana/merge_lingwares
- - voicetech/asr/cloud_engine/cli/nirvana/select_best_kenlm
- - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_freq_table
- - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_mixture_model
- - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_model
- - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_lambda
- - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_vocabulary
- - voicetech/asr/tools/language_model/tests/test_linear_merge
- - yabs/outdoor/viewer/back/logic
+ - quality/ab_testing/cofe/projects/alice/sesame/v1_1
+ - quality/neural_net/bert/bert/utils/distributed
+ - quality/neural_net/bert/bert/utils/fs
+ - sandbox/projects/tycoon/TycoonAdverts
+ - search/base_search/tools/explain_l1
+ - search/mon/rviewer/app
+ - search/mon/rviewer/db
+ - search/mon/rviewer/modules
+ - search/mon/rviewer/modules/clients
+ - search/mon/rviewer/modules/orchestrator
+ - travel/avia/avia_api
+ - travel/avia/avia_api/avia/settings
+ - travel/avia/avia_api/avia/v1/schemas
+ - travel/avia/avia_api/tests
+ - voicetech/asr/cloud_engine/cli/nirvana/aggregate_kenlm_outputs
+ - voicetech/asr/cloud_engine/cli/nirvana/aggregate_subword_lm_outputs
+ - voicetech/asr/cloud_engine/cli/nirvana/build_lm
+ - voicetech/asr/cloud_engine/cli/nirvana/make_kenlm_lingware
+ - voicetech/asr/cloud_engine/cli/nirvana/make_subword_lm_lingware
+ - voicetech/asr/cloud_engine/cli/nirvana/merge_lingwares
+ - voicetech/asr/cloud_engine/cli/nirvana/select_best_kenlm
+ - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_freq_table
+ - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_mixture_model
+ - voicetech/asr/core/lib/subword_ngram_lm/nirvana/build_model
+ - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_lambda
+ - voicetech/asr/core/lib/subword_ngram_lm/nirvana/fit_vocabulary
+ - voicetech/asr/tools/language_model/tests/test_linear_merge
+ - yabs/outdoor/viewer/back/logic
F401:
ignore:
- F401
diff --git a/build/ya.conf.json b/build/ya.conf.json
index 5f7cc875d6..3486620b7b 100644
--- a/build/ya.conf.json
+++ b/build/ya.conf.json
@@ -5743,7 +5743,7 @@
"sandbox_id": [
1206141440
],
- "match": "ymake"
+ "match": "ymake"
},
"executable": {
"ymake": [
diff --git a/build/ymake.core.conf b/build/ymake.core.conf
index 081833998b..5d14628a43 100644
--- a/build/ymake.core.conf
+++ b/build/ymake.core.conf
@@ -5621,7 +5621,7 @@ SSE4_DEFINES=
SSE4_CFLAGS=
# tag:cpu
-when (($ARCH_X86_64 || $ARCH_I386) && $DISABLE_INSTRUCTION_SETS != "yes") {
+when (($ARCH_X86_64 || $ARCH_I386) && $DISABLE_INSTRUCTION_SETS != "yes") {
when ($CLANG || $CLANG_CL || $GCC) {
PIC_CFLAGS=-fPIC
SSE2_CFLAGS=-msse2
diff --git a/contrib/libs/libc_compat/include/readpassphrase/readpassphrase.h b/contrib/libs/libc_compat/include/readpassphrase/readpassphrase.h
index ba1ee14ef1..6a468f2f8f 100644
--- a/contrib/libs/libc_compat/include/readpassphrase/readpassphrase.h
+++ b/contrib/libs/libc_compat/include/readpassphrase/readpassphrase.h
@@ -1,28 +1,28 @@
/* $OpenBSD: readpassphrase.h,v 1.6 2019/01/25 00:19:25 millert Exp $ */
-
-/*
+
+/*
* Copyright (c) 2000, 2002 Todd C. Miller <millert@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Sponsored in part by the Defense Advanced Research Projects
- * Agency (DARPA) and Air Force Research Laboratory, Air Force
- * Materiel Command, USAF, under agreement number F39502-99-1-0512.
- */
-
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+
#ifndef _READPASSPHRASE_H_
#define _READPASSPHRASE_H_
-
+
#define RPP_ECHO_OFF 0x00 /* Turn off echo (default). */
#define RPP_ECHO_ON 0x01 /* Leave echo on. */
#define RPP_REQUIRE_TTY 0x02 /* Fail if there is no tty. */
@@ -30,15 +30,15 @@
#define RPP_FORCEUPPER 0x08 /* Force input to upper case. */
#define RPP_SEVENBIT 0x10 /* Strip the high bit from input. */
#define RPP_STDIN 0x20 /* Read from stdin, not /dev/tty */
-
+
#include <sys/cdefs.h>
#ifdef __cplusplus
-extern "C" {
-#endif
-char * readpassphrase(const char *, char *, size_t, int);
+extern "C" {
+#endif
+char * readpassphrase(const char *, char *, size_t, int);
#ifdef __cplusplus
} // extern "C"
#endif
-
+
#endif /* !_READPASSPHRASE_H_ */
diff --git a/contrib/libs/libc_compat/readpassphrase.c b/contrib/libs/libc_compat/readpassphrase.c
index df677e9ca5..b25d10f52f 100644
--- a/contrib/libs/libc_compat/readpassphrase.c
+++ b/contrib/libs/libc_compat/readpassphrase.c
@@ -1,192 +1,192 @@
/* $OpenBSD: readpassphrase.c,v 1.27 2019/01/25 00:19:25 millert Exp $ */
-
-/*
- * Copyright (c) 2000-2002, 2007, 2010
+
+/*
+ * Copyright (c) 2000-2002, 2007, 2010
* Todd C. Miller <millert@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Sponsored in part by the Defense Advanced Research Projects
- * Agency (DARPA) and Air Force Research Laboratory, Air Force
- * Materiel Command, USAF, under agreement number F39502-99-1-0512.
- */
-
-#include <ctype.h>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+
+#include <ctype.h>
#include <errno.h>
-#include <fcntl.h>
+#include <fcntl.h>
#include <paths.h>
#include <pwd.h>
#include <signal.h>
-#include <string.h>
+#include <string.h>
#include <termios.h>
-#include <unistd.h>
+#include <unistd.h>
#include <readpassphrase.h>
-
-#ifndef TCSASOFT
-/* If we don't have TCSASOFT define it so that ORing it it below is a no-op. */
-# define TCSASOFT 0
-#endif
-
-/* SunOS 4.x which lacks _POSIX_VDISABLE, but has VDISABLE */
-#if !defined(_POSIX_VDISABLE) && defined(VDISABLE)
-# define _POSIX_VDISABLE VDISABLE
-#endif
-
+
+#ifndef TCSASOFT
+/* If we don't have TCSASOFT define it so that ORing it it below is a no-op. */
+# define TCSASOFT 0
+#endif
+
+/* SunOS 4.x which lacks _POSIX_VDISABLE, but has VDISABLE */
+#if !defined(_POSIX_VDISABLE) && defined(VDISABLE)
+# define _POSIX_VDISABLE VDISABLE
+#endif
+
static volatile sig_atomic_t signo[_NSIG];
-
-static void handler(int);
-
-char *
-readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags)
-{
- ssize_t nr;
- int input, output, save_errno, i, need_restart;
- char ch, *p, *end;
- struct termios term, oterm;
- struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm;
- struct sigaction savetstp, savettin, savettou, savepipe;
-
- /* I suppose we could alloc on demand in this case (XXX). */
- if (bufsiz == 0) {
- errno = EINVAL;
- return(NULL);
- }
-
-restart:
+
+static void handler(int);
+
+char *
+readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags)
+{
+ ssize_t nr;
+ int input, output, save_errno, i, need_restart;
+ char ch, *p, *end;
+ struct termios term, oterm;
+ struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm;
+ struct sigaction savetstp, savettin, savettou, savepipe;
+
+ /* I suppose we could alloc on demand in this case (XXX). */
+ if (bufsiz == 0) {
+ errno = EINVAL;
+ return(NULL);
+ }
+
+restart:
for (i = 0; i < _NSIG; i++)
- signo[i] = 0;
- nr = -1;
- save_errno = 0;
- need_restart = 0;
- /*
- * Read and write to /dev/tty if available. If not, read from
- * stdin and write to stderr unless a tty is required.
- */
- if ((flags & RPP_STDIN) ||
- (input = output = open(_PATH_TTY, O_RDWR)) == -1) {
- if (flags & RPP_REQUIRE_TTY) {
- errno = ENOTTY;
- return(NULL);
- }
- input = STDIN_FILENO;
- output = STDERR_FILENO;
- }
-
- /*
- * Turn off echo if possible.
- * If we are using a tty but are not the foreground pgrp this will
- * generate SIGTTOU, so do it *before* installing the signal handlers.
- */
- if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) {
- memcpy(&term, &oterm, sizeof(term));
- if (!(flags & RPP_ECHO_ON))
+ signo[i] = 0;
+ nr = -1;
+ save_errno = 0;
+ need_restart = 0;
+ /*
+ * Read and write to /dev/tty if available. If not, read from
+ * stdin and write to stderr unless a tty is required.
+ */
+ if ((flags & RPP_STDIN) ||
+ (input = output = open(_PATH_TTY, O_RDWR)) == -1) {
+ if (flags & RPP_REQUIRE_TTY) {
+ errno = ENOTTY;
+ return(NULL);
+ }
+ input = STDIN_FILENO;
+ output = STDERR_FILENO;
+ }
+
+ /*
+ * Turn off echo if possible.
+ * If we are using a tty but are not the foreground pgrp this will
+ * generate SIGTTOU, so do it *before* installing the signal handlers.
+ */
+ if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) {
+ memcpy(&term, &oterm, sizeof(term));
+ if (!(flags & RPP_ECHO_ON))
term.c_lflag &= ~(ECHO | ECHONL);
- (void)tcsetattr(input, TCSAFLUSH|TCSASOFT, &term);
- } else {
- memset(&term, 0, sizeof(term));
- term.c_lflag |= ECHO;
- memset(&oterm, 0, sizeof(oterm));
- oterm.c_lflag |= ECHO;
- }
-
- /*
- * Catch signals that would otherwise cause the user to end
- * up with echo turned off in the shell. Don't worry about
- * things like SIGXCPU and SIGVTALRM for now.
- */
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = 0; /* don't restart system calls */
- sa.sa_handler = handler;
- (void)sigaction(SIGALRM, &sa, &savealrm);
- (void)sigaction(SIGHUP, &sa, &savehup);
- (void)sigaction(SIGINT, &sa, &saveint);
- (void)sigaction(SIGPIPE, &sa, &savepipe);
- (void)sigaction(SIGQUIT, &sa, &savequit);
- (void)sigaction(SIGTERM, &sa, &saveterm);
- (void)sigaction(SIGTSTP, &sa, &savetstp);
- (void)sigaction(SIGTTIN, &sa, &savettin);
- (void)sigaction(SIGTTOU, &sa, &savettou);
-
- if (!(flags & RPP_STDIN))
- (void)write(output, prompt, strlen(prompt));
- end = buf + bufsiz - 1;
- p = buf;
- while ((nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r') {
- if (p < end) {
- if ((flags & RPP_SEVENBIT))
- ch &= 0x7f;
- if (isalpha((unsigned char)ch)) {
- if ((flags & RPP_FORCELOWER))
- ch = (char)tolower((unsigned char)ch);
- if ((flags & RPP_FORCEUPPER))
- ch = (char)toupper((unsigned char)ch);
- }
- *p++ = ch;
- }
- }
- *p = '\0';
- save_errno = errno;
- if (!(term.c_lflag & ECHO))
- (void)write(output, "\n", 1);
-
- /* Restore old terminal settings and signals. */
- if (memcmp(&term, &oterm, sizeof(term)) != 0) {
- const int sigttou = signo[SIGTTOU];
-
- /* Ignore SIGTTOU generated when we are not the fg pgrp. */
- while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 &&
- errno == EINTR && !signo[SIGTTOU])
- continue;
- signo[SIGTTOU] = sigttou;
- }
- (void)sigaction(SIGALRM, &savealrm, NULL);
- (void)sigaction(SIGHUP, &savehup, NULL);
- (void)sigaction(SIGINT, &saveint, NULL);
- (void)sigaction(SIGQUIT, &savequit, NULL);
- (void)sigaction(SIGPIPE, &savepipe, NULL);
- (void)sigaction(SIGTERM, &saveterm, NULL);
- (void)sigaction(SIGTSTP, &savetstp, NULL);
- (void)sigaction(SIGTTIN, &savettin, NULL);
- (void)sigaction(SIGTTOU, &savettou, NULL);
- if (input != STDIN_FILENO)
- (void)close(input);
-
- /*
- * If we were interrupted by a signal, resend it to ourselves
- * now that we have restored the signal handlers.
- */
+ (void)tcsetattr(input, TCSAFLUSH|TCSASOFT, &term);
+ } else {
+ memset(&term, 0, sizeof(term));
+ term.c_lflag |= ECHO;
+ memset(&oterm, 0, sizeof(oterm));
+ oterm.c_lflag |= ECHO;
+ }
+
+ /*
+ * Catch signals that would otherwise cause the user to end
+ * up with echo turned off in the shell. Don't worry about
+ * things like SIGXCPU and SIGVTALRM for now.
+ */
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0; /* don't restart system calls */
+ sa.sa_handler = handler;
+ (void)sigaction(SIGALRM, &sa, &savealrm);
+ (void)sigaction(SIGHUP, &sa, &savehup);
+ (void)sigaction(SIGINT, &sa, &saveint);
+ (void)sigaction(SIGPIPE, &sa, &savepipe);
+ (void)sigaction(SIGQUIT, &sa, &savequit);
+ (void)sigaction(SIGTERM, &sa, &saveterm);
+ (void)sigaction(SIGTSTP, &sa, &savetstp);
+ (void)sigaction(SIGTTIN, &sa, &savettin);
+ (void)sigaction(SIGTTOU, &sa, &savettou);
+
+ if (!(flags & RPP_STDIN))
+ (void)write(output, prompt, strlen(prompt));
+ end = buf + bufsiz - 1;
+ p = buf;
+ while ((nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r') {
+ if (p < end) {
+ if ((flags & RPP_SEVENBIT))
+ ch &= 0x7f;
+ if (isalpha((unsigned char)ch)) {
+ if ((flags & RPP_FORCELOWER))
+ ch = (char)tolower((unsigned char)ch);
+ if ((flags & RPP_FORCEUPPER))
+ ch = (char)toupper((unsigned char)ch);
+ }
+ *p++ = ch;
+ }
+ }
+ *p = '\0';
+ save_errno = errno;
+ if (!(term.c_lflag & ECHO))
+ (void)write(output, "\n", 1);
+
+ /* Restore old terminal settings and signals. */
+ if (memcmp(&term, &oterm, sizeof(term)) != 0) {
+ const int sigttou = signo[SIGTTOU];
+
+ /* Ignore SIGTTOU generated when we are not the fg pgrp. */
+ while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 &&
+ errno == EINTR && !signo[SIGTTOU])
+ continue;
+ signo[SIGTTOU] = sigttou;
+ }
+ (void)sigaction(SIGALRM, &savealrm, NULL);
+ (void)sigaction(SIGHUP, &savehup, NULL);
+ (void)sigaction(SIGINT, &saveint, NULL);
+ (void)sigaction(SIGQUIT, &savequit, NULL);
+ (void)sigaction(SIGPIPE, &savepipe, NULL);
+ (void)sigaction(SIGTERM, &saveterm, NULL);
+ (void)sigaction(SIGTSTP, &savetstp, NULL);
+ (void)sigaction(SIGTTIN, &savettin, NULL);
+ (void)sigaction(SIGTTOU, &savettou, NULL);
+ if (input != STDIN_FILENO)
+ (void)close(input);
+
+ /*
+ * If we were interrupted by a signal, resend it to ourselves
+ * now that we have restored the signal handlers.
+ */
for (i = 0; i < _NSIG; i++) {
- if (signo[i]) {
- kill(getpid(), i);
- switch (i) {
- case SIGTSTP:
- case SIGTTIN:
- case SIGTTOU:
- need_restart = 1;
- }
- }
- }
- if (need_restart)
- goto restart;
-
- if (save_errno)
- errno = save_errno;
- return(nr == -1 ? NULL : buf);
-}
-
-
-static void handler(int s)
-{
-
- signo[s] = 1;
-}
+ if (signo[i]) {
+ kill(getpid(), i);
+ switch (i) {
+ case SIGTSTP:
+ case SIGTTIN:
+ case SIGTTOU:
+ need_restart = 1;
+ }
+ }
+ }
+ if (need_restart)
+ goto restart;
+
+ if (save_errno)
+ errno = save_errno;
+ return(nr == -1 ? NULL : buf);
+}
+
+
+static void handler(int s)
+{
+
+ signo[s] = 1;
+}
diff --git a/library/cpp/lfalloc/lf_allocX64.h b/library/cpp/lfalloc/lf_allocX64.h
index fd2a906d6f..20df33b60d 100644
--- a/library/cpp/lfalloc/lf_allocX64.h
+++ b/library/cpp/lfalloc/lf_allocX64.h
@@ -1679,7 +1679,7 @@ static void DebugTraceMMgr(const char* pszFormat, ...) // __cdecl
#ifdef _win_
OutputDebugStringA(buff);
#else
- fputs(buff, stderr);
+ fputs(buff, stderr);
#endif
}
diff --git a/library/cpp/threading/local_executor/tbb_local_executor.cpp b/library/cpp/threading/local_executor/tbb_local_executor.cpp
index 65d6659443..9903ef33de 100644
--- a/library/cpp/threading/local_executor/tbb_local_executor.cpp
+++ b/library/cpp/threading/local_executor/tbb_local_executor.cpp
@@ -1,53 +1,53 @@
-#include "tbb_local_executor.h"
-
-template <bool RespectTls>
-void NPar::TTbbLocalExecutor<RespectTls>::SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId) {
- for (int i = firstId; i < lastId; ++i) {
- Group.run([=] { exec(i); });
- }
-}
-
-template <bool RespectTls>
-int NPar::TTbbLocalExecutor<RespectTls>::GetThreadCount() const noexcept {
- return NumberOfTbbThreads - 1;
-}
-
-template <bool RespectTls>
-int NPar::TTbbLocalExecutor<RespectTls>::GetWorkerThreadId() const noexcept {
- return TbbArena.execute([] {
- return tbb::this_task_arena::current_thread_index();
- });
-}
-
-template <bool RespectTls>
-void NPar::TTbbLocalExecutor<RespectTls>::Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) {
- if (flags & WAIT_COMPLETE) {
- exec->LocalExec(id);
- } else {
- TbbArena.execute([=] {
- SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, id, id + 1);
- });
- }
-}
-
-template <bool RespectTls>
-void NPar::TTbbLocalExecutor<RespectTls>::ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) {
- if (flags & WAIT_COMPLETE) {
- TbbArena.execute([=] {
- if (RespectTls) {
- tbb::this_task_arena::isolate([=]{
- tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); });
- });
- } else {
- tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); });
- }
- });
- } else {
- TbbArena.execute([=] {
- SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, firstId, lastId);
- });
- }
-}
-
-template class NPar::TTbbLocalExecutor<true>;
-template class NPar::TTbbLocalExecutor<false>;
+#include "tbb_local_executor.h"
+
+template <bool RespectTls>
+void NPar::TTbbLocalExecutor<RespectTls>::SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId) {
+ for (int i = firstId; i < lastId; ++i) {
+ Group.run([=] { exec(i); });
+ }
+}
+
+template <bool RespectTls>
+int NPar::TTbbLocalExecutor<RespectTls>::GetThreadCount() const noexcept {
+ return NumberOfTbbThreads - 1;
+}
+
+template <bool RespectTls>
+int NPar::TTbbLocalExecutor<RespectTls>::GetWorkerThreadId() const noexcept {
+ return TbbArena.execute([] {
+ return tbb::this_task_arena::current_thread_index();
+ });
+}
+
+template <bool RespectTls>
+void NPar::TTbbLocalExecutor<RespectTls>::Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) {
+ if (flags & WAIT_COMPLETE) {
+ exec->LocalExec(id);
+ } else {
+ TbbArena.execute([=] {
+ SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, id, id + 1);
+ });
+ }
+}
+
+template <bool RespectTls>
+void NPar::TTbbLocalExecutor<RespectTls>::ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) {
+ if (flags & WAIT_COMPLETE) {
+ TbbArena.execute([=] {
+ if (RespectTls) {
+ tbb::this_task_arena::isolate([=]{
+ tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); });
+ });
+ } else {
+ tbb::parallel_for(firstId, lastId, [=] (int id) { exec->LocalExec(id); });
+ }
+ });
+ } else {
+ TbbArena.execute([=] {
+ SubmitAsyncTasks([=] (int id) { exec->LocalExec(id); }, firstId, lastId);
+ });
+ }
+}
+
+template class NPar::TTbbLocalExecutor<true>;
+template class NPar::TTbbLocalExecutor<false>;
diff --git a/library/cpp/threading/local_executor/tbb_local_executor.h b/library/cpp/threading/local_executor/tbb_local_executor.h
index 8d790db18c..f62694d6f7 100644
--- a/library/cpp/threading/local_executor/tbb_local_executor.h
+++ b/library/cpp/threading/local_executor/tbb_local_executor.h
@@ -1,49 +1,49 @@
-#pragma once
-
-#include "local_executor.h"
-#define __TBB_TASK_ISOLATION 1
-#define __TBB_NO_IMPLICIT_LINKAGE 1
-
-#include <contrib/libs/tbb/include/tbb/blocked_range.h>
-#include <contrib/libs/tbb/include/tbb/parallel_for.h>
-#include <contrib/libs/tbb/include/tbb/task_arena.h>
-#include <contrib/libs/tbb/include/tbb/task_group.h>
-
-namespace NPar {
- template <bool RespectTls = false>
- class TTbbLocalExecutor final: public ILocalExecutor {
- public:
- TTbbLocalExecutor(int nThreads)
- : ILocalExecutor()
- , TbbArena(nThreads)
- , NumberOfTbbThreads(nThreads) {}
- ~TTbbLocalExecutor() noexcept override {}
-
- // 0-based ILocalExecutor worker thread identification
- virtual int GetWorkerThreadId() const noexcept override;
- virtual int GetThreadCount() const noexcept override;
-
- // Add task for further execution.
- //
- // @param exec Task description.
- // @param id Task argument.
- // @param flags Bitmask composed by `HIGH_PRIORITY`, `MED_PRIORITY`, `LOW_PRIORITY`
- // and `WAIT_COMPLETE`.
- virtual void Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) override;
-
- // Add tasks range for further execution.
- //
- // @param exec Task description.
- // @param firstId, lastId Task arguments [firstId, lastId)
- // @param flags Same as for `Exec`.
- virtual void ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) override;
-
- // Submit tasks for async run
- void SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId);
-
- private:
- mutable tbb::task_arena TbbArena;
- tbb::task_group Group;
- int NumberOfTbbThreads;
- };
-}
+#pragma once
+
+#include "local_executor.h"
+#define __TBB_TASK_ISOLATION 1
+#define __TBB_NO_IMPLICIT_LINKAGE 1
+
+#include <contrib/libs/tbb/include/tbb/blocked_range.h>
+#include <contrib/libs/tbb/include/tbb/parallel_for.h>
+#include <contrib/libs/tbb/include/tbb/task_arena.h>
+#include <contrib/libs/tbb/include/tbb/task_group.h>
+
+namespace NPar {
+ template <bool RespectTls = false>
+ class TTbbLocalExecutor final: public ILocalExecutor {
+ public:
+ TTbbLocalExecutor(int nThreads)
+ : ILocalExecutor()
+ , TbbArena(nThreads)
+ , NumberOfTbbThreads(nThreads) {}
+ ~TTbbLocalExecutor() noexcept override {}
+
+ // 0-based ILocalExecutor worker thread identification
+ virtual int GetWorkerThreadId() const noexcept override;
+ virtual int GetThreadCount() const noexcept override;
+
+ // Add task for further execution.
+ //
+ // @param exec Task description.
+ // @param id Task argument.
+ // @param flags Bitmask composed by `HIGH_PRIORITY`, `MED_PRIORITY`, `LOW_PRIORITY`
+ // and `WAIT_COMPLETE`.
+ virtual void Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) override;
+
+ // Add tasks range for further execution.
+ //
+ // @param exec Task description.
+ // @param firstId, lastId Task arguments [firstId, lastId)
+ // @param flags Same as for `Exec`.
+ virtual void ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) override;
+
+ // Submit tasks for async run
+ void SubmitAsyncTasks(TLocallyExecutableFunction exec, int firstId, int lastId);
+
+ private:
+ mutable tbb::task_arena TbbArena;
+ tbb::task_group Group;
+ int NumberOfTbbThreads;
+ };
+}
diff --git a/library/cpp/threading/local_executor/ya.make b/library/cpp/threading/local_executor/ya.make
index df210f92bb..7e4ffd2ab5 100644
--- a/library/cpp/threading/local_executor/ya.make
+++ b/library/cpp/threading/local_executor/ya.make
@@ -9,7 +9,7 @@ LIBRARY()
SRCS(
local_executor.cpp
- tbb_local_executor.cpp
+ tbb_local_executor.cpp
)
PEERDIR(
diff --git a/util/charset/ya.make b/util/charset/ya.make
index 26d38cb10b..cf5c171305 100644
--- a/util/charset/ya.make
+++ b/util/charset/ya.make
@@ -19,7 +19,7 @@ JOIN_SRCS(
wide.cpp
)
-IF (ARCH_X86_64 AND NOT DISABLE_INSTRUCTION_SETS)
+IF (ARCH_X86_64 AND NOT DISABLE_INSTRUCTION_SETS)
SRC_CPP_SSE41(wide_sse41.cpp)
ELSE()
SRC(
diff --git a/util/datetime/cputimer.cpp b/util/datetime/cputimer.cpp
index 516d372c37..b804351373 100644
--- a/util/datetime/cputimer.cpp
+++ b/util/datetime/cputimer.cpp
@@ -12,7 +12,7 @@
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/param.h>
-#elif defined(_win_)
+#elif defined(_win_)
#include <util/system/winint.h>
#endif
diff --git a/util/system/info.cpp b/util/system/info.cpp
index cf6681e89a..1dc3de5604 100644
--- a/util/system/info.cpp
+++ b/util/system/info.cpp
@@ -28,9 +28,9 @@ static int getloadavg(double* loadavg, int nelem) {
}
#elif defined(_unix_) || defined(_darwin_)
#include <sys/types.h>
-#endif
-
-#if defined(_freebsd_) || defined(_darwin_)
+#endif
+
+#if defined(_freebsd_) || defined(_darwin_)
#include <sys/sysctl.h>
#endif
diff --git a/util/thread/lfqueue.h b/util/thread/lfqueue.h
index ab523631e4..07b1351624 100644
--- a/util/thread/lfqueue.h
+++ b/util/thread/lfqueue.h
@@ -70,10 +70,10 @@ class TLockFreeQueue: public TNonCopyable {
}
}
- alignas(64) TRootNode* volatile JobQueue;
- alignas(64) volatile TAtomic FreememCounter;
- alignas(64) volatile TAtomic FreeingTaskCounter;
- alignas(64) TRootNode* volatile FreePtr;
+ alignas(64) TRootNode* volatile JobQueue;
+ alignas(64) volatile TAtomic FreememCounter;
+ alignas(64) volatile TAtomic FreeingTaskCounter;
+ alignas(64) TRootNode* volatile FreePtr;
void TryToFreeAsyncMemory() {
TAtomic keepCounter = AtomicAdd(FreeingTaskCounter, 0);
@@ -306,7 +306,7 @@ public:
newRoot = new TRootNode;
AtomicSet(newRoot->PushQueue, nullptr);
listInvertor.DoCopy(AtomicGet(curRoot->PushQueue));
- AtomicSet(newRoot->PopQueue, listInvertor.Copy);
+ AtomicSet(newRoot->PopQueue, listInvertor.Copy);
newRoot->CopyCounter(curRoot);
Y_ASSERT(AtomicGet(curRoot->PopQueue) == nullptr);
if (AtomicCas(&JobQueue, newRoot, curRoot)) {
diff --git a/ya.bat b/ya.bat
index dc6eba5aed..8de43fde13 100755
--- a/ya.bat
+++ b/ya.bat
@@ -15,7 +15,7 @@ exit /b %ERRORLEVEL%
:find_ya
call :dbg Searching for ya near ya.bat...
set YA_BAT_REAL=%~dp0ya
-if exist "%YA_BAT_REAL%" exit /b 0
+if exist "%YA_BAT_REAL%" exit /b 0
call :err Ya not found
exit /b 1
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h
index 9cd104925a..41bbb03e0a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/function_traits.h
@@ -1,16 +1,16 @@
-#pragma once
-
-#include <tuple>
-#include <type_traits>
-
-
-template <typename T>
-struct function_traits;
-
-template <typename ReturnType, typename... Args>
-struct function_traits<ReturnType(Args...)>
-{
- using result = ReturnType;
- using arguments = std::tuple<Args...>;
- using arguments_decay = std::tuple<typename std::decay<Args>::type...>;
-};
+#pragma once
+
+#include <tuple>
+#include <type_traits>
+
+
+template <typename T>
+struct function_traits;
+
+template <typename ReturnType, typename... Args>
+struct function_traits<ReturnType(Args...)>
+{
+ using result = ReturnType;
+ using arguments = std::tuple<Args...>;
+ using arguments_decay = std::tuple<typename std::decay<Args>::type...>;
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp
index f67b37bd71..a80495b5d3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.cpp
@@ -1,25 +1,25 @@
-#include <Poco/Net/DNS.h>
-#include <common/getFQDNOrHostName.h>
-
-
-namespace
-{
- std::string getFQDNOrHostNameImpl()
- {
- try
- {
- return Poco::Net::DNS::thisHost().name();
- }
- catch (...)
- {
- return Poco::Net::DNS::hostName();
- }
- }
-}
-
-
-const std::string & getFQDNOrHostName()
-{
- static std::string result = getFQDNOrHostNameImpl();
- return result;
-}
+#include <Poco/Net/DNS.h>
+#include <common/getFQDNOrHostName.h>
+
+
+namespace
+{
+ std::string getFQDNOrHostNameImpl()
+ {
+ try
+ {
+ return Poco::Net::DNS::thisHost().name();
+ }
+ catch (...)
+ {
+ return Poco::Net::DNS::hostName();
+ }
+ }
+}
+
+
+const std::string & getFQDNOrHostName()
+{
+ static std::string result = getFQDNOrHostNameImpl();
+ return result;
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h
index fe164a6420..a1a9af6ca0 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getFQDNOrHostName.h
@@ -1,9 +1,9 @@
-#pragma once
-
-#include <string>
-
-
-/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag.
- * If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'.
- */
-const std::string & getFQDNOrHostName();
+#pragma once
+
+#include <string>
+
+
+/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag.
+ * If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'.
+ */
+const std::string & getFQDNOrHostName();
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp
index 054e9be907..d0f409007f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/getThreadId.cpp
@@ -6,7 +6,7 @@
#elif defined(OS_LINUX)
#include <unistd.h>
#include <syscall.h>
-#elif defined(OS_FREEBSD)
+#elif defined(OS_FREEBSD)
#include <pthread_np.h>
#else
#include <pthread.h>
@@ -23,7 +23,7 @@ uint64_t getThreadId()
current_tid = gettid();
#elif defined(OS_LINUX)
current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid
-#elif defined(OS_FREEBSD)
+#elif defined(OS_FREEBSD)
current_tid = pthread_getthreadid_np();
#elif defined(OS_SUNOS)
// On Solaris-derived systems, this returns the ID of the LWP, analogous
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp
index 49d566dac1..3fc0272d41 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp
@@ -1,19 +1,19 @@
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
-#include <common/defines.h>
+#include <common/defines.h>
#if defined(__linux__) && !defined(THREAD_SANITIZER)
#define USE_PHDR_CACHE 1
#endif
-/// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own.
-#ifdef USE_PHDR_CACHE
-
-#if defined(__clang__)
-# pragma clang diagnostic ignored "-Wreserved-id-macro"
-# pragma clang diagnostic ignored "-Wunused-macros"
-#endif
-
+/// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own.
+#ifdef USE_PHDR_CACHE
+
+#if defined(__clang__)
+# pragma clang diagnostic ignored "-Wreserved-id-macro"
+# pragma clang diagnostic ignored "-Wunused-macros"
+#endif
+
#define __msan_unpoison(X, Y) // NOLINT
#if defined(ch_has_feature)
# if ch_has_feature(memory_sanitizer)
@@ -61,7 +61,7 @@ extern "C"
#endif
int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data)
{
- auto * current_phdr_cache = phdr_cache.load();
+ auto * current_phdr_cache = phdr_cache.load();
if (!current_phdr_cache)
{
// Cache is not yet populated, pass through to the original function.
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp
index c9968d841c..bcc762ef2a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.cpp
@@ -1,90 +1,90 @@
-#include <Common/typeid_cast.h>
-#include <Common/assert_cast.h>
-#include <Columns/FilterDescription.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnNullable.h>
-#include <Columns/ColumnConst.h>
-#include <Core/ColumnWithTypeAndName.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
-}
-
-
-ConstantFilterDescription::ConstantFilterDescription(const IColumn & column)
-{
- if (column.onlyNull())
- {
- always_false = true;
- return;
- }
-
- if (isColumnConst(column))
- {
- const ColumnConst & column_const = assert_cast<const ColumnConst &>(column);
- ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfLowCardinality();
-
- if (!typeid_cast<const ColumnUInt8 *>(column_nested.get()))
- {
- const ColumnNullable * column_nested_nullable = checkAndGetColumn<ColumnNullable>(*column_nested);
- if (!column_nested_nullable || !typeid_cast<const ColumnUInt8 *>(&column_nested_nullable->getNestedColumn()))
- {
- throw Exception("Illegal type " + column_nested->getName() + " of column for constant filter. Must be UInt8 or Nullable(UInt8).",
- ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
- }
- }
-
- if (column_const.getValue<UInt64>())
- always_true = true;
- else
- always_false = true;
- return;
- }
-}
-
-
-FilterDescription::FilterDescription(const IColumn & column_)
-{
- if (column_.lowCardinality())
- data_holder = column_.convertToFullColumnIfLowCardinality();
-
- const auto & column = data_holder ? *data_holder : column_;
-
- if (const ColumnUInt8 * concrete_column = typeid_cast<const ColumnUInt8 *>(&column))
- {
- data = &concrete_column->getData();
- return;
- }
-
- if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(column))
- {
- ColumnPtr nested_column = nullable_column->getNestedColumnPtr();
- MutableColumnPtr mutable_holder = IColumn::mutate(std::move(nested_column));
-
- ColumnUInt8 * concrete_column = typeid_cast<ColumnUInt8 *>(mutable_holder.get());
- if (!concrete_column)
- throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8).",
- ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
-
- const NullMap & null_map = nullable_column->getNullMapData();
- IColumn::Filter & res = concrete_column->getData();
-
- size_t size = res.size();
- for (size_t i = 0; i < size; ++i)
- res[i] = res[i] && !null_map[i];
-
- data = &res;
- data_holder = std::move(mutable_holder);
- return;
- }
-
- throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8) or Const variants of them.",
- ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
-}
-
-}
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+#include <Columns/FilterDescription.h>
+#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnConst.h>
+#include <Core/ColumnWithTypeAndName.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
+}
+
+
+ConstantFilterDescription::ConstantFilterDescription(const IColumn & column)
+{
+ if (column.onlyNull())
+ {
+ always_false = true;
+ return;
+ }
+
+ if (isColumnConst(column))
+ {
+ const ColumnConst & column_const = assert_cast<const ColumnConst &>(column);
+ ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfLowCardinality();
+
+ if (!typeid_cast<const ColumnUInt8 *>(column_nested.get()))
+ {
+ const ColumnNullable * column_nested_nullable = checkAndGetColumn<ColumnNullable>(*column_nested);
+ if (!column_nested_nullable || !typeid_cast<const ColumnUInt8 *>(&column_nested_nullable->getNestedColumn()))
+ {
+ throw Exception("Illegal type " + column_nested->getName() + " of column for constant filter. Must be UInt8 or Nullable(UInt8).",
+ ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
+ }
+ }
+
+ if (column_const.getValue<UInt64>())
+ always_true = true;
+ else
+ always_false = true;
+ return;
+ }
+}
+
+
+FilterDescription::FilterDescription(const IColumn & column_)
+{
+ if (column_.lowCardinality())
+ data_holder = column_.convertToFullColumnIfLowCardinality();
+
+ const auto & column = data_holder ? *data_holder : column_;
+
+ if (const ColumnUInt8 * concrete_column = typeid_cast<const ColumnUInt8 *>(&column))
+ {
+ data = &concrete_column->getData();
+ return;
+ }
+
+ if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(column))
+ {
+ ColumnPtr nested_column = nullable_column->getNestedColumnPtr();
+ MutableColumnPtr mutable_holder = IColumn::mutate(std::move(nested_column));
+
+ ColumnUInt8 * concrete_column = typeid_cast<ColumnUInt8 *>(mutable_holder.get());
+ if (!concrete_column)
+ throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8).",
+ ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
+
+ const NullMap & null_map = nullable_column->getNullMapData();
+ IColumn::Filter & res = concrete_column->getData();
+
+ size_t size = res.size();
+ for (size_t i = 0; i < size; ++i)
+ res[i] = res[i] && !null_map[i];
+
+ data = &res;
+ data_holder = std::move(mutable_holder);
+ return;
+ }
+
+ throw Exception("Illegal type " + column.getName() + " of column for filter. Must be UInt8 or Nullable(UInt8) or Const variants of them.",
+ ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h
index 05812fea28..13f04fdd7a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Columns/FilterDescription.h
@@ -1,35 +1,35 @@
-#pragma once
-
-#include <Columns/IColumn.h>
-
-
-namespace DB
-{
-
-/// Support methods for implementation of WHERE, PREWHERE and HAVING.
-
-
-/// Analyze if the column for filter is constant thus filter is always false or always true.
-struct ConstantFilterDescription
-{
- bool always_false = false;
- bool always_true = false;
-
- ConstantFilterDescription() {}
- explicit ConstantFilterDescription(const IColumn & column);
-};
-
-
-/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8).
-struct FilterDescription
-{
- const IColumn::Filter * data = nullptr; /// Pointer to filter when it is not always true or always false.
- ColumnPtr data_holder; /// If new column was generated, it will be owned by holder.
-
- explicit FilterDescription(const IColumn & column);
-};
-
-
-struct ColumnWithTypeAndName;
-
-}
+#pragma once
+
+#include <Columns/IColumn.h>
+
+
+namespace DB
+{
+
+/// Support methods for implementation of WHERE, PREWHERE and HAVING.
+
+
+/// Analyze if the column for filter is constant thus filter is always false or always true.
+struct ConstantFilterDescription
+{
+ bool always_false = false;
+ bool always_true = false;
+
+ ConstantFilterDescription() {}
+ explicit ConstantFilterDescription(const IColumn & column);
+};
+
+
+/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8).
+struct FilterDescription
+{
+ const IColumn::Filter * data = nullptr; /// Pointer to filter when it is not always true or always false.
+ ColumnPtr data_holder; /// If new column was generated, it will be owned by holder.
+
+ explicit FilterDescription(const IColumn & column);
+};
+
+
+struct ColumnWithTypeAndName;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp
index e38856a75e..f967c02f9e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.cpp
@@ -1,7 +1,7 @@
-#include <Common/ClickHouseRevision.h>
-
-namespace ClickHouseRevision
-{
+#include <Common/ClickHouseRevision.h>
+
+namespace ClickHouseRevision
+{
unsigned getVersionRevision() { return 0; }
unsigned getVersionInteger() { return 0; }
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h
index 86d1e3db33..5d90422c56 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ClickHouseRevision.h
@@ -1,7 +1,7 @@
-#pragma once
-
-namespace ClickHouseRevision
-{
+#pragma once
+
+namespace ClickHouseRevision
+{
unsigned getVersionRevision();
- unsigned getVersionInteger();
-}
+ unsigned getVersionInteger();
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h
index 3ffa9bb71d..575c2b2b05 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h
@@ -1,460 +1,460 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-#include <Common/HashTable/HashTableKeyHolder.h>
-#include <Common/ColumnsHashingImpl.h>
-#include <Common/Arena.h>
-#include <Common/LRUCache.h>
-#include <Common/assert_cast.h>
-#include <common/unaligned.h>
-
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnLowCardinality.h>
-
-#include <Core/Defines.h>
-#include <memory>
+#pragma once
+
+#include <Common/HashTable/HashTable.h>
+#include <Common/HashTable/HashTableKeyHolder.h>
+#include <Common/ColumnsHashingImpl.h>
+#include <Common/Arena.h>
+#include <Common/LRUCache.h>
+#include <Common/assert_cast.h>
+#include <common/unaligned.h>
+
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnLowCardinality.h>
+
+#include <Core/Defines.h>
+#include <memory>
#include <cassert>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-namespace ColumnsHashing
-{
-
-/// For the case when there is one numeric key.
-/// UInt8/16/32/64 for any type with corresponding bit width.
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+namespace ColumnsHashing
+{
+
+/// For the case when there is one numeric key.
+/// UInt8/16/32/64 for any type with corresponding bit width.
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
-struct HashMethodOneNumber
+struct HashMethodOneNumber
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
+{
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- const char * vec;
-
- /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise.
- HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- {
- vec = key_columns[0]->getRawData().data;
- }
-
- HashMethodOneNumber(const IColumn * column)
- {
- vec = column->getRawData().data;
- }
-
- /// Creates context. Method is called once and result context is used in all threads.
- using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr
-
- /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr.
- /// Data is a HashTable where to insert key from column's row.
- /// For Serialized method, key may be placed in pool.
- using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult
-
- /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr.
- using Base::findKey; /// (Data & data, size_t row, Arena & pool) -> FindResult
-
- /// Get hash value of row.
- using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t
-
- /// Is used for default implementation in HashMethodBase.
- FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }
+
+ const char * vec;
+
+ /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise.
+ HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+ {
+ vec = key_columns[0]->getRawData().data;
+ }
+
+ HashMethodOneNumber(const IColumn * column)
+ {
+ vec = column->getRawData().data;
+ }
+
+ /// Creates context. Method is called once and result context is used in all threads.
+ using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr
+
+ /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr.
+ /// Data is a HashTable where to insert key from column's row.
+ /// For Serialized method, key may be placed in pool.
+ using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult
+
+ /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr.
+ using Base::findKey; /// (Data & data, size_t row, Arena & pool) -> FindResult
+
+ /// Get hash value of row.
+ using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t
+
+ /// Is used for default implementation in HashMethodBase.
+ FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }
const FieldType * getKeyData() const { return reinterpret_cast<const FieldType *>(vec); }
-};
-
-
-/// For the case when there is one string key.
+};
+
+
+/// For the case when there is one string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
-struct HashMethodString
+struct HashMethodString
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
+{
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- const IColumn::Offset * offsets;
- const UInt8 * chars;
-
- HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- {
- const IColumn & column = *key_columns[0];
- const ColumnString & column_string = assert_cast<const ColumnString &>(column);
- offsets = column_string.getOffsets().data();
- chars = column_string.getChars().data();
- }
-
- auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const
- {
- StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
-
- if constexpr (place_string_to_arena)
- {
- return ArenaKeyHolder{key, pool};
- }
- else
- {
- return key;
- }
- }
-
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
-};
-
-
-/// For the case when there is one fixed-length string key.
+
+ const IColumn::Offset * offsets;
+ const UInt8 * chars;
+
+ HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+ {
+ const IColumn & column = *key_columns[0];
+ const ColumnString & column_string = assert_cast<const ColumnString &>(column);
+ offsets = column_string.getOffsets().data();
+ chars = column_string.getChars().data();
+ }
+
+ auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const
+ {
+ StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
+
+ if constexpr (place_string_to_arena)
+ {
+ return ArenaKeyHolder{key, pool};
+ }
+ else
+ {
+ return key;
+ }
+ }
+
+protected:
+ friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+};
+
+
+/// For the case when there is one fixed-length string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
-struct HashMethodFixedString
+struct HashMethodFixedString
: public columns_hashing_impl::
HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
+{
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- size_t n;
- const ColumnFixedString::Chars * chars;
-
- HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- {
- const IColumn & column = *key_columns[0];
- const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column);
- n = column_string.getN();
- chars = &column_string.getChars();
- }
-
- auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const
- {
- StringRef key(&(*chars)[row * n], n);
-
- if constexpr (place_string_to_arena)
- {
- return ArenaKeyHolder{key, pool};
- }
- else
- {
- return key;
- }
- }
-
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
-};
-
-
-/// Cache stores dictionaries and saved_hash per dictionary key.
-class LowCardinalityDictionaryCache : public HashMethodContext
-{
-public:
- /// Will assume that dictionaries with same hash has the same keys.
- /// Just in case, check that they have also the same size.
- struct DictionaryKey
- {
- UInt128 hash;
- UInt64 size;
-
- bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; }
- };
-
- struct DictionaryKeyHash
- {
- size_t operator()(const DictionaryKey & key) const
- {
- SipHash hash;
+
+ size_t n;
+ const ColumnFixedString::Chars * chars;
+
+ HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+ {
+ const IColumn & column = *key_columns[0];
+ const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column);
+ n = column_string.getN();
+ chars = &column_string.getChars();
+ }
+
+ auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const
+ {
+ StringRef key(&(*chars)[row * n], n);
+
+ if constexpr (place_string_to_arena)
+ {
+ return ArenaKeyHolder{key, pool};
+ }
+ else
+ {
+ return key;
+ }
+ }
+
+protected:
+ friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+};
+
+
+/// Cache stores dictionaries and saved_hash per dictionary key.
+class LowCardinalityDictionaryCache : public HashMethodContext
+{
+public:
+ /// Will assume that dictionaries with same hash has the same keys.
+ /// Just in case, check that they have also the same size.
+ struct DictionaryKey
+ {
+ UInt128 hash;
+ UInt64 size;
+
+ bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; }
+ };
+
+ struct DictionaryKeyHash
+ {
+ size_t operator()(const DictionaryKey & key) const
+ {
+ SipHash hash;
hash.update(key.hash);
- hash.update(key.size);
- return hash.get64();
- }
- };
-
- struct CachedValues
- {
- /// Store ptr to dictionary to be sure it won't be deleted.
- ColumnPtr dictionary_holder;
- /// Hashes for dictionary keys.
- const UInt64 * saved_hash = nullptr;
- };
-
- using CachedValuesPtr = std::shared_ptr<CachedValues>;
-
- explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {}
-
- CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); }
- void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); }
-
-private:
- using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>;
- Cache cache;
-};
-
-
-/// Single low cardinality column.
-template <typename SingleColumnMethod, typename Mapped, bool use_cache>
-struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
-{
- using Base = SingleColumnMethod;
-
- enum class VisitValue
- {
- Empty = 0,
- Found = 1,
- NotFound = 2,
- };
-
- static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
- using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>;
- using FindResult = columns_hashing_impl::FindResultImpl<Mapped>;
-
- static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings)
- {
- return std::make_shared<LowCardinalityDictionaryCache>(settings);
- }
-
- ColumnRawPtrs key_columns;
- const IColumn * positions = nullptr;
- size_t size_of_index_type = 0;
-
- /// saved hash is from current column or from cache.
- const UInt64 * saved_hash = nullptr;
- /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted.
- ColumnPtr dictionary_holder;
-
- /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages.
- columns_hashing_impl::MappedCache<Mapped> mapped_cache;
- PaddedPODArray<VisitValue> visit_cache;
-
- /// If initialized column is nullable.
- bool is_nullable = false;
-
- static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * column)
- {
- auto low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(column);
- if (!low_cardinality_column)
- throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. "
- "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR);
- return *low_cardinality_column;
- }
-
- HashMethodSingleLowCardinalityColumn(
- const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context)
- : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context)
- {
- auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]);
-
- if (!context)
- throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn",
- ErrorCodes::LOGICAL_ERROR);
-
- LowCardinalityDictionaryCache * lcd_cache;
- if constexpr (use_cache)
- {
- lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get());
- if (!lcd_cache)
- {
- const auto & cached_val = *context;
- throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: "
- + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- auto * dict = column->getDictionary().getNestedNotNullableColumn().get();
- is_nullable = column->getDictionary().nestedColumnIsNullable();
- key_columns = {dict};
- bool is_shared_dict = column->isSharedDictionary();
-
- typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key;
- typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values;
-
- if (is_shared_dict)
- {
- dictionary_key = {column->getDictionary().getHash(), dict->size()};
- if constexpr (use_cache)
- cached_values = lcd_cache->get(dictionary_key);
- }
-
- if (cached_values)
- {
- saved_hash = cached_values->saved_hash;
- dictionary_holder = cached_values->dictionary_holder;
- }
- else
- {
- saved_hash = column->getDictionary().tryGetSavedHash();
- dictionary_holder = column->getDictionaryPtr();
-
- if constexpr (use_cache)
- {
- if (is_shared_dict)
- {
- cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>();
- cached_values->saved_hash = saved_hash;
- cached_values->dictionary_holder = dictionary_holder;
-
- lcd_cache->set(dictionary_key, cached_values);
- }
- }
- }
-
- if constexpr (has_mapped)
- mapped_cache.resize(key_columns[0]->size());
-
- VisitValue empty(VisitValue::Empty);
- visit_cache.assign(key_columns[0]->size(), empty);
-
- size_of_index_type = column->getSizeOfIndexType();
- positions = column->getIndexesPtr().get();
- }
-
- ALWAYS_INLINE size_t getIndexAt(size_t row) const
- {
- switch (size_of_index_type)
- {
- case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row);
- case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row);
- case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row);
- case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row);
- default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- /// Get the key holder from the key columns for insertion into the hash table.
- ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const
- {
- return Base::getKeyHolder(getIndexAt(row), pool);
- }
-
- template <typename Data>
- ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool)
- {
- size_t row = getIndexAt(row_);
-
- if (is_nullable && row == 0)
- {
- visit_cache[row] = VisitValue::Found;
- bool has_null_key = data.hasNullKeyData();
- data.hasNullKeyData() = true;
-
- if constexpr (has_mapped)
- return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key);
- else
- return EmplaceResult(!has_null_key);
- }
-
- if (visit_cache[row] == VisitValue::Found)
- {
- if constexpr (has_mapped)
- return EmplaceResult(mapped_cache[row], mapped_cache[row], false);
- else
- return EmplaceResult(false);
- }
-
- auto key_holder = getKeyHolder(row_, pool);
-
- bool inserted = false;
- typename Data::LookupResult it;
- if (saved_hash)
- data.emplace(key_holder, it, inserted, saved_hash[row]);
- else
- data.emplace(key_holder, it, inserted);
-
- visit_cache[row] = VisitValue::Found;
-
- if constexpr (has_mapped)
- {
- auto & mapped = it->getMapped();
- if (inserted)
- {
- new (&mapped) Mapped();
- }
- mapped_cache[row] = mapped;
- return EmplaceResult(mapped, mapped_cache[row], inserted);
- }
- else
- return EmplaceResult(inserted);
- }
-
- ALWAYS_INLINE bool isNullAt(size_t i)
- {
- if (!is_nullable)
- return false;
-
- return getIndexAt(i) == 0;
- }
-
- template <typename Data>
- ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
- {
- size_t row = getIndexAt(row_);
-
- if (is_nullable && row == 0)
- {
- if constexpr (has_mapped)
- return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
- else
- return FindResult(data.hasNullKeyData());
- }
-
- if (visit_cache[row] != VisitValue::Empty)
- {
- if constexpr (has_mapped)
- return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
- else
- return FindResult(visit_cache[row] == VisitValue::Found);
- }
-
- auto key_holder = getKeyHolder(row_, pool);
-
- typename Data::iterator it;
- if (saved_hash)
- it = data.find(*key_holder, saved_hash[row]);
- else
- it = data.find(*key_holder);
-
- bool found = it != data.end();
- visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
-
- if constexpr (has_mapped)
- {
- if (found)
- mapped_cache[row] = it->second;
- }
-
- if constexpr (has_mapped)
- return FindResult(&mapped_cache[row], found);
- else
- return FindResult(found);
- }
-
- template <typename Data>
- ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
- {
- row = getIndexAt(row);
- if (saved_hash)
- return saved_hash[row];
-
- return Base::getHash(data, row, pool);
- }
-};
-
-
-// Optional mask for low cardinality columns.
-template <bool has_low_cardinality>
-struct LowCardinalityKeys
-{
- ColumnRawPtrs nested_columns;
- ColumnRawPtrs positions;
- Sizes position_sizes;
-};
-
-template <>
-struct LowCardinalityKeys<false> {};
-
-/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
+ hash.update(key.size);
+ return hash.get64();
+ }
+ };
+
+ struct CachedValues
+ {
+ /// Store ptr to dictionary to be sure it won't be deleted.
+ ColumnPtr dictionary_holder;
+ /// Hashes for dictionary keys.
+ const UInt64 * saved_hash = nullptr;
+ };
+
+ using CachedValuesPtr = std::shared_ptr<CachedValues>;
+
+ explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {}
+
+ CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); }
+ void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); }
+
+private:
+ using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>;
+ Cache cache;
+};
+
+
+/// Single low cardinality column.
+template <typename SingleColumnMethod, typename Mapped, bool use_cache>
+struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
+{
+ using Base = SingleColumnMethod;
+
+ enum class VisitValue
+ {
+ Empty = 0,
+ Found = 1,
+ NotFound = 2,
+ };
+
+ static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
+ using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>;
+ using FindResult = columns_hashing_impl::FindResultImpl<Mapped>;
+
+ static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings)
+ {
+ return std::make_shared<LowCardinalityDictionaryCache>(settings);
+ }
+
+ ColumnRawPtrs key_columns;
+ const IColumn * positions = nullptr;
+ size_t size_of_index_type = 0;
+
+ /// saved hash is from current column or from cache.
+ const UInt64 * saved_hash = nullptr;
+ /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted.
+ ColumnPtr dictionary_holder;
+
+ /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages.
+ columns_hashing_impl::MappedCache<Mapped> mapped_cache;
+ PaddedPODArray<VisitValue> visit_cache;
+
+ /// If initialized column is nullable.
+ bool is_nullable = false;
+
+ static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * column)
+ {
+ auto low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(column);
+ if (!low_cardinality_column)
+ throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. "
+ "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR);
+ return *low_cardinality_column;
+ }
+
+ HashMethodSingleLowCardinalityColumn(
+ const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context)
+ : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context)
+ {
+ auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]);
+
+ if (!context)
+ throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn",
+ ErrorCodes::LOGICAL_ERROR);
+
+ LowCardinalityDictionaryCache * lcd_cache;
+ if constexpr (use_cache)
+ {
+ lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get());
+ if (!lcd_cache)
+ {
+ const auto & cached_val = *context;
+ throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: "
+ + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR);
+ }
+ }
+
+ auto * dict = column->getDictionary().getNestedNotNullableColumn().get();
+ is_nullable = column->getDictionary().nestedColumnIsNullable();
+ key_columns = {dict};
+ bool is_shared_dict = column->isSharedDictionary();
+
+ typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key;
+ typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values;
+
+ if (is_shared_dict)
+ {
+ dictionary_key = {column->getDictionary().getHash(), dict->size()};
+ if constexpr (use_cache)
+ cached_values = lcd_cache->get(dictionary_key);
+ }
+
+ if (cached_values)
+ {
+ saved_hash = cached_values->saved_hash;
+ dictionary_holder = cached_values->dictionary_holder;
+ }
+ else
+ {
+ saved_hash = column->getDictionary().tryGetSavedHash();
+ dictionary_holder = column->getDictionaryPtr();
+
+ if constexpr (use_cache)
+ {
+ if (is_shared_dict)
+ {
+ cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>();
+ cached_values->saved_hash = saved_hash;
+ cached_values->dictionary_holder = dictionary_holder;
+
+ lcd_cache->set(dictionary_key, cached_values);
+ }
+ }
+ }
+
+ if constexpr (has_mapped)
+ mapped_cache.resize(key_columns[0]->size());
+
+ VisitValue empty(VisitValue::Empty);
+ visit_cache.assign(key_columns[0]->size(), empty);
+
+ size_of_index_type = column->getSizeOfIndexType();
+ positions = column->getIndexesPtr().get();
+ }
+
+ ALWAYS_INLINE size_t getIndexAt(size_t row) const
+ {
+ switch (size_of_index_type)
+ {
+ case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row);
+ case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row);
+ case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row);
+ case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row);
+ default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
+ }
+ }
+
+ /// Get the key holder from the key columns for insertion into the hash table.
+ ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const
+ {
+ return Base::getKeyHolder(getIndexAt(row), pool);
+ }
+
+ template <typename Data>
+ ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool)
+ {
+ size_t row = getIndexAt(row_);
+
+ if (is_nullable && row == 0)
+ {
+ visit_cache[row] = VisitValue::Found;
+ bool has_null_key = data.hasNullKeyData();
+ data.hasNullKeyData() = true;
+
+ if constexpr (has_mapped)
+ return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key);
+ else
+ return EmplaceResult(!has_null_key);
+ }
+
+ if (visit_cache[row] == VisitValue::Found)
+ {
+ if constexpr (has_mapped)
+ return EmplaceResult(mapped_cache[row], mapped_cache[row], false);
+ else
+ return EmplaceResult(false);
+ }
+
+ auto key_holder = getKeyHolder(row_, pool);
+
+ bool inserted = false;
+ typename Data::LookupResult it;
+ if (saved_hash)
+ data.emplace(key_holder, it, inserted, saved_hash[row]);
+ else
+ data.emplace(key_holder, it, inserted);
+
+ visit_cache[row] = VisitValue::Found;
+
+ if constexpr (has_mapped)
+ {
+ auto & mapped = it->getMapped();
+ if (inserted)
+ {
+ new (&mapped) Mapped();
+ }
+ mapped_cache[row] = mapped;
+ return EmplaceResult(mapped, mapped_cache[row], inserted);
+ }
+ else
+ return EmplaceResult(inserted);
+ }
+
+ ALWAYS_INLINE bool isNullAt(size_t i)
+ {
+ if (!is_nullable)
+ return false;
+
+ return getIndexAt(i) == 0;
+ }
+
+ template <typename Data>
+ ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
+ {
+ size_t row = getIndexAt(row_);
+
+ if (is_nullable && row == 0)
+ {
+ if constexpr (has_mapped)
+ return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
+ else
+ return FindResult(data.hasNullKeyData());
+ }
+
+ if (visit_cache[row] != VisitValue::Empty)
+ {
+ if constexpr (has_mapped)
+ return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
+ else
+ return FindResult(visit_cache[row] == VisitValue::Found);
+ }
+
+ auto key_holder = getKeyHolder(row_, pool);
+
+ typename Data::iterator it;
+ if (saved_hash)
+ it = data.find(*key_holder, saved_hash[row]);
+ else
+ it = data.find(*key_holder);
+
+ bool found = it != data.end();
+ visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
+
+ if constexpr (has_mapped)
+ {
+ if (found)
+ mapped_cache[row] = it->second;
+ }
+
+ if constexpr (has_mapped)
+ return FindResult(&mapped_cache[row], found);
+ else
+ return FindResult(found);
+ }
+
+ template <typename Data>
+ ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
+ {
+ row = getIndexAt(row);
+ if (saved_hash)
+ return saved_hash[row];
+
+ return Base::getHash(data, row, pool);
+ }
+};
+
+
+// Optional mask for low cardinality columns.
+template <bool has_low_cardinality>
+struct LowCardinalityKeys
+{
+ ColumnRawPtrs nested_columns;
+ ColumnRawPtrs positions;
+ Sizes position_sizes;
+};
+
+template <>
+struct LowCardinalityKeys<false> {};
+
+/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
template <
typename Value,
typename Key,
@@ -463,21 +463,21 @@ template <
bool has_low_cardinality_ = false,
bool use_cache = true,
bool need_offset = false>
-struct HashMethodKeysFixed
- : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
+struct HashMethodKeysFixed
+ : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
+{
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>;
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
- using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
-
- static constexpr bool has_nullable_keys = has_nullable_keys_;
- static constexpr bool has_low_cardinality = has_low_cardinality_;
-
- LowCardinalityKeys<has_low_cardinality> low_cardinality_keys;
- Sizes key_sizes;
- size_t keys_size;
-
+ using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
+
+ static constexpr bool has_nullable_keys = has_nullable_keys_;
+ static constexpr bool has_low_cardinality = has_low_cardinality_;
+
+ LowCardinalityKeys<has_low_cardinality> low_cardinality_keys;
+ Sizes key_sizes;
+ size_t keys_size;
+
/// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here.
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
std::unique_ptr<uint8_t[]> masks;
@@ -498,26 +498,26 @@ struct HashMethodKeysFixed
return true;
}
- HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &)
- : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size())
- {
- if constexpr (has_low_cardinality)
- {
- low_cardinality_keys.nested_columns.resize(key_columns.size());
- low_cardinality_keys.positions.assign(key_columns.size(), nullptr);
- low_cardinality_keys.position_sizes.resize(key_columns.size());
- for (size_t i = 0; i < key_columns.size(); ++i)
- {
- if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
- {
- low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
- low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();
- low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType();
- }
- else
- low_cardinality_keys.nested_columns[i] = key_columns[i];
- }
- }
+ HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &)
+ : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size())
+ {
+ if constexpr (has_low_cardinality)
+ {
+ low_cardinality_keys.nested_columns.resize(key_columns.size());
+ low_cardinality_keys.positions.assign(key_columns.size(), nullptr);
+ low_cardinality_keys.position_sizes.resize(key_columns.size());
+ for (size_t i = 0; i < key_columns.size(); ++i)
+ {
+ if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
+ {
+ low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
+ low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();
+ low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType();
+ }
+ else
+ low_cardinality_keys.nested_columns[i] = key_columns[i];
+ }
+ }
if (usePreparedKeys(key_sizes))
{
@@ -575,21 +575,21 @@ struct HashMethodKeysFixed
columns_data[i] = Base::getActualColumns()[i]->getRawData().data;
}
#endif
- }
-
- ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
- {
- if constexpr (has_nullable_keys)
- {
- auto bitmap = Base::createBitmap(row);
- return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap);
- }
- else
- {
- if constexpr (has_low_cardinality)
- return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
- &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
-
+ }
+
+ ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
+ {
+ if constexpr (has_nullable_keys)
+ {
+ auto bitmap = Base::createBitmap(row);
+ return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap);
+ }
+ else
+ {
+ if constexpr (has_low_cardinality)
+ return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
+ &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
+
if (!prepared_keys.empty())
return prepared_keys[row];
@@ -600,9 +600,9 @@ struct HashMethodKeysFixed
return packFixedShuffle<Key>(columns_data.get(), keys_size, key_sizes.data(), row, masks.get());
}
#endif
- return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
- }
- }
+ return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
+ }
+ }
static std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
{
@@ -634,56 +634,56 @@ struct HashMethodKeysFixed
key_columns.swap(new_columns);
return new_sizes;
}
-};
-
-/** Hash by concatenating serialized key values.
- * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts.
- * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
- * Therefore, when aggregating by several strings, there is no ambiguity.
- */
-template <typename Value, typename Mapped>
-struct HashMethodSerialized
- : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false>
-{
- using Self = HashMethodSerialized<Value, Mapped>;
- using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
- ColumnRawPtrs key_columns;
- size_t keys_size;
-
- HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- : key_columns(key_columns_), keys_size(key_columns_.size()) {}
-
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
- ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const
- {
- return SerializedKeyHolder{
- serializeKeysToPoolContiguous(row, keys_size, key_columns, pool),
- pool};
- }
-};
-
-/// For the case when there is one string key.
+};
+
+/** Hash by concatenating serialized key values.
+ * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts.
+ * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
+ * Therefore, when aggregating by several strings, there is no ambiguity.
+ */
+template <typename Value, typename Mapped>
+struct HashMethodSerialized
+ : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false>
+{
+ using Self = HashMethodSerialized<Value, Mapped>;
+ using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
+
+ ColumnRawPtrs key_columns;
+ size_t keys_size;
+
+ HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+ : key_columns(key_columns_), keys_size(key_columns_.size()) {}
+
+protected:
+ friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
+
+ ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const
+ {
+ return SerializedKeyHolder{
+ serializeKeysToPoolContiguous(row, keys_size, key_columns, pool),
+ pool};
+ }
+};
+
+/// For the case when there is one string key.
template <typename Value, typename Mapped, bool use_cache = true, bool need_offset = false>
-struct HashMethodHashed
+struct HashMethodHashed
: public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
- using Key = UInt128;
+{
+ using Key = UInt128;
using Self = HashMethodHashed<Value, Mapped, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- ColumnRawPtrs key_columns;
-
- HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &)
- : key_columns(std::move(key_columns_)) {}
-
- ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
- {
- return hash128(row, key_columns.size(), key_columns);
- }
-};
-
-}
-}
+
+ ColumnRawPtrs key_columns;
+
+ HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &)
+ : key_columns(std::move(key_columns_)) {}
+
+ ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
+ {
+ return hash128(row, key_columns.size(), key_columns);
+ }
+};
+
+}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h
index aa7ae6ea29..f6ed2cd05e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h
@@ -1,104 +1,104 @@
-#pragma once
-
-#include <Columns/IColumn.h>
+#pragma once
+
+#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
-#include <Common/assert_cast.h>
-#include <Common/HashTable/HashTableKeyHolder.h>
-#include <Interpreters/AggregationCommon.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-namespace ColumnsHashing
-{
-
-/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe.
-/// Is used for caching.
-class HashMethodContext
-{
-public:
- virtual ~HashMethodContext() = default;
-
- struct Settings
- {
- size_t max_threads;
- };
-};
-
-using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
-
-
-namespace columns_hashing_impl
-{
-
-template <typename Value, bool consecutive_keys_optimization_>
-struct LastElementCache
-{
- static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
- Value value;
- bool empty = true;
- bool found = false;
-
- bool check(const Value & value_) { return !empty && value == value_; }
-
- template <typename Key>
- bool check(const Key & key) { return !empty && value.first == key; }
-};
-
-template <typename Data>
-struct LastElementCache<Data, false>
-{
- static constexpr bool consecutive_keys_optimization = false;
-};
-
-template <typename Mapped>
-class EmplaceResultImpl
-{
- Mapped & value;
- Mapped & cached_value;
- bool inserted;
-
-public:
- EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_)
- : value(value_), cached_value(cached_value_), inserted(inserted_) {}
-
- bool isInserted() const { return inserted; }
- auto & getMapped() const { return value; }
-
- void setMapped(const Mapped & mapped)
- {
- cached_value = mapped;
- value = mapped;
- }
-};
-
-template <>
-class EmplaceResultImpl<void>
-{
- bool inserted;
-
-public:
- explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {}
- bool isInserted() const { return inserted; }
-};
-
+#include <Common/assert_cast.h>
+#include <Common/HashTable/HashTableKeyHolder.h>
+#include <Interpreters/AggregationCommon.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+namespace ColumnsHashing
+{
+
+/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe.
+/// Is used for caching.
+class HashMethodContext
+{
+public:
+ virtual ~HashMethodContext() = default;
+
+ struct Settings
+ {
+ size_t max_threads;
+ };
+};
+
+using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
+
+
+namespace columns_hashing_impl
+{
+
+template <typename Value, bool consecutive_keys_optimization_>
+struct LastElementCache
+{
+ static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
+ Value value;
+ bool empty = true;
+ bool found = false;
+
+ bool check(const Value & value_) { return !empty && value == value_; }
+
+ template <typename Key>
+ bool check(const Key & key) { return !empty && value.first == key; }
+};
+
+template <typename Data>
+struct LastElementCache<Data, false>
+{
+ static constexpr bool consecutive_keys_optimization = false;
+};
+
+template <typename Mapped>
+class EmplaceResultImpl
+{
+ Mapped & value;
+ Mapped & cached_value;
+ bool inserted;
+
+public:
+ EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_)
+ : value(value_), cached_value(cached_value_), inserted(inserted_) {}
+
+ bool isInserted() const { return inserted; }
+ auto & getMapped() const { return value; }
+
+ void setMapped(const Mapped & mapped)
+ {
+ cached_value = mapped;
+ value = mapped;
+ }
+};
+
+template <>
+class EmplaceResultImpl<void>
+{
+ bool inserted;
+
+public:
+ explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {}
+ bool isInserted() const { return inserted; }
+};
+
/// FindResult optionally may contain pointer to value and offset in hashtable buffer.
/// Only bool found is required.
/// So we will have 4 different specializations for FindResultImpl
class FindResultImplBase
-{
- bool found;
-
-public:
+{
+ bool found;
+
+public:
explicit FindResultImplBase(bool found_) : found(found_) {}
- bool isFound() const { return found; }
-};
-
+ bool isFound() const { return found; }
+};
+
template <bool need_offset = false>
class FindResultImplOffsetBase
{
@@ -107,13 +107,13 @@ public:
explicit FindResultImplOffsetBase(size_t /* off */) {}
};
-template <>
+template <>
class FindResultImplOffsetBase<true>
-{
+{
size_t offset;
public:
constexpr static bool has_offset = true;
-
+
explicit FindResultImplOffsetBase(size_t off) : offset(off) {}
ALWAYS_INLINE size_t getOffset() const { return offset; }
};
@@ -123,7 +123,7 @@ class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBas
{
Mapped * value;
-public:
+public:
FindResultImpl()
: FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0)
{}
@@ -131,8 +131,8 @@ public:
FindResultImpl(Mapped * value_, bool found_, size_t off)
: FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {}
Mapped & getMapped() const { return *value; }
-};
-
+};
+
template <bool need_offset>
class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
{
@@ -141,254 +141,254 @@ public:
};
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
-class HashMethodBase
-{
-public:
- using EmplaceResult = EmplaceResultImpl<Mapped>;
+class HashMethodBase
+{
+public:
+ using EmplaceResult = EmplaceResultImpl<Mapped>;
using FindResult = FindResultImpl<Mapped, need_offset>;
- static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
- using Cache = LastElementCache<Value, consecutive_keys_optimization>;
-
- static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }
-
- template <typename Data>
- ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
- {
- auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
- return emplaceImpl(key_holder, data);
- }
-
- template <typename Data>
- ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
- {
- auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
- return findKeyImpl(keyHolderGetKey(key_holder), data);
- }
-
- template <typename Data>
- ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
- {
- auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
- return data.hash(keyHolderGetKey(key_holder));
- }
-
-protected:
- Cache cache;
-
- HashMethodBase()
- {
- if constexpr (consecutive_keys_optimization)
- {
- if constexpr (has_mapped)
- {
- /// Init PairNoInit elements.
- cache.value.second = Mapped();
- cache.value.first = {};
- }
- else
- cache.value = Value();
- }
- }
-
- template <typename Data, typename KeyHolder>
- ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
- {
- if constexpr (Cache::consecutive_keys_optimization)
- {
- if (cache.found && cache.check(keyHolderGetKey(key_holder)))
- {
- if constexpr (has_mapped)
- return EmplaceResult(cache.value.second, cache.value.second, false);
- else
- return EmplaceResult(false);
- }
- }
-
- typename Data::LookupResult it;
- bool inserted = false;
- data.emplace(key_holder, it, inserted);
-
- [[maybe_unused]] Mapped * cached = nullptr;
- if constexpr (has_mapped)
- cached = &it->getMapped();
-
- if (inserted)
- {
- if constexpr (has_mapped)
- {
- new (&it->getMapped()) Mapped();
- }
- }
-
- if constexpr (consecutive_keys_optimization)
- {
- cache.found = true;
- cache.empty = false;
-
- if constexpr (has_mapped)
- {
- cache.value.first = it->getKey();
- cache.value.second = it->getMapped();
- cached = &cache.value.second;
- }
- else
- {
- cache.value = it->getKey();
- }
- }
-
- if constexpr (has_mapped)
- return EmplaceResult(it->getMapped(), *cached, inserted);
- else
- return EmplaceResult(inserted);
- }
-
- template <typename Data, typename Key>
- ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
- {
- if constexpr (Cache::consecutive_keys_optimization)
- {
+ static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
+ using Cache = LastElementCache<Value, consecutive_keys_optimization>;
+
+ static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }
+
+ template <typename Data>
+ ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
+ {
+ auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
+ return emplaceImpl(key_holder, data);
+ }
+
+ template <typename Data>
+ ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
+ {
+ auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
+ return findKeyImpl(keyHolderGetKey(key_holder), data);
+ }
+
+ template <typename Data>
+ ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
+ {
+ auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
+ return data.hash(keyHolderGetKey(key_holder));
+ }
+
+protected:
+ Cache cache;
+
+ HashMethodBase()
+ {
+ if constexpr (consecutive_keys_optimization)
+ {
+ if constexpr (has_mapped)
+ {
+ /// Init PairNoInit elements.
+ cache.value.second = Mapped();
+ cache.value.first = {};
+ }
+ else
+ cache.value = Value();
+ }
+ }
+
+ template <typename Data, typename KeyHolder>
+ ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
+ {
+ if constexpr (Cache::consecutive_keys_optimization)
+ {
+ if (cache.found && cache.check(keyHolderGetKey(key_holder)))
+ {
+ if constexpr (has_mapped)
+ return EmplaceResult(cache.value.second, cache.value.second, false);
+ else
+ return EmplaceResult(false);
+ }
+ }
+
+ typename Data::LookupResult it;
+ bool inserted = false;
+ data.emplace(key_holder, it, inserted);
+
+ [[maybe_unused]] Mapped * cached = nullptr;
+ if constexpr (has_mapped)
+ cached = &it->getMapped();
+
+ if (inserted)
+ {
+ if constexpr (has_mapped)
+ {
+ new (&it->getMapped()) Mapped();
+ }
+ }
+
+ if constexpr (consecutive_keys_optimization)
+ {
+ cache.found = true;
+ cache.empty = false;
+
+ if constexpr (has_mapped)
+ {
+ cache.value.first = it->getKey();
+ cache.value.second = it->getMapped();
+ cached = &cache.value.second;
+ }
+ else
+ {
+ cache.value = it->getKey();
+ }
+ }
+
+ if constexpr (has_mapped)
+ return EmplaceResult(it->getMapped(), *cached, inserted);
+ else
+ return EmplaceResult(inserted);
+ }
+
+ template <typename Data, typename Key>
+ ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
+ {
+ if constexpr (Cache::consecutive_keys_optimization)
+ {
/// It's possible to support such combination, but code will became more complex.
/// Now there's not place where we need this options enabled together
static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
- if (cache.check(key))
- {
- if constexpr (has_mapped)
+ if (cache.check(key))
+ {
+ if constexpr (has_mapped)
return FindResult(&cache.value.second, cache.found, 0);
- else
+ else
return FindResult(cache.found, 0);
- }
- }
-
- auto it = data.find(key);
-
- if constexpr (consecutive_keys_optimization)
- {
- cache.found = it != nullptr;
- cache.empty = false;
-
- if constexpr (has_mapped)
- {
- cache.value.first = key;
- if (it)
- {
- cache.value.second = it->getMapped();
- }
- }
- else
- {
- cache.value = key;
- }
- }
-
+ }
+ }
+
+ auto it = data.find(key);
+
+ if constexpr (consecutive_keys_optimization)
+ {
+ cache.found = it != nullptr;
+ cache.empty = false;
+
+ if constexpr (has_mapped)
+ {
+ cache.value.first = key;
+ if (it)
+ {
+ cache.value.second = it->getMapped();
+ }
+ }
+ else
+ {
+ cache.value = key;
+ }
+ }
+
size_t offset = 0;
if constexpr (FindResult::has_offset)
{
offset = it ? data.offsetInternal(it) : 0;
}
- if constexpr (has_mapped)
+ if constexpr (has_mapped)
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset);
- else
+ else
return FindResult(it != nullptr, offset);
- }
-};
-
-
-template <typename T>
-struct MappedCache : public PaddedPODArray<T> {};
-
-template <>
-struct MappedCache<void> {};
-
-
-/// This class is designed to provide the functionality that is required for
-/// supporting nullable keys in HashMethodKeysFixed. If there are
-/// no nullable keys, this class is merely implemented as an empty shell.
-template <typename Key, bool has_nullable_keys>
-class BaseStateKeysFixed;
-
-/// Case where nullable keys are supported.
-template <typename Key>
-class BaseStateKeysFixed<Key, true>
-{
-protected:
- BaseStateKeysFixed(const ColumnRawPtrs & key_columns)
- {
- null_maps.reserve(key_columns.size());
- actual_columns.reserve(key_columns.size());
-
- for (const auto & col : key_columns)
- {
- if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col))
- {
- actual_columns.push_back(&nullable_col->getNestedColumn());
- null_maps.push_back(&nullable_col->getNullMapColumn());
- }
- else
- {
- actual_columns.push_back(col);
- null_maps.push_back(nullptr);
- }
- }
- }
-
- /// Return the columns which actually contain the values of the keys.
- /// For a given key column, if it is nullable, we return its nested
- /// column. Otherwise we return the key column itself.
- inline const ColumnRawPtrs & getActualColumns() const
- {
- return actual_columns;
- }
-
- /// Create a bitmap that indicates whether, for a particular row,
- /// a key column bears a null value or not.
- KeysNullMap<Key> createBitmap(size_t row) const
- {
- KeysNullMap<Key> bitmap{};
-
- for (size_t k = 0; k < null_maps.size(); ++k)
- {
- if (null_maps[k] != nullptr)
- {
- const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
- if (null_map[row] == 1)
- {
- size_t bucket = k / 8;
- size_t offset = k % 8;
- bitmap[bucket] |= UInt8(1) << offset;
- }
- }
- }
-
- return bitmap;
- }
-
-private:
- ColumnRawPtrs actual_columns;
- ColumnRawPtrs null_maps;
-};
-
-/// Case where nullable keys are not supported.
-template <typename Key>
-class BaseStateKeysFixed<Key, false>
-{
-protected:
- BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {}
-
- const ColumnRawPtrs & getActualColumns() const { return actual_columns; }
-
- KeysNullMap<Key> createBitmap(size_t) const
- {
- throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
- " is forbidden", ErrorCodes::LOGICAL_ERROR};
- }
-
-private:
- ColumnRawPtrs actual_columns;
-};
-
-}
-
-}
-
-}
+ }
+};
+
+
+template <typename T>
+struct MappedCache : public PaddedPODArray<T> {};
+
+template <>
+struct MappedCache<void> {};
+
+
+/// This class is designed to provide the functionality that is required for
+/// supporting nullable keys in HashMethodKeysFixed. If there are
+/// no nullable keys, this class is merely implemented as an empty shell.
+template <typename Key, bool has_nullable_keys>
+class BaseStateKeysFixed;
+
+/// Case where nullable keys are supported.
+template <typename Key>
+class BaseStateKeysFixed<Key, true>
+{
+protected:
+ BaseStateKeysFixed(const ColumnRawPtrs & key_columns)
+ {
+ null_maps.reserve(key_columns.size());
+ actual_columns.reserve(key_columns.size());
+
+ for (const auto & col : key_columns)
+ {
+ if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col))
+ {
+ actual_columns.push_back(&nullable_col->getNestedColumn());
+ null_maps.push_back(&nullable_col->getNullMapColumn());
+ }
+ else
+ {
+ actual_columns.push_back(col);
+ null_maps.push_back(nullptr);
+ }
+ }
+ }
+
+ /// Return the columns which actually contain the values of the keys.
+ /// For a given key column, if it is nullable, we return its nested
+ /// column. Otherwise we return the key column itself.
+ inline const ColumnRawPtrs & getActualColumns() const
+ {
+ return actual_columns;
+ }
+
+ /// Create a bitmap that indicates whether, for a particular row,
+ /// a key column bears a null value or not.
+ KeysNullMap<Key> createBitmap(size_t row) const
+ {
+ KeysNullMap<Key> bitmap{};
+
+ for (size_t k = 0; k < null_maps.size(); ++k)
+ {
+ if (null_maps[k] != nullptr)
+ {
+ const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
+ if (null_map[row] == 1)
+ {
+ size_t bucket = k / 8;
+ size_t offset = k % 8;
+ bitmap[bucket] |= UInt8(1) << offset;
+ }
+ }
+ }
+
+ return bitmap;
+ }
+
+private:
+ ColumnRawPtrs actual_columns;
+ ColumnRawPtrs null_maps;
+};
+
+/// Case where nullable keys are not supported.
+template <typename Key>
+class BaseStateKeysFixed<Key, false>
+{
+protected:
+ BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {}
+
+ const ColumnRawPtrs & getActualColumns() const { return actual_columns; }
+
+ KeysNullMap<Key> createBitmap(size_t) const
+ {
+ throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
+ " is forbidden", ErrorCodes::LOGICAL_ERROR};
+ }
+
+private:
+ ColumnRawPtrs actual_columns;
+};
+
+}
+
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h
index bc9d55ff8f..dc1f748764 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ConcurrentBoundedQueue.h
@@ -1,15 +1,15 @@
-#pragma once
-
-#include <queue>
-#include <type_traits>
+#pragma once
+
+#include <queue>
+#include <type_traits>
#include <atomic>
-
-#include <Poco/Mutex.h>
-#include <Poco/Semaphore.h>
-
+
+#include <Poco/Mutex.h>
+#include <Poco/Semaphore.h>
+
#include <common/MoveOrCopyIfThrow.h>
#include <Common/Exception.h>
-
+
namespace DB
{
namespace ErrorCodes
@@ -18,20 +18,20 @@ namespace ErrorCodes
}
}
-/** A very simple thread-safe queue of limited size.
- * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.
- * If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue.
- */
-template <typename T>
-class ConcurrentBoundedQueue
-{
-private:
- std::queue<T> queue;
+/** A very simple thread-safe queue of limited size.
+ * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.
+ * If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue.
+ */
+template <typename T>
+class ConcurrentBoundedQueue
+{
+private:
+ std::queue<T> queue;
mutable Poco::FastMutex mutex;
- Poco::Semaphore fill_count;
- Poco::Semaphore empty_count;
+ Poco::Semaphore fill_count;
+ Poco::Semaphore empty_count;
std::atomic_bool closed = false;
-
+
template <typename... Args>
bool tryEmplaceImpl(Args &&... args)
{
@@ -63,71 +63,71 @@ private:
empty_count.set();
}
-public:
+public:
explicit ConcurrentBoundedQueue(size_t max_fill)
: fill_count(0, max_fill)
, empty_count(max_fill, max_fill)
{}
-
- void push(const T & x)
- {
- empty_count.wait();
+
+ void push(const T & x)
+ {
+ empty_count.wait();
if (!tryEmplaceImpl(x))
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()");
- }
-
- template <typename... Args>
- void emplace(Args &&... args)
- {
- empty_count.wait();
+ }
+
+ template <typename... Args>
+ void emplace(Args &&... args)
+ {
+ empty_count.wait();
if (!tryEmplaceImpl(std::forward<Args>(args)...))
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()");
- }
-
- void pop(T & x)
- {
- fill_count.wait();
+ }
+
+ void pop(T & x)
+ {
+ fill_count.wait();
popImpl(x);
- }
-
- bool tryPush(const T & x, UInt64 milliseconds = 0)
- {
+ }
+
+ bool tryPush(const T & x, UInt64 milliseconds = 0)
+ {
if (!empty_count.tryWait(milliseconds))
return false;
return tryEmplaceImpl(x);
- }
-
- template <typename... Args>
- bool tryEmplace(UInt64 milliseconds, Args &&... args)
- {
+ }
+
+ template <typename... Args>
+ bool tryEmplace(UInt64 milliseconds, Args &&... args)
+ {
if (!empty_count.tryWait(milliseconds))
return false;
return tryEmplaceImpl(std::forward<Args>(args)...);
- }
-
- bool tryPop(T & x, UInt64 milliseconds = 0)
- {
+ }
+
+ bool tryPop(T & x, UInt64 milliseconds = 0)
+ {
if (!fill_count.tryWait(milliseconds))
return false;
popImpl(x);
return true;
- }
-
+ }
+
size_t size() const
- {
- Poco::ScopedLock<Poco::FastMutex> lock(mutex);
- return queue.size();
- }
-
+ {
+ Poco::ScopedLock<Poco::FastMutex> lock(mutex);
+ return queue.size();
+ }
+
size_t empty() const
- {
- Poco::ScopedLock<Poco::FastMutex> lock(mutex);
- return queue.empty();
- }
-
+ {
+ Poco::ScopedLock<Poco::FastMutex> lock(mutex);
+ return queue.empty();
+ }
+
/// Forbids to push new elements to queue.
/// Returns false if queue was not closed before call, returns true if queue was already closed.
bool close()
@@ -141,15 +141,15 @@ public:
return closed.load();
}
- void clear()
- {
- while (fill_count.tryWait(0))
- {
- {
- Poco::ScopedLock<Poco::FastMutex> lock(mutex);
- queue.pop();
- }
- empty_count.set();
- }
- }
-};
+ void clear()
+ {
+ while (fill_count.tryWait(0))
+ {
+ {
+ Poco::ScopedLock<Poco::FastMutex> lock(mutex);
+ queue.pop();
+ }
+ empty_count.set();
+ }
+ }
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp
index eb677debb0..2166dcc3e8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.cpp
@@ -1,37 +1,37 @@
-#include <Common/Config/AbstractConfigurationComparison.h>
+#include <Common/Config/AbstractConfigurationComparison.h>
#include <Common/getMultipleKeysFromConfig.h>
-
-#include <unordered_set>
-#include <common/StringRef.h>
-#include <Poco/Util/AbstractConfiguration.h>
-
-
-namespace DB
-{
-namespace
-{
- String concatKeyAndSubKey(const String & key, const String & subkey)
- {
- // Copied from Poco::Util::ConfigurationView::translateKey():
- String result = key;
- if (!result.empty() && !subkey.empty() && subkey[0] != '[')
- result += '.';
- result += subkey;
- return result;
- };
-}
-
-
-bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right)
-{
- return isSameConfiguration(left, String(), right, String());
-}
-
+
+#include <unordered_set>
+#include <common/StringRef.h>
+#include <Poco/Util/AbstractConfiguration.h>
+
+
+namespace DB
+{
+namespace
+{
+ String concatKeyAndSubKey(const String & key, const String & subkey)
+ {
+ // Copied from Poco::Util::ConfigurationView::translateKey():
+ String result = key;
+ if (!result.empty() && !subkey.empty() && subkey[0] != '[')
+ result += '.';
+ result += subkey;
+ return result;
+ };
+}
+
+
+bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right)
+{
+ return isSameConfiguration(left, String(), right, String());
+}
+
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & key)
{
return isSameConfiguration(left, key, right, key);
}
-
+
bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & root, const String & name)
{
if (&left == &right)
@@ -49,44 +49,44 @@ bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration
return true;
}
-bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key,
- const Poco::Util::AbstractConfiguration & right, const String & right_key)
-{
- if (&left == &right && left_key == right_key)
- return true;
-
- bool has_property = left.hasProperty(left_key);
- if (has_property != right.hasProperty(right_key))
- return false;
- if (has_property)
- {
- /// The left and right configurations contains values so we can compare them.
- if (left.getRawString(left_key) != right.getRawString(right_key))
- return false;
- }
-
- /// Get the subkeys of the left and right configurations.
- Poco::Util::AbstractConfiguration::Keys subkeys;
- left.keys(left_key, subkeys);
-
- {
- /// Check that the right configuration has the same set of subkeys as the left configuration.
- Poco::Util::AbstractConfiguration::Keys right_subkeys;
- right.keys(right_key, right_subkeys);
- std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()};
- if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size()))
- return false;
- for (const auto & right_subkey : right_subkeys)
- if (!left_subkeys.count(right_subkey))
- return false;
- }
-
- /// Go through all the subkeys and compare corresponding parts of the configurations.
- for (const auto & subkey : subkeys)
- if (!isSameConfiguration(left, concatKeyAndSubKey(left_key, subkey), right, concatKeyAndSubKey(right_key, subkey)))
- return false;
-
- return true;
-}
-
-}
+bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key,
+ const Poco::Util::AbstractConfiguration & right, const String & right_key)
+{
+ if (&left == &right && left_key == right_key)
+ return true;
+
+ bool has_property = left.hasProperty(left_key);
+ if (has_property != right.hasProperty(right_key))
+ return false;
+ if (has_property)
+ {
+ /// The left and right configurations contains values so we can compare them.
+ if (left.getRawString(left_key) != right.getRawString(right_key))
+ return false;
+ }
+
+ /// Get the subkeys of the left and right configurations.
+ Poco::Util::AbstractConfiguration::Keys subkeys;
+ left.keys(left_key, subkeys);
+
+ {
+ /// Check that the right configuration has the same set of subkeys as the left configuration.
+ Poco::Util::AbstractConfiguration::Keys right_subkeys;
+ right.keys(right_key, right_subkeys);
+ std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()};
+ if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size()))
+ return false;
+ for (const auto & right_subkey : right_subkeys)
+ if (!left_subkeys.count(right_subkey))
+ return false;
+ }
+
+ /// Go through all the subkeys and compare corresponding parts of the configurations.
+ for (const auto & subkey : subkeys)
+ if (!isSameConfiguration(left, concatKeyAndSubKey(left_key, subkey), right, concatKeyAndSubKey(right_key, subkey)))
+ return false;
+
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h
index 6e1d8a890b..9ef3118235 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Config/AbstractConfigurationComparison.h
@@ -1,18 +1,18 @@
-#pragma once
-
+#pragma once
+
#include <common/types.h>
-
-namespace Poco::Util
-{
- class AbstractConfiguration;
-}
-
-namespace DB
-{
- /// Returns true if two configurations contains the same keys and values.
- bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left,
- const Poco::Util::AbstractConfiguration & right);
-
+
+namespace Poco::Util
+{
+ class AbstractConfiguration;
+}
+
+namespace DB
+{
+ /// Returns true if two configurations contains the same keys and values.
+ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left,
+ const Poco::Util::AbstractConfiguration & right);
+
/// Config may have multiple keys with one name. For example:
/// <root>
/// <some_key>...</some_key>
@@ -29,17 +29,17 @@ namespace DB
const Poco::Util::AbstractConfiguration & right,
const String & key);
- /// Returns true if specified subviews of the two configurations contains the same keys and values.
- bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key,
- const Poco::Util::AbstractConfiguration & right, const String & right_key);
-
- inline bool operator==(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right)
- {
- return isSameConfiguration(left, right);
- }
-
- inline bool operator!=(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right)
- {
- return !isSameConfiguration(left, right);
- }
-}
+ /// Returns true if specified subviews of the two configurations contains the same keys and values.
+ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key,
+ const Poco::Util::AbstractConfiguration & right, const String & right_key);
+
+ inline bool operator==(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right)
+ {
+ return isSameConfiguration(left, right);
+ }
+
+ inline bool operator!=(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right)
+ {
+ return !isSameConfiguration(left, right);
+ }
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp
index 4fe0f0bb8c..b279a6ca12 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.cpp
@@ -1,92 +1,92 @@
-#include "DNSResolver.h"
-#include <common/SimpleCache.h>
-#include <Common/Exception.h>
-#include <Common/ProfileEvents.h>
-#include <Core/Names.h>
+#include "DNSResolver.h"
+#include <common/SimpleCache.h>
+#include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
+#include <Core/Names.h>
#include <common/types.h>
-#include <Poco/Net/IPAddress.h>
-#include <Poco/Net/DNS.h>
-#include <Poco/Net/NetException.h>
-#include <Poco/NumberParser.h>
-#include <arpa/inet.h>
-#include <atomic>
-#include <optional>
-#include <string_view>
-
-namespace ProfileEvents
-{
- extern Event DNSError;
-}
-
-namespace std
-{
-template<> struct hash<Poco::Net::IPAddress>
-{
- size_t operator()(const Poco::Net::IPAddress & address) const noexcept
- {
- std::string_view addr(static_cast<const char *>(address.addr()), address.length());
- std::hash<std::string_view> hash_impl;
- return hash_impl(addr);
- }
-};
-}
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int BAD_ARGUMENTS;
- extern const int DNS_ERROR;
-}
-
-
-/// Slightly altered implementation from https://github.com/pocoproject/poco/blob/poco-1.6.1/Net/src/SocketAddress.cpp#L86
-static void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port)
-{
- String port_str;
- out_host.clear();
-
- auto it = host_and_port.begin();
- auto end = host_and_port.end();
-
- if (*it == '[') /// Try parse case '[<IPv6 or something else>]:<port>'
- {
- ++it;
- while (it != end && *it != ']')
- out_host += *it++;
- if (it == end)
- throw Exception("Malformed IPv6 address", ErrorCodes::BAD_ARGUMENTS);
- ++it;
- }
- else /// Case '<IPv4 or domain name or something else>:<port>'
- {
- while (it != end && *it != ':')
- out_host += *it++;
- }
-
- if (it != end && *it == ':')
- {
- ++it;
- while (it != end)
- port_str += *it++;
- }
- else
- throw Exception("Missing port number", ErrorCodes::BAD_ARGUMENTS);
-
- unsigned port;
- if (Poco::NumberParser::tryParseUnsigned(port_str, port) && port <= 0xFFFF)
- {
- out_port = static_cast<UInt16>(port);
- }
- else
+#include <Poco/Net/IPAddress.h>
+#include <Poco/Net/DNS.h>
+#include <Poco/Net/NetException.h>
+#include <Poco/NumberParser.h>
+#include <arpa/inet.h>
+#include <atomic>
+#include <optional>
+#include <string_view>
+
+namespace ProfileEvents
+{
+ extern Event DNSError;
+}
+
+namespace std
+{
+template<> struct hash<Poco::Net::IPAddress>
+{
+ size_t operator()(const Poco::Net::IPAddress & address) const noexcept
+ {
+ std::string_view addr(static_cast<const char *>(address.addr()), address.length());
+ std::hash<std::string_view> hash_impl;
+ return hash_impl(addr);
+ }
+};
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+ extern const int DNS_ERROR;
+}
+
+
+/// Slightly altered implementation from https://github.com/pocoproject/poco/blob/poco-1.6.1/Net/src/SocketAddress.cpp#L86
+static void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port)
+{
+ String port_str;
+ out_host.clear();
+
+ auto it = host_and_port.begin();
+ auto end = host_and_port.end();
+
+ if (*it == '[') /// Try parse case '[<IPv6 or something else>]:<port>'
+ {
+ ++it;
+ while (it != end && *it != ']')
+ out_host += *it++;
+ if (it == end)
+ throw Exception("Malformed IPv6 address", ErrorCodes::BAD_ARGUMENTS);
+ ++it;
+ }
+ else /// Case '<IPv4 or domain name or something else>:<port>'
+ {
+ while (it != end && *it != ':')
+ out_host += *it++;
+ }
+
+ if (it != end && *it == ':')
+ {
+ ++it;
+ while (it != end)
+ port_str += *it++;
+ }
+ else
+ throw Exception("Missing port number", ErrorCodes::BAD_ARGUMENTS);
+
+ unsigned port;
+ if (Poco::NumberParser::tryParseUnsigned(port_str, port) && port <= 0xFFFF)
+ {
+ out_port = static_cast<UInt16>(port);
+ }
+ else
throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS);
-}
-
-static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
-{
- Poco::Net::IPAddress ip;
-
+}
+
+static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
+{
+ Poco::Net::IPAddress ip;
+
/// NOTE:
/// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2
/// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (w/o brackets)
@@ -101,24 +101,24 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
if (Poco::Net::IPAddress::tryParse(host, ip))
return DNSResolver::IPAddresses(1, ip);
}
-
- /// Family: AF_UNSPEC
- /// AI_ALL is required for checking if client is allowed to connect from an address
- auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL;
- /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured.
- /// It should not affect client address checking, since client cannot connect from IPv6 address
- /// if server has no IPv6 addresses.
- flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
+
+ /// Family: AF_UNSPEC
+ /// AI_ALL is required for checking if client is allowed to connect from an address
+ auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL;
+ /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured.
+ /// It should not affect client address checking, since client cannot connect from IPv6 address
+ /// if server has no IPv6 addresses.
+ flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
DNSResolver::IPAddresses addresses;
try
{
-#if defined(ARCADIA_BUILD)
+#if defined(ARCADIA_BUILD)
addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
-#else
+#else
addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
-#endif
+#endif
}
catch (const Poco::Net::DNSException & e)
{
@@ -126,225 +126,225 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
addresses.clear();
}
- if (addresses.empty())
- throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR);
-
- return addresses;
-}
-
-static String reverseResolveImpl(const Poco::Net::IPAddress & address)
-{
- Poco::Net::SocketAddress sock_addr(address, 0);
-
- /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...)
- char host[1024];
- int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD);
- if (err)
- throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
- return host;
-}
-
-struct DNSResolver::Impl
-{
- SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host;
- SimpleCache<decltype(reverseResolveImpl), &reverseResolveImpl> cache_address;
-
- std::mutex drop_mutex;
- std::mutex update_mutex;
-
- /// Cached server host name
- std::optional<String> host_name;
-
- /// Store hosts, which was asked to resolve from last update of DNS cache.
- NameSet new_hosts;
- std::unordered_set<Poco::Net::IPAddress> new_addresses;
-
- /// Store all hosts, which was whenever asked to resolve
- NameSet known_hosts;
- std::unordered_set<Poco::Net::IPAddress> known_addresses;
-
- /// If disabled, will not make cache lookups, will resolve addresses manually on each call
- std::atomic<bool> disable_cache{false};
-};
-
-
-DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()), log(&Poco::Logger::get("DNSResolver")) {}
-
-Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host)
-{
- return resolveHostAll(host).front();
-}
-
-DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host)
-{
- if (impl->disable_cache)
- return resolveIPAddressImpl(host);
-
- addToNewHosts(host);
- return impl->cache_host(host);
-}
-
-Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port)
-{
- if (impl->disable_cache)
- return Poco::Net::SocketAddress(host_and_port);
-
- String host;
- UInt16 port;
- splitHostAndPort(host_and_port, host, port);
-
- addToNewHosts(host);
- return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
-}
-
-Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port)
-{
- if (impl->disable_cache)
- return Poco::Net::SocketAddress(host, port);
-
- addToNewHosts(host);
- return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
-}
-
-String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address)
-{
- if (impl->disable_cache)
- return reverseResolveImpl(address);
-
- addToNewAddresses(address);
- return impl->cache_address(address);
-}
-
-void DNSResolver::dropCache()
-{
- impl->cache_host.drop();
- impl->cache_address.drop();
-
- std::scoped_lock lock(impl->update_mutex, impl->drop_mutex);
-
- impl->known_hosts.clear();
- impl->known_addresses.clear();
- impl->new_hosts.clear();
- impl->new_addresses.clear();
- impl->host_name.reset();
-}
-
-void DNSResolver::setDisableCacheFlag(bool is_disabled)
-{
- impl->disable_cache = is_disabled;
-}
-
-String DNSResolver::getHostName()
-{
- if (impl->disable_cache)
- return Poco::Net::DNS::hostName();
-
- std::lock_guard lock(impl->drop_mutex);
-
- if (!impl->host_name.has_value())
- impl->host_name.emplace(Poco::Net::DNS::hostName());
-
- return *impl->host_name;
-}
-
-static const String & cacheElemToString(const String & str) { return str; }
-static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); }
-
-template<typename UpdateF, typename ElemsT>
-bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg)
-{
- bool updated = false;
- String lost_elems;
- for (const auto & elem : elems)
- {
- try
- {
- updated |= (this->*update_func)(elem);
- }
- catch (const Poco::Net::NetException &)
- {
- ProfileEvents::increment(ProfileEvents::DNSError);
-
- if (!lost_elems.empty())
- lost_elems += ", ";
- lost_elems += cacheElemToString(elem);
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
- }
-
- if (!lost_elems.empty())
- LOG_INFO(log, log_msg, lost_elems);
-
- return updated;
-}
-
-bool DNSResolver::updateCache()
-{
- LOG_DEBUG(log, "Updating DNS cache");
-
- {
+ if (addresses.empty())
+ throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR);
+
+ return addresses;
+}
+
+static String reverseResolveImpl(const Poco::Net::IPAddress & address)
+{
+ Poco::Net::SocketAddress sock_addr(address, 0);
+
+ /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...)
+ char host[1024];
+ int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD);
+ if (err)
+ throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
+ return host;
+}
+
+struct DNSResolver::Impl
+{
+ SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host;
+ SimpleCache<decltype(reverseResolveImpl), &reverseResolveImpl> cache_address;
+
+ std::mutex drop_mutex;
+ std::mutex update_mutex;
+
+ /// Cached server host name
+ std::optional<String> host_name;
+
+ /// Store hosts, which was asked to resolve from last update of DNS cache.
+ NameSet new_hosts;
+ std::unordered_set<Poco::Net::IPAddress> new_addresses;
+
+ /// Store all hosts, which was whenever asked to resolve
+ NameSet known_hosts;
+ std::unordered_set<Poco::Net::IPAddress> known_addresses;
+
+ /// If disabled, will not make cache lookups, will resolve addresses manually on each call
+ std::atomic<bool> disable_cache{false};
+};
+
+
+DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()), log(&Poco::Logger::get("DNSResolver")) {}
+
+Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host)
+{
+ return resolveHostAll(host).front();
+}
+
+DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host)
+{
+ if (impl->disable_cache)
+ return resolveIPAddressImpl(host);
+
+ addToNewHosts(host);
+ return impl->cache_host(host);
+}
+
+Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port)
+{
+ if (impl->disable_cache)
+ return Poco::Net::SocketAddress(host_and_port);
+
+ String host;
+ UInt16 port;
+ splitHostAndPort(host_and_port, host, port);
+
+ addToNewHosts(host);
+ return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
+}
+
+Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port)
+{
+ if (impl->disable_cache)
+ return Poco::Net::SocketAddress(host, port);
+
+ addToNewHosts(host);
+ return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
+}
+
+String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address)
+{
+ if (impl->disable_cache)
+ return reverseResolveImpl(address);
+
+ addToNewAddresses(address);
+ return impl->cache_address(address);
+}
+
+void DNSResolver::dropCache()
+{
+ impl->cache_host.drop();
+ impl->cache_address.drop();
+
+ std::scoped_lock lock(impl->update_mutex, impl->drop_mutex);
+
+ impl->known_hosts.clear();
+ impl->known_addresses.clear();
+ impl->new_hosts.clear();
+ impl->new_addresses.clear();
+ impl->host_name.reset();
+}
+
+void DNSResolver::setDisableCacheFlag(bool is_disabled)
+{
+ impl->disable_cache = is_disabled;
+}
+
+String DNSResolver::getHostName()
+{
+ if (impl->disable_cache)
+ return Poco::Net::DNS::hostName();
+
+ std::lock_guard lock(impl->drop_mutex);
+
+ if (!impl->host_name.has_value())
+ impl->host_name.emplace(Poco::Net::DNS::hostName());
+
+ return *impl->host_name;
+}
+
+static const String & cacheElemToString(const String & str) { return str; }
+static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); }
+
+template<typename UpdateF, typename ElemsT>
+bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg)
+{
+ bool updated = false;
+ String lost_elems;
+ for (const auto & elem : elems)
+ {
+ try
+ {
+ updated |= (this->*update_func)(elem);
+ }
+ catch (const Poco::Net::NetException &)
+ {
+ ProfileEvents::increment(ProfileEvents::DNSError);
+
+ if (!lost_elems.empty())
+ lost_elems += ", ";
+ lost_elems += cacheElemToString(elem);
+ }
+ catch (...)
+ {
+ tryLogCurrentException(__PRETTY_FUNCTION__);
+ }
+ }
+
+ if (!lost_elems.empty())
+ LOG_INFO(log, log_msg, lost_elems);
+
+ return updated;
+}
+
+bool DNSResolver::updateCache()
+{
+ LOG_DEBUG(log, "Updating DNS cache");
+
+ {
String updated_host_name = Poco::Net::DNS::hostName();
- std::lock_guard lock(impl->drop_mutex);
-
- for (const auto & host : impl->new_hosts)
- impl->known_hosts.insert(host);
- impl->new_hosts.clear();
-
- for (const auto & address : impl->new_addresses)
- impl->known_addresses.insert(address);
- impl->new_addresses.clear();
-
+ std::lock_guard lock(impl->drop_mutex);
+
+ for (const auto & host : impl->new_hosts)
+ impl->known_hosts.insert(host);
+ impl->new_hosts.clear();
+
+ for (const auto & address : impl->new_addresses)
+ impl->known_addresses.insert(address);
+ impl->new_addresses.clear();
+
impl->host_name.emplace(updated_host_name);
- }
-
+ }
+
/// FIXME Updating may take a long time because we cannot manage timeouts of getaddrinfo(...) and getnameinfo(...).
- /// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex)
- std::lock_guard lock(impl->update_mutex);
-
- bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}");
- updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}");
-
- LOG_DEBUG(log, "Updated DNS cache");
- return hosts_updated;
-}
-
-bool DNSResolver::updateHost(const String & host)
-{
- /// Usage of updateHost implies that host is already in cache and there is no extra computations
- auto old_value = impl->cache_host(host);
- impl->cache_host.update(host);
- return old_value != impl->cache_host(host);
-}
-
-bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address)
-{
- auto old_value = impl->cache_address(address);
- impl->cache_address.update(address);
- return old_value == impl->cache_address(address);
-}
-
-void DNSResolver::addToNewHosts(const String & host)
-{
- std::lock_guard lock(impl->drop_mutex);
- impl->new_hosts.insert(host);
-}
-
-void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address)
-{
- std::lock_guard lock(impl->drop_mutex);
- impl->new_addresses.insert(address);
-}
-
-DNSResolver::~DNSResolver() = default;
-
-DNSResolver & DNSResolver::instance()
-{
- static DNSResolver ret;
- return ret;
-}
-
-}
+ /// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex)
+ std::lock_guard lock(impl->update_mutex);
+
+ bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}");
+ updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}");
+
+ LOG_DEBUG(log, "Updated DNS cache");
+ return hosts_updated;
+}
+
+bool DNSResolver::updateHost(const String & host)
+{
+ /// Usage of updateHost implies that host is already in cache and there is no extra computations
+ auto old_value = impl->cache_host(host);
+ impl->cache_host.update(host);
+ return old_value != impl->cache_host(host);
+}
+
+bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address)
+{
+ auto old_value = impl->cache_address(address);
+ impl->cache_address.update(address);
+ return old_value == impl->cache_address(address);
+}
+
+void DNSResolver::addToNewHosts(const String & host)
+{
+ std::lock_guard lock(impl->drop_mutex);
+ impl->new_hosts.insert(host);
+}
+
+void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address)
+{
+ std::lock_guard lock(impl->drop_mutex);
+ impl->new_addresses.insert(address);
+}
+
+DNSResolver::~DNSResolver() = default;
+
+DNSResolver & DNSResolver::instance()
+{
+ static DNSResolver ret;
+ return ret;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h
index 57c28188f5..102a5d2786 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/DNSResolver.h
@@ -1,73 +1,73 @@
-#pragma once
-#include <Poco/Net/IPAddress.h>
-#include <Poco/Net/SocketAddress.h>
-#include <memory>
+#pragma once
+#include <Poco/Net/IPAddress.h>
+#include <Poco/Net/SocketAddress.h>
+#include <memory>
#include <common/types.h>
-#include <Core/Names.h>
-#include <boost/noncopyable.hpp>
-#include <common/logger_useful.h>
-
-
-namespace DB
-{
-
-/// A singleton implementing DNS names resolving with optional DNS cache
-/// The cache is being updated asynchronous in separate thread (see DNSCacheUpdater)
-/// or it could be updated manually via drop() method.
-class DNSResolver : private boost::noncopyable
-{
-public:
- typedef std::vector<Poco::Net::IPAddress> IPAddresses;
-
- static DNSResolver & instance();
-
- DNSResolver(const DNSResolver &) = delete;
-
- /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves its IP
- Poco::Net::IPAddress resolveHost(const std::string & host);
-
- /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs
- IPAddresses resolveHostAll(const std::string & host);
-
- /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port
- Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port);
-
- Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
-
- /// Accepts host IP and resolves its host name
- String reverseResolve(const Poco::Net::IPAddress & address);
-
- /// Get this server host name
- String getHostName();
-
- /// Disables caching
- void setDisableCacheFlag(bool is_disabled = true);
-
- /// Drops all caches
- void dropCache();
-
- /// Updates all known hosts in cache.
- /// Returns true if IP of any host has been changed.
- bool updateCache();
-
- ~DNSResolver();
-
-private:
- template<typename UpdateF, typename ElemsT>
- bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg);
-
- DNSResolver();
-
- struct Impl;
- std::unique_ptr<Impl> impl;
- Poco::Logger * log;
-
- /// Updates cached value and returns true it has been changed.
- bool updateHost(const String & host);
- bool updateAddress(const Poco::Net::IPAddress & address);
-
- void addToNewHosts(const String & host);
- void addToNewAddresses(const Poco::Net::IPAddress & address);
-};
-
-}
+#include <Core/Names.h>
+#include <boost/noncopyable.hpp>
+#include <common/logger_useful.h>
+
+
+namespace DB
+{
+
+/// A singleton implementing DNS names resolving with optional DNS cache
+/// The cache is being updated asynchronous in separate thread (see DNSCacheUpdater)
+/// or it could be updated manually via drop() method.
+class DNSResolver : private boost::noncopyable
+{
+public:
+ typedef std::vector<Poco::Net::IPAddress> IPAddresses;
+
+ static DNSResolver & instance();
+
+ DNSResolver(const DNSResolver &) = delete;
+
+ /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves its IP
+ Poco::Net::IPAddress resolveHost(const std::string & host);
+
+ /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs
+ IPAddresses resolveHostAll(const std::string & host);
+
+ /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port
+ Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port);
+
+ Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
+
+ /// Accepts host IP and resolves its host name
+ String reverseResolve(const Poco::Net::IPAddress & address);
+
+ /// Get this server host name
+ String getHostName();
+
+ /// Disables caching
+ void setDisableCacheFlag(bool is_disabled = true);
+
+ /// Drops all caches
+ void dropCache();
+
+ /// Updates all known hosts in cache.
+ /// Returns true if IP of any host has been changed.
+ bool updateCache();
+
+ ~DNSResolver();
+
+private:
+ template<typename UpdateF, typename ElemsT>
+ bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg);
+
+ DNSResolver();
+
+ struct Impl;
+ std::unique_ptr<Impl> impl;
+ Poco::Logger * log;
+
+ /// Updates cached value and returns true it has been changed.
+ bool updateHost(const String & host);
+ bool updateAddress(const Poco::Net::IPAddress & address);
+
+ void addToNewHosts(const String & host);
+ void addToNewAddresses(const Poco::Net::IPAddress & address);
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
index 37bd81c8b4..e3ca42398e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
@@ -1,53 +1,53 @@
-#pragma once
-
-#include <Common/HashTable/FixedHashTable.h>
-#include <Common/HashTable/HashMap.h>
-
-
-template <typename Key, typename TMapped, typename TState = HashTableNoState>
-struct FixedHashMapCell
-{
- using Mapped = TMapped;
- using State = TState;
-
- using value_type = PairNoInit<Key, Mapped>;
- using mapped_type = TMapped;
-
- bool full;
- Mapped mapped;
-
+#pragma once
+
+#include <Common/HashTable/FixedHashTable.h>
+#include <Common/HashTable/HashMap.h>
+
+
+template <typename Key, typename TMapped, typename TState = HashTableNoState>
+struct FixedHashMapCell
+{
+ using Mapped = TMapped;
+ using State = TState;
+
+ using value_type = PairNoInit<Key, Mapped>;
+ using mapped_type = TMapped;
+
+ bool full;
+ Mapped mapped;
+
FixedHashMapCell() {} //-V730
- FixedHashMapCell(const Key &, const State &) : full(true) {}
- FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
-
- const VoidKey getKey() const { return {}; }
- Mapped & getMapped() { return mapped; }
- const Mapped & getMapped() const { return mapped; }
-
- bool isZero(const State &) const { return !full; }
- void setZero() { full = false; }
-
- /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
- /// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
- struct CellExt
- {
+ FixedHashMapCell(const Key &, const State &) : full(true) {}
+ FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
+
+ const VoidKey getKey() const { return {}; }
+ Mapped & getMapped() { return mapped; }
+ const Mapped & getMapped() const { return mapped; }
+
+ bool isZero(const State &) const { return !full; }
+ void setZero() { full = false; }
+
+ /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
+ /// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
+ struct CellExt
+ {
CellExt() {} //-V730
- CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
- void update(Key && key_, const FixedHashMapCell * ptr_)
- {
- key = key_;
- ptr = const_cast<FixedHashMapCell *>(ptr_);
- }
- Key key;
- FixedHashMapCell * ptr;
-
- const Key & getKey() const { return key; }
- Mapped & getMapped() { return ptr->mapped; }
- const Mapped & getMapped() const { return ptr->mapped; }
- const value_type getValue() const { return {key, ptr->mapped}; }
- };
-};
-
+ CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
+ void update(Key && key_, const FixedHashMapCell * ptr_)
+ {
+ key = key_;
+ ptr = const_cast<FixedHashMapCell *>(ptr_);
+ }
+ Key key;
+ FixedHashMapCell * ptr;
+
+ const Key & getKey() const { return key; }
+ Mapped & getMapped() { return ptr->mapped; }
+ const Mapped & getMapped() const { return ptr->mapped; }
+ const value_type getValue() const { return {key, ptr->mapped}; }
+ };
+};
+
/// In case when we can encode empty cells with zero mapped values.
template <typename Key, typename TMapped, typename TState = HashTableNoState>
@@ -101,64 +101,64 @@ template <
typename Size = FixedHashTableStoredSize<Cell>,
typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, Cell, Size, Allocator>
-{
-public:
+{
+public:
using Base = FixedHashTable<Key, Cell, Size, Allocator>;
- using Self = FixedHashMap;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
- template <typename Func>
- void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
- {
- for (auto it = this->begin(), end = this->end(); it != end; ++it)
- {
- typename Self::LookupResult res_it;
- bool inserted;
- that.emplace(it->getKey(), res_it, inserted, it.getHash());
- func(res_it->getMapped(), it->getMapped(), inserted);
- }
- }
-
- template <typename Func>
- void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
- {
- for (auto it = this->begin(), end = this->end(); it != end; ++it)
- {
- auto res_it = that.find(it->getKey(), it.getHash());
- if (!res_it)
- func(it->getMapped(), it->getMapped(), false);
- else
- func(res_it->getMapped(), it->getMapped(), true);
- }
- }
-
- template <typename Func>
- void forEachValue(Func && func)
- {
- for (auto & v : *this)
- func(v.getKey(), v.getMapped());
- }
-
- template <typename Func>
- void forEachMapped(Func && func)
- {
- for (auto & v : *this)
- func(v.getMapped());
- }
-
- Mapped & ALWAYS_INLINE operator[](const Key & x)
- {
- LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
- if (inserted)
- new (&it->getMapped()) Mapped();
-
- return it->getMapped();
- }
-};
+ using Self = FixedHashMap;
+ using LookupResult = typename Base::LookupResult;
+
+ using Base::Base;
+
+ template <typename Func>
+ void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
+ {
+ for (auto it = this->begin(), end = this->end(); it != end; ++it)
+ {
+ typename Self::LookupResult res_it;
+ bool inserted;
+ that.emplace(it->getKey(), res_it, inserted, it.getHash());
+ func(res_it->getMapped(), it->getMapped(), inserted);
+ }
+ }
+
+ template <typename Func>
+ void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
+ {
+ for (auto it = this->begin(), end = this->end(); it != end; ++it)
+ {
+ auto res_it = that.find(it->getKey(), it.getHash());
+ if (!res_it)
+ func(it->getMapped(), it->getMapped(), false);
+ else
+ func(res_it->getMapped(), it->getMapped(), true);
+ }
+ }
+
+ template <typename Func>
+ void forEachValue(Func && func)
+ {
+ for (auto & v : *this)
+ func(v.getKey(), v.getMapped());
+ }
+
+ template <typename Func>
+ void forEachMapped(Func && func)
+ {
+ for (auto & v : *this)
+ func(v.getMapped());
+ }
+
+ Mapped & ALWAYS_INLINE operator[](const Key & x)
+ {
+ LookupResult it;
+ bool inserted;
+ this->emplace(x, it, inserted);
+ if (inserted)
+ new (&it->getMapped()) Mapped();
+
+ return it->getMapped();
+ }
+};
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h
index c1d2c8fe6e..2b9a5d61de 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h
@@ -1,52 +1,52 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-
-namespace DB
-{
- namespace ErrorCodes
- {
- extern const int NO_AVAILABLE_DATA;
- }
-}
-
-template <typename Key, typename TState = HashTableNoState>
-struct FixedHashTableCell
-{
- using State = TState;
-
- using value_type = Key;
- using mapped_type = VoidMapped;
- bool full;
-
+#pragma once
+
+#include <Common/HashTable/HashTable.h>
+
+namespace DB
+{
+ namespace ErrorCodes
+ {
+ extern const int NO_AVAILABLE_DATA;
+ }
+}
+
+template <typename Key, typename TState = HashTableNoState>
+struct FixedHashTableCell
+{
+ using State = TState;
+
+ using value_type = Key;
+ using mapped_type = VoidMapped;
+ bool full;
+
FixedHashTableCell() {} //-V730
- FixedHashTableCell(const Key &, const State &) : full(true) {}
-
- const VoidKey getKey() const { return {}; }
- VoidMapped getMapped() const { return {}; }
-
- bool isZero(const State &) const { return !full; }
- void setZero() { full = false; }
- static constexpr bool need_zero_value_storage = false;
-
+ FixedHashTableCell(const Key &, const State &) : full(true) {}
+
+ const VoidKey getKey() const { return {}; }
+ VoidMapped getMapped() const { return {}; }
+
+ bool isZero(const State &) const { return !full; }
+ void setZero() { full = false; }
+ static constexpr bool need_zero_value_storage = false;
+
/// This Cell is only stored inside an iterator. It's used to accommodate the fact
- /// that the iterator based API always provide a reference to a continuous memory
- /// containing the Key. As a result, we have to instantiate a real Key field.
- /// All methods that return a mutable reference to the Key field are named with
- /// -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
- /// tables, it's totally fine to discard the Key mutations.
- struct CellExt
- {
- Key key;
-
- const VoidKey getKey() const { return {}; }
- VoidMapped getMapped() const { return {}; }
- const value_type & getValue() const { return key; }
- void update(Key && key_, FixedHashTableCell *) { key = key_; }
- };
-};
-
-
+ /// that the iterator based API always provide a reference to a continuous memory
+ /// containing the Key. As a result, we have to instantiate a real Key field.
+ /// All methods that return a mutable reference to the Key field are named with
+ /// -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
+ /// tables, it's totally fine to discard the Key mutations.
+ struct CellExt
+ {
+ Key key;
+
+ const VoidKey getKey() const { return {}; }
+ VoidMapped getMapped() const { return {}; }
+ const value_type & getValue() const { return key; }
+ void update(Key && key_, FixedHashTableCell *) { key = key_; }
+ };
+};
+
+
/// How to obtain the size of the table.
template <typename Cell>
@@ -88,394 +88,394 @@ struct FixedHashTableCalculatedSize
};
-/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
- * than a HashTable in that keys are not stored in the Cell buf, but inferred
- * inside each iterator. There are a bunch of to make it faster than using
- * HashTable: a) It doesn't have a conflict chain; b) There is no key
+/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
+ * than a HashTable in that keys are not stored in the Cell buf, but inferred
+ * inside each iterator. There are a bunch of to make it faster than using
+ * HashTable: a) It doesn't have a conflict chain; b) There is no key
* comparison; c) The number of cycles for checking cell empty is halved; d)
- * Memory layout is tighter, especially the Clearable variants.
- *
- * NOTE: For Set variants this should always be better. For Map variants
- * however, as we need to assemble the real cell inside each iterator, there
- * might be some cases we fall short.
- *
- * TODO: Deprecate the cell API so that end users don't rely on the structure
- * of cell. Instead iterator should be used for operations such as cell
- * transfer, key updates (f.g. StringRef) and serde. This will allow
- * TwoLevelHashSet(Map) to contain different type of sets(maps).
- */
+ * Memory layout is tighter, especially the Clearable variants.
+ *
+ * NOTE: For Set variants this should always be better. For Map variants
+ * however, as we need to assemble the real cell inside each iterator, there
+ * might be some cases we fall short.
+ *
+ * TODO: Deprecate the cell API so that end users don't rely on the structure
+ * of cell. Instead iterator should be used for operations such as cell
+ * transfer, key updates (f.g. StringRef) and serde. This will allow
+ * TwoLevelHashSet(Map) to contain different type of sets(maps).
+ */
template <typename Key, typename Cell, typename Size, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size
-{
- static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
-
-protected:
- friend class const_iterator;
- friend class iterator;
- friend class Reader;
-
- using Self = FixedHashTable;
-
- Cell * buf; /// A piece of memory for all elements.
-
- void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
-
- void free()
- {
- if (buf)
- {
- Allocator::free(buf, getBufferSizeInBytes());
- buf = nullptr;
- }
- }
-
- void destroyElements()
- {
- if (!std::is_trivially_destructible_v<Cell>)
- for (iterator it = begin(), it_end = end(); it != it_end; ++it)
- it.ptr->~Cell();
- }
-
-
- template <typename Derived, bool is_const>
- class iterator_base
- {
- using Container = std::conditional_t<is_const, const Self, Self>;
- using cell_type = std::conditional_t<is_const, const Cell, Cell>;
-
- Container * container;
- cell_type * ptr;
-
- friend class FixedHashTable;
-
- public:
- iterator_base() {}
- iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_)
- {
- cell.update(ptr - container->buf, ptr);
- }
-
- bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
- bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }
-
- Derived & operator++()
- {
- ++ptr;
-
- /// Skip empty cells in the main buffer.
- auto buf_end = container->buf + container->NUM_CELLS;
- while (ptr < buf_end && ptr->isZero(*container))
- ++ptr;
-
- return static_cast<Derived &>(*this);
- }
-
- auto & operator*()
- {
- if (cell.key != ptr - container->buf)
- cell.update(ptr - container->buf, ptr);
- return cell;
- }
- auto * operator-> ()
- {
- if (cell.key != ptr - container->buf)
- cell.update(ptr - container->buf, ptr);
- return &cell;
- }
-
- auto getPtr() const { return ptr; }
- size_t getHash() const { return ptr - container->buf; }
- size_t getCollisionChainLength() const { return 0; }
- typename cell_type::CellExt cell;
- };
-
-
-public:
- using key_type = Key;
- using mapped_type = typename Cell::mapped_type;
- using value_type = typename Cell::value_type;
- using cell_type = Cell;
-
- using LookupResult = Cell *;
- using ConstLookupResult = const Cell *;
-
-
- size_t hash(const Key & x) const { return x; }
-
- FixedHashTable() { alloc(); }
-
- FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }
-
- ~FixedHashTable()
- {
- destroyElements();
- free();
- }
-
- FixedHashTable & operator=(FixedHashTable && rhs)
- {
- destroyElements();
- free();
-
- std::swap(buf, rhs.buf);
+{
+ static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
+
+protected:
+ friend class const_iterator;
+ friend class iterator;
+ friend class Reader;
+
+ using Self = FixedHashTable;
+
+ Cell * buf; /// A piece of memory for all elements.
+
+ void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
+
+ void free()
+ {
+ if (buf)
+ {
+ Allocator::free(buf, getBufferSizeInBytes());
+ buf = nullptr;
+ }
+ }
+
+ /// Run ~Cell() on every occupied cell (the ones begin()/operator++ do not skip as zero).
+ /// The buffer itself is not released here. The loop is discarded at compile time
+ /// when Cell is trivially destructible.
+ void destroyElements()
+ {
+     if constexpr (!std::is_trivially_destructible_v<Cell>)
+         for (iterator it = begin(), it_end = end(); it != it_end; ++it)
+             it.ptr->~Cell();
+ }
+
+
+ /// Shared implementation of `iterator` and `const_iterator`; Derived is the concrete
+ /// iterator type returned by operator++.
+ /// The key of a cell is its offset from `container->buf`; the iterator materializes
+ /// it into `cell` (a Cell::CellExt) via cell.update(offset, ptr) on construction and,
+ /// if stale, on dereference.
+ template <typename Derived, bool is_const>
+ class iterator_base
+ {
+     using Container = std::conditional_t<is_const, const Self, Self>;
+     using cell_type = std::conditional_t<is_const, const Cell, Cell>;
+
+     /// Null-initialized so that a default-constructed iterator has a determinate
+     /// state and can be safely copied and compared.
+     Container * container = nullptr;
+     cell_type * ptr = nullptr;
+
+     friend class FixedHashTable;
+
+ public:
+     iterator_base() {}
+     iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_)
+     {
+         cell.update(ptr - container->buf, ptr);
+     }
+
+     /// Iterators compare by cell position only; the container is not compared.
+     bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
+     bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }
+
+     /// Advance to the next occupied cell, or to buf + NUM_CELLS if there is none.
+     Derived & operator++()
+     {
+         ++ptr;
+
+         /// Skip empty cells in the main buffer.
+         auto buf_end = container->buf + container->NUM_CELLS;
+         while (ptr < buf_end && ptr->isZero(*container))
+             ++ptr;
+
+         return static_cast<Derived &>(*this);
+     }
+
+     /// Dereference returns the cached CellExt, refreshed first if the iterator
+     /// has moved since `cell` was last updated.
+     auto & operator*()
+     {
+         if (cell.key != ptr - container->buf)
+             cell.update(ptr - container->buf, ptr);
+         return cell;
+     }
+     auto * operator-> ()
+     {
+         if (cell.key != ptr - container->buf)
+             cell.update(ptr - container->buf, ptr);
+         return &cell;
+     }
+
+     auto getPtr() const { return ptr; }
+     /// The hash of a key is the key itself, i.e. the cell's index in the buffer.
+     size_t getHash() const { return ptr - container->buf; }
+     /// Always 0: cells are addressed directly by key.
+     size_t getCollisionChainLength() const { return 0; }
+     typename cell_type::CellExt cell;
+ };
+
+
+public:
+ using key_type = Key;
+ using mapped_type = typename Cell::mapped_type;
+ using value_type = typename Cell::value_type;
+ using cell_type = Cell;
+
+ using LookupResult = Cell *;
+ using ConstLookupResult = const Cell *;
+
+
+ size_t hash(const Key & x) const { return x; }
+
+ FixedHashTable() { alloc(); }
+
+ FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }
+
+ ~FixedHashTable()
+ {
+ destroyElements();
+ free();
+ }
+
+ FixedHashTable & operator=(FixedHashTable && rhs)
+ {
+ destroyElements();
+ free();
+
+ std::swap(buf, rhs.buf);
this->setSize(rhs.size());
-
- Allocator::operator=(std::move(rhs));
- Cell::State::operator=(std::move(rhs));
-
- return *this;
- }
-
- class Reader final : private Cell::State
- {
- public:
- Reader(DB::ReadBuffer & in_) : in(in_) {}
-
- Reader(const Reader &) = delete;
- Reader & operator=(const Reader &) = delete;
-
- bool next()
- {
- if (!is_initialized)
- {
- Cell::State::read(in);
- DB::readVarUInt(size, in);
- is_initialized = true;
- }
-
- if (read_count == size)
- {
- is_eof = true;
- return false;
- }
-
- cell.read(in);
- ++read_count;
-
- return true;
- }
-
- inline const value_type & get() const
- {
- if (!is_initialized || is_eof)
- throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
-
- return cell.getValue();
- }
-
- private:
- DB::ReadBuffer & in;
- Cell cell;
- size_t read_count = 0;
+
+ Allocator::operator=(std::move(rhs));
+ Cell::State::operator=(std::move(rhs));
+
+ return *this;
+ }
+
+ class Reader final : private Cell::State
+ {
+ public:
+ Reader(DB::ReadBuffer & in_) : in(in_) {}
+
+ Reader(const Reader &) = delete;
+ Reader & operator=(const Reader &) = delete;
+
+ bool next()
+ {
+ if (!is_initialized)
+ {
+ Cell::State::read(in);
+ DB::readVarUInt(size, in);
+ is_initialized = true;
+ }
+
+ if (read_count == size)
+ {
+ is_eof = true;
+ return false;
+ }
+
+ cell.read(in);
+ ++read_count;
+
+ return true;
+ }
+
+ inline const value_type & get() const
+ {
+ if (!is_initialized || is_eof)
+ throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
+
+ return cell.getValue();
+ }
+
+ private:
+ DB::ReadBuffer & in;
+ Cell cell;
+ size_t read_count = 0;
size_t size = 0;
- bool is_eof = false;
- bool is_initialized = false;
- };
-
-
- class iterator : public iterator_base<iterator, false>
- {
- public:
- using iterator_base<iterator, false>::iterator_base;
- };
-
- class const_iterator : public iterator_base<const_iterator, true>
- {
- public:
- using iterator_base<const_iterator, true>::iterator_base;
- };
-
-
- const_iterator begin() const
- {
- if (!buf)
- return end();
-
- const Cell * ptr = buf;
- auto buf_end = buf + NUM_CELLS;
- while (ptr < buf_end && ptr->isZero(*this))
- ++ptr;
-
- return const_iterator(this, ptr);
- }
-
- const_iterator cbegin() const { return begin(); }
-
- iterator begin()
- {
- if (!buf)
- return end();
-
- Cell * ptr = buf;
- auto buf_end = buf + NUM_CELLS;
- while (ptr < buf_end && ptr->isZero(*this))
- ++ptr;
-
- return iterator(this, ptr);
- }
-
- const_iterator end() const
- {
- /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C.
- return const_iterator(this, buf ? buf + NUM_CELLS : buf);
- }
-
- const_iterator cend() const
- {
- return end();
- }
-
- iterator end()
- {
- return iterator(this, buf ? buf + NUM_CELLS : buf);
- }
-
-
-public:
- /// The last parameter is unused but exists for compatibility with HashTable interface.
- void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
- {
- it = &buf[x];
-
- if (!buf[x].isZero(*this))
- {
- inserted = false;
- return;
- }
-
- new (&buf[x]) Cell(x, *this);
- inserted = true;
+ bool is_eof = false;
+ bool is_initialized = false;
+ };
+
+
+ class iterator : public iterator_base<iterator, false>
+ {
+ public:
+ using iterator_base<iterator, false>::iterator_base;
+ };
+
+ class const_iterator : public iterator_base<const_iterator, true>
+ {
+ public:
+ using iterator_base<const_iterator, true>::iterator_base;
+ };
+
+
+ const_iterator begin() const
+ {
+ if (!buf)
+ return end();
+
+ const Cell * ptr = buf;
+ auto buf_end = buf + NUM_CELLS;
+ while (ptr < buf_end && ptr->isZero(*this))
+ ++ptr;
+
+ return const_iterator(this, ptr);
+ }
+
+ const_iterator cbegin() const { return begin(); }
+
+ iterator begin()
+ {
+ if (!buf)
+ return end();
+
+ Cell * ptr = buf;
+ auto buf_end = buf + NUM_CELLS;
+ while (ptr < buf_end && ptr->isZero(*this))
+ ++ptr;
+
+ return iterator(this, ptr);
+ }
+
+ const_iterator end() const
+ {
+ /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C.
+ return const_iterator(this, buf ? buf + NUM_CELLS : buf);
+ }
+
+ const_iterator cend() const
+ {
+ return end();
+ }
+
+ iterator end()
+ {
+ return iterator(this, buf ? buf + NUM_CELLS : buf);
+ }
+
+
+public:
+ /// The last parameter is unused but exists for compatibility with HashTable interface.
+ void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
+ {
+ it = &buf[x];
+
+ if (!buf[x].isZero(*this))
+ {
+ inserted = false;
+ return;
+ }
+
+ new (&buf[x]) Cell(x, *this);
+ inserted = true;
this->increaseSize();
- }
-
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
- {
- std::pair<LookupResult, bool> res;
- emplace(Cell::getKey(x), res.first, res.second);
- if (res.second)
- insertSetMapped(res.first->getMapped(), x);
-
- return res;
- }
-
- LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; }
-
- ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
-
- LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; }
-
- ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const
- {
- return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
- }
-
- bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
- bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
-
- void write(DB::WriteBuffer & wb) const
- {
- Cell::State::write(wb);
+ }
+
+ std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
+ {
+ std::pair<LookupResult, bool> res;
+ emplace(Cell::getKey(x), res.first, res.second);
+ if (res.second)
+ insertSetMapped(res.first->getMapped(), x);
+
+ return res;
+ }
+
+ LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; }
+
+ ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
+
+ LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; }
+
+ ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const
+ {
+ return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
+ }
+
+ bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
+ bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
+
+ void write(DB::WriteBuffer & wb) const
+ {
+ Cell::State::write(wb);
DB::writeVarUInt(size(), wb);
-
- if (!buf)
- return;
-
- for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
- {
- if (!ptr->isZero(*this))
- {
- DB::writeVarUInt(ptr - buf);
- ptr->write(wb);
- }
- }
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- Cell::State::writeText(wb);
+
+ if (!buf)
+ return;
+
+ /// Emit every occupied cell as a pair (index in buf, cell payload).
+ /// The index must go to `wb`: read() consumes it with readVarUInt before each cell.
+ for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
+ {
+     if (!ptr->isZero(*this))
+     {
+         DB::writeVarUInt(ptr - buf, wb);
+         ptr->write(wb);
+     }
+ }
+ }
+
+ void writeText(DB::WriteBuffer & wb) const
+ {
+ Cell::State::writeText(wb);
DB::writeText(size(), wb);
-
- if (!buf)
- return;
-
- for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
- {
- if (!ptr->isZero(*this))
- {
- DB::writeChar(',', wb);
- DB::writeText(ptr - buf, wb);
- DB::writeChar(',', wb);
- ptr->writeText(wb);
- }
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- Cell::State::read(rb);
- destroyElements();
+
+ if (!buf)
+ return;
+
+ for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
+ {
+ if (!ptr->isZero(*this))
+ {
+ DB::writeChar(',', wb);
+ DB::writeText(ptr - buf, wb);
+ DB::writeChar(',', wb);
+ ptr->writeText(wb);
+ }
+ }
+ }
+
+ void read(DB::ReadBuffer & rb)
+ {
+ Cell::State::read(rb);
+ destroyElements();
size_t m_size;
- DB::readVarUInt(m_size, rb);
+ DB::readVarUInt(m_size, rb);
this->setSize(m_size);
- free();
- alloc();
-
- for (size_t i = 0; i < m_size; ++i)
- {
- size_t place_value = 0;
- DB::readVarUInt(place_value, rb);
- Cell x;
- x.read(rb);
- new (&buf[place_value]) Cell(x, *this);
- }
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- Cell::State::readText(rb);
- destroyElements();
+ free();
+ alloc();
+
+ for (size_t i = 0; i < m_size; ++i)
+ {
+ size_t place_value = 0;
+ DB::readVarUInt(place_value, rb);
+ Cell x;
+ x.read(rb);
+ new (&buf[place_value]) Cell(x, *this);
+ }
+ }
+
+ void readText(DB::ReadBuffer & rb)
+ {
+ Cell::State::readText(rb);
+ destroyElements();
size_t m_size;
- DB::readText(m_size, rb);
+ DB::readText(m_size, rb);
this->setSize(m_size);
- free();
- alloc();
-
- for (size_t i = 0; i < m_size; ++i)
- {
- size_t place_value = 0;
- DB::assertChar(',', rb);
- DB::readText(place_value, rb);
- Cell x;
- DB::assertChar(',', rb);
- x.readText(rb);
- new (&buf[place_value]) Cell(x, *this);
- }
- }
-
+ free();
+ alloc();
+
+ for (size_t i = 0; i < m_size; ++i)
+ {
+ size_t place_value = 0;
+ DB::assertChar(',', rb);
+ DB::readText(place_value, rb);
+ Cell x;
+ DB::assertChar(',', rb);
+ x.readText(rb);
+ new (&buf[place_value]) Cell(x, *this);
+ }
+ }
+
size_t size() const { return this->getSize(buf, *this, NUM_CELLS); }
bool empty() const { return this->isEmpty(buf, *this, NUM_CELLS); }
-
- void clear()
- {
- destroyElements();
+
+ void clear()
+ {
+ destroyElements();
this->clearSize();
-
- memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
- }
-
- /// After executing this function, the table can only be destroyed,
- /// and also you can use the methods `size`, `empty`, `begin`, `end`.
- void clearAndShrink()
- {
- destroyElements();
+
+ memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
+ }
+
+ /// After executing this function, the table can only be destroyed,
+ /// and also you can use the methods `size`, `empty`, `begin`, `end`.
+ void clearAndShrink()
+ {
+ destroyElements();
this->clearSize();
- free();
- }
-
- size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }
-
- size_t getBufferSizeInCells() const { return NUM_CELLS; }
-
+ free();
+ }
+
+ size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }
+
+ size_t getBufferSizeInCells() const { return NUM_CELLS; }
+
/// Return offset for result in internal buffer.
/// Result can have value up to `getBufferSizeInCells() + 1`
/// because offset for zero value considered to be 0
@@ -490,7 +490,7 @@ public:
const Cell * data() const { return buf; }
Cell * data() { return buf; }
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
- size_t getCollisions() const { return 0; }
-#endif
-};
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+ size_t getCollisions() const { return 0; }
+#endif
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h
index 298580dc83..dc922dde2f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h
@@ -1,189 +1,189 @@
-#pragma once
-
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/HashTableAllocator.h>
-#include <Common/HashTable/StringHashTable.h>
-
-template <typename Key, typename TMapped>
-struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- // external
- const StringRef getKey() const { return toStringRef(this->value.first); }
- // internal
- static const Key & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped>
-struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
-
- // Zero means unoccupied cells in hash table. Use key with last word = 0 as
- // zero keys, because such keys are unrepresentable (no way to encode length).
+#pragma once
+
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashTableAllocator.h>
+#include <Common/HashTable/StringHashTable.h>
+
+template <typename Key, typename TMapped>
+struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
+{
+ using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
+ using value_type = typename Base::value_type;
+ using Base::Base;
+ static constexpr bool need_zero_value_storage = false;
+ // external
+ const StringRef getKey() const { return toStringRef(this->value.first); }
+ // internal
+ static const Key & getKey(const value_type & value_) { return value_.first; }
+};
+
+template <typename TMapped>
+struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
+{
+ using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
+ using value_type = typename Base::value_type;
+ using Base::Base;
+ static constexpr bool need_zero_value_storage = false;
+ bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
+
+ // Zero means unoccupied cells in hash table. Use key with last word = 0 as
+ // zero keys, because such keys are unrepresentable (no way to encode length).
static bool isZero(const StringKey16 & key, const HashTableNoState &) { return key.items[1] == 0; }
void setZero() { this->value.first.items[1] = 0; }
-
- // external
- const StringRef getKey() const { return toStringRef(this->value.first); }
- // internal
- static const StringKey16 & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped>
-struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
-
- // Zero means unoccupied cells in hash table. Use key with last word = 0 as
- // zero keys, because such keys are unrepresentable (no way to encode length).
- static bool isZero(const StringKey24 & key, const HashTableNoState &)
- { return key.c == 0; }
- void setZero() { this->value.first.c = 0; }
-
- // external
- const StringRef getKey() const { return toStringRef(this->value.first); }
- // internal
- static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped>
-struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- // external
- using Base::getKey;
- // internal
- static const StringRef & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped, typename Allocator>
-struct StringHashMapSubMaps
-{
- using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
- using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
-};
-
-template <typename TMapped, typename Allocator = HashTableAllocator>
-class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
-{
-public:
- using Key = StringRef;
- using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
- using Self = StringHashMap;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
- /// Merge every cell's value of current map into the destination map.
- /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
- /// Each filled cell in current map will invoke func once. If that map doesn't
- /// have a key equals to the given cell, a new cell gets emplaced into that map,
- /// and func is invoked with the third argument emplaced set to true. Otherwise
- /// emplaced is set to false.
- template <typename Func>
- void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
- {
- if (this->m0.hasZero() && that.m0.hasZero())
- func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
- else if (this->m0.hasZero())
- {
- that.m0.setHasZero();
- func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
- }
- this->m1.mergeToViaEmplace(that.m1, func);
- this->m2.mergeToViaEmplace(that.m2, func);
- this->m3.mergeToViaEmplace(that.m3, func);
- this->ms.mergeToViaEmplace(that.ms, func);
- }
-
- /// Merge every cell's value of current map into the destination map via find.
- /// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
- /// Each filled cell in current map will invoke func once. If that map doesn't
- /// have a key equals to the given cell, func is invoked with the third argument
- /// exist set to false. Otherwise exist is set to true.
- template <typename Func>
- void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
- {
- if (this->m0.size() && that.m0.size())
- func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
- else if (this->m0.size())
- func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
- this->m1.mergeToViaFind(that.m1, func);
- this->m2.mergeToViaFind(that.m2, func);
- this->m3.mergeToViaFind(that.m3, func);
- this->ms.mergeToViaFind(that.ms, func);
- }
-
- TMapped & ALWAYS_INLINE operator[](const Key & x)
- {
- LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
- if (inserted)
- new (&it->getMapped()) TMapped();
-
- return it->getMapped();
- }
-
- template <typename Func>
- void ALWAYS_INLINE forEachValue(Func && func)
- {
- if (this->m0.size())
- {
- func(StringRef{}, this->m0.zeroValue()->getMapped());
- }
-
- for (auto & v : this->m1)
- {
- func(v.getKey(), v.getMapped());
- }
-
- for (auto & v : this->m2)
- {
- func(v.getKey(), v.getMapped());
- }
-
- for (auto & v : this->m3)
- {
- func(v.getKey(), v.getMapped());
- }
-
- for (auto & v : this->ms)
- {
- func(v.getKey(), v.getMapped());
- }
- }
-
- template <typename Func>
- void ALWAYS_INLINE forEachMapped(Func && func)
- {
- if (this->m0.size())
- func(this->m0.zeroValue()->getMapped());
- for (auto & v : this->m1)
- func(v.getMapped());
- for (auto & v : this->m2)
- func(v.getMapped());
- for (auto & v : this->m3)
- func(v.getMapped());
- for (auto & v : this->ms)
- func(v.getMapped());
- }
-};
+
+ // external
+ const StringRef getKey() const { return toStringRef(this->value.first); }
+ // internal
+ static const StringKey16 & getKey(const value_type & value_) { return value_.first; }
+};
+
+template <typename TMapped>
+struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
+{
+ using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
+ using value_type = typename Base::value_type;
+ using Base::Base;
+ static constexpr bool need_zero_value_storage = false;
+ bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
+
+ // Zero means unoccupied cells in hash table. Use key with last word = 0 as
+ // zero keys, because such keys are unrepresentable (no way to encode length).
+ static bool isZero(const StringKey24 & key, const HashTableNoState &)
+ { return key.c == 0; }
+ void setZero() { this->value.first.c = 0; }
+
+ // external
+ const StringRef getKey() const { return toStringRef(this->value.first); }
+ // internal
+ static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
+};
+
+template <typename TMapped>
+struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
+{
+ using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
+ using value_type = typename Base::value_type;
+ using Base::Base;
+ static constexpr bool need_zero_value_storage = false;
+ // external
+ using Base::getKey;
+ // internal
+ static const StringRef & getKey(const value_type & value_) { return value_.first; }
+};
+
+template <typename TMapped, typename Allocator>
+struct StringHashMapSubMaps
+{
+ using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
+ using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+ using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+ using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+ using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+};
+
+template <typename TMapped, typename Allocator = HashTableAllocator>
+class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
+{
+public:
+ using Key = StringRef;
+ using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
+ using Self = StringHashMap;
+ using LookupResult = typename Base::LookupResult;
+
+ using Base::Base;
+
+ /// Merge every cell's value of current map into the destination map.
+ /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
+ /// Each filled cell in current map will invoke func once. If that map doesn't
+ /// have a key equal to the given cell's key, a new cell gets emplaced into that map,
+ /// and func is invoked with the third argument emplaced set to true. Otherwise
+ /// emplaced is set to false.
+ template <typename Func>
+ void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
+ {
+ if (this->m0.hasZero() && that.m0.hasZero())
+ func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
+ else if (this->m0.hasZero())
+ {
+ that.m0.setHasZero();
+ func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
+ }
+ this->m1.mergeToViaEmplace(that.m1, func);
+ this->m2.mergeToViaEmplace(that.m2, func);
+ this->m3.mergeToViaEmplace(that.m3, func);
+ this->ms.mergeToViaEmplace(that.ms, func);
+ }
+
+ /// Merge every cell's value of current map into the destination map via find.
+ /// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
+ /// Each filled cell in current map will invoke func once. If that map doesn't
+ /// have a key equal to the given cell's key, func is invoked with exist set to false
+ /// (for the empty-key slot m0, dst then aliases src). Otherwise exist is set to true.
+ template <typename Func>
+ void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
+ {
+ if (this->m0.size() && that.m0.size())
+ func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
+ else if (this->m0.size())
+ func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
+ this->m1.mergeToViaFind(that.m1, func);
+ this->m2.mergeToViaFind(that.m2, func);
+ this->m3.mergeToViaFind(that.m3, func);
+ this->ms.mergeToViaFind(that.ms, func);
+ }
+
+ TMapped & ALWAYS_INLINE operator[](const Key & x)
+ {
+ LookupResult it;
+ bool inserted;
+ this->emplace(x, it, inserted);
+ if (inserted)
+ new (&it->getMapped()) TMapped();
+
+ return it->getMapped();
+ }
+
+ template <typename Func>
+ void ALWAYS_INLINE forEachValue(Func && func)
+ {
+ if (this->m0.size())
+ {
+ func(StringRef{}, this->m0.zeroValue()->getMapped());
+ }
+
+ for (auto & v : this->m1)
+ {
+ func(v.getKey(), v.getMapped());
+ }
+
+ for (auto & v : this->m2)
+ {
+ func(v.getKey(), v.getMapped());
+ }
+
+ for (auto & v : this->m3)
+ {
+ func(v.getKey(), v.getMapped());
+ }
+
+ for (auto & v : this->ms)
+ {
+ func(v.getKey(), v.getMapped());
+ }
+ }
+
+ template <typename Func>
+ void ALWAYS_INLINE forEachMapped(Func && func)
+ {
+ if (this->m0.size())
+ func(this->m0.zeroValue()->getMapped());
+ for (auto & v : this->m1)
+ func(v.getMapped());
+ for (auto & v : this->m2)
+ func(v.getMapped());
+ for (auto & v : this->m3)
+ func(v.getMapped());
+ for (auto & v : this->ms)
+ func(v.getMapped());
+ }
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h
index d30271d65d..289195267c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h
@@ -1,227 +1,227 @@
-#pragma once
-
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/HashTable.h>
-
+#pragma once
+
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashTable.h>
+
#include <new>
-#include <variant>
-
-
-using StringKey8 = UInt64;
-using StringKey16 = DB::UInt128;
-struct StringKey24
-{
- UInt64 a;
- UInt64 b;
- UInt64 c;
-
- bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
-};
-
-inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
-{
- assert(n != 0);
- return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
-}
-inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
-{
+#include <variant>
+
+
+using StringKey8 = UInt64;
+using StringKey16 = DB::UInt128;
+struct StringKey24
+{
+ UInt64 a;
+ UInt64 b;
+ UInt64 c;
+
+ bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
+};
+
+inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
+{
+ assert(n != 0);
+ return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
+}
+inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
+{
assert(n.items[1] != 0);
return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.items[1]) >> 3)};
-}
-inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
-{
- assert(n.c != 0);
- return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
-}
-
-struct StringHashTableHash
-{
-#if defined(__SSE4_2__)
- size_t ALWAYS_INLINE operator()(StringKey8 key) const
- {
- size_t res = -1ULL;
- res = _mm_crc32_u64(res, key);
- return res;
- }
- size_t ALWAYS_INLINE operator()(StringKey16 key) const
- {
- size_t res = -1ULL;
+}
+inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
+{
+ assert(n.c != 0);
+ return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
+}
+
+struct StringHashTableHash
+{
+#if defined(__SSE4_2__)
+ size_t ALWAYS_INLINE operator()(StringKey8 key) const
+ {
+ size_t res = -1ULL;
+ res = _mm_crc32_u64(res, key);
+ return res;
+ }
+ size_t ALWAYS_INLINE operator()(StringKey16 key) const
+ {
+ size_t res = -1ULL;
res = _mm_crc32_u64(res, key.items[0]);
res = _mm_crc32_u64(res, key.items[1]);
- return res;
- }
- size_t ALWAYS_INLINE operator()(StringKey24 key) const
- {
- size_t res = -1ULL;
- res = _mm_crc32_u64(res, key.a);
- res = _mm_crc32_u64(res, key.b);
- res = _mm_crc32_u64(res, key.c);
- return res;
- }
-#else
- size_t ALWAYS_INLINE operator()(StringKey8 key) const
- {
- return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8);
- }
- size_t ALWAYS_INLINE operator()(StringKey16 key) const
- {
- return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16);
- }
- size_t ALWAYS_INLINE operator()(StringKey24 key) const
- {
- return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24);
- }
-#endif
- size_t ALWAYS_INLINE operator()(StringRef key) const
- {
- return StringRefHash()(key);
- }
-};
-
-template <typename Cell>
+ return res;
+ }
+ size_t ALWAYS_INLINE operator()(StringKey24 key) const
+ {
+ size_t res = -1ULL;
+ res = _mm_crc32_u64(res, key.a);
+ res = _mm_crc32_u64(res, key.b);
+ res = _mm_crc32_u64(res, key.c);
+ return res;
+ }
+#else
+ size_t ALWAYS_INLINE operator()(StringKey8 key) const
+ {
+ return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8);
+ }
+ size_t ALWAYS_INLINE operator()(StringKey16 key) const
+ {
+ return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16);
+ }
+ size_t ALWAYS_INLINE operator()(StringKey24 key) const
+ {
+ return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24);
+ }
+#endif
+ size_t ALWAYS_INLINE operator()(StringRef key) const
+ {
+ return StringRefHash()(key);
+ }
+};
+
+template <typename Cell>
struct StringHashTableEmpty //-V730
-{
- using Self = StringHashTableEmpty;
-
- bool has_zero = false;
- std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
-
-public:
- bool hasZero() const { return has_zero; }
-
- void setHasZero()
- {
- has_zero = true;
- new (zeroValue()) Cell();
- }
-
- void setHasZero(const Cell & other)
- {
- has_zero = true;
- new (zeroValue()) Cell(other);
- }
-
- void clearHasZero()
- {
- has_zero = false;
- if (!std::is_trivially_destructible_v<Cell>)
- zeroValue()->~Cell();
- }
-
+{
+ using Self = StringHashTableEmpty;
+
+ bool has_zero = false;
+ std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
+
+public:
+ bool hasZero() const { return has_zero; }
+
+ void setHasZero()
+ {
+ has_zero = true;
+ new (zeroValue()) Cell();
+ }
+
+ void setHasZero(const Cell & other)
+ {
+ has_zero = true;
+ new (zeroValue()) Cell(other);
+ }
+
+ void clearHasZero()
+ {
+ has_zero = false;
+ if (!std::is_trivially_destructible_v<Cell>)
+ zeroValue()->~Cell();
+ }
+
Cell * zeroValue() { return std::launder(reinterpret_cast<Cell *>(&zero_value_storage)); }
const Cell * zeroValue() const { return std::launder(reinterpret_cast<const Cell *>(&zero_value_storage)); }
-
- using LookupResult = Cell *;
- using ConstLookupResult = const Cell *;
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0)
- {
- if (!hasZero())
- {
- setHasZero();
- inserted = true;
- }
- else
- inserted = false;
- it = zeroValue();
- }
-
- template <typename Key>
- LookupResult ALWAYS_INLINE find(const Key &, size_t = 0)
- {
- return hasZero() ? zeroValue() : nullptr;
- }
-
- template <typename Key>
- ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const
- {
- return hasZero() ? zeroValue() : nullptr;
- }
-
- void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
- void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
- void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); }
- void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); }
- size_t size() const { return hasZero() ? 1 : 0; }
- bool empty() const { return !hasZero(); }
- size_t getBufferSizeInBytes() const { return sizeof(Cell); }
- size_t getCollisions() const { return 0; }
-};
-
-template <size_t initial_size_degree = 8>
-struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
-{
- // Smooth growing for string maps
- void increaseSize() { this->size_degree += 1; }
-};
-
-template <typename Mapped>
-struct StringHashTableLookupResult
-{
- Mapped * mapped_ptr;
- StringHashTableLookupResult() {}
- StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
- StringHashTableLookupResult(std::nullptr_t) {}
- const VoidKey getKey() const { return {}; }
- auto & getMapped() { return *mapped_ptr; }
- auto & operator*() { return *this; }
- auto & operator*() const { return *this; }
- auto * operator->() { return this; }
- auto * operator->() const { return this; }
- operator bool() const { return mapped_ptr; }
- friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; }
- friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; }
- friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; }
- friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; }
-};
-
-template <typename SubMaps>
-class StringHashTable : private boost::noncopyable
-{
-protected:
- static constexpr size_t NUM_MAPS = 5;
- // Map for storing empty string
- using T0 = typename SubMaps::T0;
-
- // Short strings are stored as numbers
- using T1 = typename SubMaps::T1;
- using T2 = typename SubMaps::T2;
- using T3 = typename SubMaps::T3;
-
- // Long strings are stored as StringRef along with saved hash
- using Ts = typename SubMaps::Ts;
- using Self = StringHashTable;
-
- template <typename, typename, size_t>
- friend class TwoLevelStringHashTable;
-
- T0 m0;
- T1 m1;
- T2 m2;
- T3 m3;
- Ts ms;
-
-public:
- using Key = StringRef;
- using key_type = Key;
- using mapped_type = typename Ts::mapped_type;
- using value_type = typename Ts::value_type;
- using cell_type = typename Ts::cell_type;
-
- using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>;
- using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>;
-
+
+ using LookupResult = Cell *;
+ using ConstLookupResult = const Cell *;
+
+ template <typename KeyHolder>
+ void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0)
+ {
+ if (!hasZero())
+ {
+ setHasZero();
+ inserted = true;
+ }
+ else
+ inserted = false;
+ it = zeroValue();
+ }
+
+ template <typename Key>
+ LookupResult ALWAYS_INLINE find(const Key &, size_t = 0)
+ {
+ return hasZero() ? zeroValue() : nullptr;
+ }
+
+ template <typename Key>
+ ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const
+ {
+ return hasZero() ? zeroValue() : nullptr;
+ }
+
+ void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
+ void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
+ void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); }
+ void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); }
+ size_t size() const { return hasZero() ? 1 : 0; }
+ bool empty() const { return !hasZero(); }
+ size_t getBufferSizeInBytes() const { return sizeof(Cell); }
+ size_t getCollisions() const { return 0; }
+};
+
+template <size_t initial_size_degree = 8>
+struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
+{
+ // Smooth growing for string maps
+ void increaseSize() { this->size_degree += 1; }
+};
+
+template <typename Mapped>
+struct StringHashTableLookupResult
+{
+ Mapped * mapped_ptr;
+ StringHashTableLookupResult() {}
+ StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
+ StringHashTableLookupResult(std::nullptr_t) {}
+ const VoidKey getKey() const { return {}; }
+ auto & getMapped() { return *mapped_ptr; }
+ auto & operator*() { return *this; }
+ auto & operator*() const { return *this; }
+ auto * operator->() { return this; }
+ auto * operator->() const { return this; }
+ operator bool() const { return mapped_ptr; }
+ friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; }
+ friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; }
+ friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; }
+ friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; }
+};
+
+template <typename SubMaps>
+class StringHashTable : private boost::noncopyable
+{
+protected:
+ static constexpr size_t NUM_MAPS = 5;
+ // Map for storing empty string
+ using T0 = typename SubMaps::T0;
+
+ // Short strings are stored as numbers
+ using T1 = typename SubMaps::T1;
+ using T2 = typename SubMaps::T2;
+ using T3 = typename SubMaps::T3;
+
+ // Long strings are stored as StringRef along with saved hash
+ using Ts = typename SubMaps::Ts;
+ using Self = StringHashTable;
+
+ template <typename, typename, size_t>
+ friend class TwoLevelStringHashTable;
+
+ T0 m0;
+ T1 m1;
+ T2 m2;
+ T3 m3;
+ Ts ms;
+
+public:
+ using Key = StringRef;
+ using key_type = Key;
+ using mapped_type = typename Ts::mapped_type;
+ using value_type = typename Ts::value_type;
+ using cell_type = typename Ts::cell_type;
+
+ using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>;
+ using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>;
+
StringHashTable() = default;
-
- StringHashTable(size_t reserve_for_num_elements)
- : m1{reserve_for_num_elements / 4}
- , m2{reserve_for_num_elements / 4}
- , m3{reserve_for_num_elements / 4}
- , ms{reserve_for_num_elements / 4}
- {
- }
-
+
+ StringHashTable(size_t reserve_for_num_elements)
+ : m1{reserve_for_num_elements / 4}
+ , m2{reserve_for_num_elements / 4}
+ , m3{reserve_for_num_elements / 4}
+ , ms{reserve_for_num_elements / 4}
+ {
+ }
+
StringHashTable(StringHashTable && rhs)
: m1(std::move(rhs.m1))
, m2(std::move(rhs.m2))
@@ -229,207 +229,207 @@ public:
, ms(std::move(rhs.ms))
{
}
-
+
~StringHashTable() = default;
-public:
- // Dispatch is written in a way that maximizes the performance:
- // 1. Always memcpy 8 times bytes
- // 2. Use switch case extension to generate fast dispatching table
- // 3. Funcs are named callables that can be force_inlined
+public:
+ // Dispatch is written in a way that maximizes the performance:
+ // 1. Always memcpy 8 times bytes
+ // 2. Use switch case extension to generate fast dispatching table
+ // 3. Funcs are named callables that can be force_inlined
//
- // NOTE: It relies on Little Endianness
+ // NOTE: It relies on Little Endianness
//
// NOTE: It requires padded to 8 bytes keys (IOW you cannot pass
// std::string here, but you can pass i.e. ColumnString::getDataAt()),
// since it copies 8 bytes at a time.
- template <typename Self, typename KeyHolder, typename Func>
- static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
- {
- StringHashTableHash hash;
- const StringRef & x = keyHolderGetKey(key_holder);
- const size_t sz = x.size;
- if (sz == 0)
- {
- keyHolderDiscardKey(key_holder);
- return func(self.m0, VoidKey{}, 0);
- }
-
- if (x.data[sz - 1] == 0)
- {
- // Strings with trailing zeros are not representable as fixed-size
- // string keys. Put them to the generic table.
- return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
- }
-
- const char * p = x.data;
- // pending bits that needs to be shifted out
- const char s = (-sz & 7) * 8;
- union
- {
- StringKey8 k8;
- StringKey16 k16;
- StringKey24 k24;
- UInt64 n[3];
- };
- switch ((sz - 1) >> 3)
- {
- case 0: // 1..8 bytes
- {
- // first half page
- if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
- {
- memcpy(&n[0], p, 8);
- n[0] &= -1ul >> s;
- }
- else
- {
- const char * lp = x.data + x.size - 8;
- memcpy(&n[0], lp, 8);
- n[0] >>= s;
- }
- keyHolderDiscardKey(key_holder);
- return func(self.m1, k8, hash(k8));
- }
- case 1: // 9..16 bytes
- {
- memcpy(&n[0], p, 8);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[1], lp, 8);
- n[1] >>= s;
- keyHolderDiscardKey(key_holder);
- return func(self.m2, k16, hash(k16));
- }
- case 2: // 17..24 bytes
- {
- memcpy(&n[0], p, 16);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[2], lp, 8);
- n[2] >>= s;
- keyHolderDiscardKey(key_holder);
- return func(self.m3, k24, hash(k24));
- }
- default: // >= 25 bytes
- {
- return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
- }
- }
- }
-
- struct EmplaceCallable
- {
- LookupResult & mapped;
- bool & inserted;
-
- EmplaceCallable(LookupResult & mapped_, bool & inserted_)
- : mapped(mapped_), inserted(inserted_) {}
-
- template <typename Map, typename KeyHolder>
- void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
- {
- typename Map::LookupResult result;
- map.emplace(key_holder, result, inserted, hash);
- mapped = &result->getMapped();
- }
- };
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
- }
-
- struct FindCallable
- {
- // find() doesn't need any key memory management, so we don't work with
- // any key holders here, only with normal keys. The key type is still
- // different for every subtable, this is why it is a template parameter.
- template <typename Submap, typename SubmapKey>
- auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
- {
- auto it = map.find(key, hash);
- if (!it)
- return decltype(&it->getMapped()){};
- else
- return &it->getMapped();
- }
- };
-
- LookupResult ALWAYS_INLINE find(const Key & x)
- {
- return dispatch(*this, x, FindCallable{});
- }
-
- ConstLookupResult ALWAYS_INLINE find(const Key & x) const
- {
- return dispatch(*this, x, FindCallable{});
- }
-
- bool ALWAYS_INLINE has(const Key & x, size_t = 0) const
- {
- return dispatch(*this, x, FindCallable{}) != nullptr;
- }
-
- void write(DB::WriteBuffer & wb) const
- {
- m0.write(wb);
- m1.write(wb);
- m2.write(wb);
- m3.write(wb);
- ms.write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- m0.writeText(wb);
- DB::writeChar(',', wb);
- m1.writeText(wb);
- DB::writeChar(',', wb);
- m2.writeText(wb);
- DB::writeChar(',', wb);
- m3.writeText(wb);
- DB::writeChar(',', wb);
- ms.writeText(wb);
- }
-
- void read(DB::ReadBuffer & rb)
- {
- m0.read(rb);
- m1.read(rb);
- m2.read(rb);
- m3.read(rb);
- ms.read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- m0.readText(rb);
- DB::assertChar(',', rb);
- m1.readText(rb);
- DB::assertChar(',', rb);
- m2.readText(rb);
- DB::assertChar(',', rb);
- m3.readText(rb);
- DB::assertChar(',', rb);
- ms.readText(rb);
- }
-
- size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
-
- bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); }
-
- size_t getBufferSizeInBytes() const
- {
- return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes()
- + ms.getBufferSizeInBytes();
- }
-
- void clearAndShrink()
- {
- m1.clearHasZero();
- m1.clearAndShrink();
- m2.clearAndShrink();
- m3.clearAndShrink();
- ms.clearAndShrink();
- }
-};
+ template <typename Self, typename KeyHolder, typename Func>
+ static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
+ {
+ StringHashTableHash hash;
+ const StringRef & x = keyHolderGetKey(key_holder);
+ const size_t sz = x.size;
+ if (sz == 0)
+ {
+ keyHolderDiscardKey(key_holder);
+ return func(self.m0, VoidKey{}, 0);
+ }
+
+ if (x.data[sz - 1] == 0)
+ {
+ // Strings with trailing zeros are not representable as fixed-size
+ // string keys. Put them to the generic table.
+ return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
+ }
+
+ const char * p = x.data;
+ // pending bits that needs to be shifted out
+ const char s = (-sz & 7) * 8;
+ union
+ {
+ StringKey8 k8;
+ StringKey16 k16;
+ StringKey24 k24;
+ UInt64 n[3];
+ };
+ switch ((sz - 1) >> 3)
+ {
+ case 0: // 1..8 bytes
+ {
+ // first half page
+ if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
+ {
+ memcpy(&n[0], p, 8);
+ n[0] &= -1ul >> s;
+ }
+ else
+ {
+ const char * lp = x.data + x.size - 8;
+ memcpy(&n[0], lp, 8);
+ n[0] >>= s;
+ }
+ keyHolderDiscardKey(key_holder);
+ return func(self.m1, k8, hash(k8));
+ }
+ case 1: // 9..16 bytes
+ {
+ memcpy(&n[0], p, 8);
+ const char * lp = x.data + x.size - 8;
+ memcpy(&n[1], lp, 8);
+ n[1] >>= s;
+ keyHolderDiscardKey(key_holder);
+ return func(self.m2, k16, hash(k16));
+ }
+ case 2: // 17..24 bytes
+ {
+ memcpy(&n[0], p, 16);
+ const char * lp = x.data + x.size - 8;
+ memcpy(&n[2], lp, 8);
+ n[2] >>= s;
+ keyHolderDiscardKey(key_holder);
+ return func(self.m3, k24, hash(k24));
+ }
+ default: // >= 25 bytes
+ {
+ return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
+ }
+ }
+ }
+
+ struct EmplaceCallable
+ {
+ LookupResult & mapped;
+ bool & inserted;
+
+ EmplaceCallable(LookupResult & mapped_, bool & inserted_)
+ : mapped(mapped_), inserted(inserted_) {}
+
+ template <typename Map, typename KeyHolder>
+ void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
+ {
+ typename Map::LookupResult result;
+ map.emplace(key_holder, result, inserted, hash);
+ mapped = &result->getMapped();
+ }
+ };
+
+ template <typename KeyHolder>
+ void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
+ {
+ this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
+ }
+
+ struct FindCallable
+ {
+ // find() doesn't need any key memory management, so we don't work with
+ // any key holders here, only with normal keys. The key type is still
+ // different for every subtable, this is why it is a template parameter.
+ template <typename Submap, typename SubmapKey>
+ auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
+ {
+ auto it = map.find(key, hash);
+ if (!it)
+ return decltype(&it->getMapped()){};
+ else
+ return &it->getMapped();
+ }
+ };
+
+ LookupResult ALWAYS_INLINE find(const Key & x)
+ {
+ return dispatch(*this, x, FindCallable{});
+ }
+
+ ConstLookupResult ALWAYS_INLINE find(const Key & x) const
+ {
+ return dispatch(*this, x, FindCallable{});
+ }
+
+ bool ALWAYS_INLINE has(const Key & x, size_t = 0) const
+ {
+ return dispatch(*this, x, FindCallable{}) != nullptr;
+ }
+
+ void write(DB::WriteBuffer & wb) const
+ {
+ m0.write(wb);
+ m1.write(wb);
+ m2.write(wb);
+ m3.write(wb);
+ ms.write(wb);
+ }
+
+ void writeText(DB::WriteBuffer & wb) const
+ {
+ m0.writeText(wb);
+ DB::writeChar(',', wb);
+ m1.writeText(wb);
+ DB::writeChar(',', wb);
+ m2.writeText(wb);
+ DB::writeChar(',', wb);
+ m3.writeText(wb);
+ DB::writeChar(',', wb);
+ ms.writeText(wb);
+ }
+
+ void read(DB::ReadBuffer & rb)
+ {
+ m0.read(rb);
+ m1.read(rb);
+ m2.read(rb);
+ m3.read(rb);
+ ms.read(rb);
+ }
+
+ void readText(DB::ReadBuffer & rb)
+ {
+ m0.readText(rb);
+ DB::assertChar(',', rb);
+ m1.readText(rb);
+ DB::assertChar(',', rb);
+ m2.readText(rb);
+ DB::assertChar(',', rb);
+ m3.readText(rb);
+ DB::assertChar(',', rb);
+ ms.readText(rb);
+ }
+
+ size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
+
+ bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); }
+
+ size_t getBufferSizeInBytes() const
+ {
+ return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes()
+ + ms.getBufferSizeInBytes();
+ }
+
+ void clearAndShrink()
+ {
+ m1.clearHasZero();
+ m1.clearAndShrink();
+ m2.clearAndShrink();
+ m3.clearAndShrink();
+ ms.clearAndShrink();
+ }
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h
index 7bebf0d8af..bd59ec714b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h
@@ -1,66 +1,66 @@
-#pragma once
-
-#include <Common/HashTable/TwoLevelHashTable.h>
-#include <Common/HashTable/HashMap.h>
-
-
-template
-<
- typename Key,
- typename Cell,
- typename Hash = DefaultHash<Key>,
- typename Grower = TwoLevelHashTableGrower<>,
- typename Allocator = HashTableAllocator,
- template <typename ...> typename ImplTable = HashMapTable
->
-class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>
-{
-public:
- using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>;
- using LookupResult = typename Impl::LookupResult;
-
- using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable;
-
- template <typename Func>
- void ALWAYS_INLINE forEachMapped(Func && func)
- {
- for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
- this->impls[i].forEachMapped(func);
- }
-
- typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
- {
- LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
-
- if (inserted)
- new (&it->getMapped()) typename Cell::Mapped();
-
- return it->getMapped();
- }
-};
-
-
-template
-<
- typename Key,
- typename Mapped,
- typename Hash = DefaultHash<Key>,
- typename Grower = TwoLevelHashTableGrower<>,
- typename Allocator = HashTableAllocator,
- template <typename ...> typename ImplTable = HashMapTable
->
-using TwoLevelHashMap = TwoLevelHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>;
-
-
-template
-<
- typename Key,
- typename Mapped,
- typename Hash = DefaultHash<Key>,
- typename Grower = TwoLevelHashTableGrower<>,
- typename Allocator = HashTableAllocator,
- template <typename ...> typename ImplTable = HashMapTable
->
-using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>;
+#pragma once
+
+#include <Common/HashTable/TwoLevelHashTable.h>
+#include <Common/HashTable/HashMap.h>
+
+
+template
+<
+ typename Key,
+ typename Cell,
+ typename Hash = DefaultHash<Key>,
+ typename Grower = TwoLevelHashTableGrower<>,
+ typename Allocator = HashTableAllocator,
+ template <typename ...> typename ImplTable = HashMapTable
+>
+class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>
+{
+public:
+ using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>;
+ using LookupResult = typename Impl::LookupResult;
+
+ using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable;
+
+ template <typename Func>
+ void ALWAYS_INLINE forEachMapped(Func && func)
+ {
+ for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
+ this->impls[i].forEachMapped(func);
+ }
+
+ typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
+ {
+ LookupResult it;
+ bool inserted;
+ this->emplace(x, it, inserted);
+
+ if (inserted)
+ new (&it->getMapped()) typename Cell::Mapped();
+
+ return it->getMapped();
+ }
+};
+
+
+template
+<
+ typename Key,
+ typename Mapped,
+ typename Hash = DefaultHash<Key>,
+ typename Grower = TwoLevelHashTableGrower<>,
+ typename Allocator = HashTableAllocator,
+ template <typename ...> typename ImplTable = HashMapTable
+>
+using TwoLevelHashMap = TwoLevelHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>;
+
+
+template
+<
+ typename Key,
+ typename Mapped,
+ typename Hash = DefaultHash<Key>,
+ typename Grower = TwoLevelHashTableGrower<>,
+ typename Allocator = HashTableAllocator,
+ template <typename ...> typename ImplTable = HashMapTable
+>
+using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>;
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h
index 14afb91c07..2376e08d5f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h
@@ -1,335 +1,335 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-
-
-/** Two-level hash table.
- * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
- * To determine which one to use, one of the bytes of the hash function is taken.
- *
- * Usually works a little slower than a simple hash table.
- * However, it has advantages in some cases:
- * - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
- * - delay during resizes is amortized, since the small hash tables will be resized separately;
- * - in theory, resizes are cache-local in a larger range of sizes.
- */
-
-template <size_t initial_size_degree = 8>
-struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree>
-{
- /// Increase the size of the hash table.
- void increaseSize()
- {
- this->size_degree += this->size_degree >= 15 ? 1 : 2;
- }
-};
-
-template
-<
- typename Key,
- typename Cell,
- typename Hash,
- typename Grower,
- typename Allocator,
- typename ImplTable = HashTable<Key, Cell, Hash, Grower, Allocator>,
- size_t BITS_FOR_BUCKET = 8
->
-class TwoLevelHashTable :
- private boost::noncopyable,
- protected Hash /// empty base optimization
-{
-protected:
- friend class const_iterator;
- friend class iterator;
-
- using HashValue = size_t;
- using Self = TwoLevelHashTable;
-public:
- using Impl = ImplTable;
-
- static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
- static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
-
- size_t hash(const Key & x) const { return Hash::operator()(x); }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
-
-protected:
- typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
- {
- while (bucket != NUM_BUCKETS && impls[bucket].empty())
- ++bucket;
-
- if (bucket != NUM_BUCKETS)
- return impls[bucket].begin();
-
- --bucket;
- return impls[MAX_BUCKET].end();
- }
-
- typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
- {
- while (bucket != NUM_BUCKETS && impls[bucket].empty())
- ++bucket;
-
- if (bucket != NUM_BUCKETS)
- return impls[bucket].begin();
-
- --bucket;
- return impls[MAX_BUCKET].end();
- }
-
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
- Impl impls[NUM_BUCKETS];
-
-
- TwoLevelHashTable() {}
-
- /// Copy the data from another (normal) hash table. It should have the same hash function.
- template <typename Source>
- TwoLevelHashTable(const Source & src)
- {
- typename Source::const_iterator it = src.begin();
-
- /// It is assumed that the zero key (stored separately) is first in iteration order.
- if (it != src.end() && it.getPtr()->isZero(src))
- {
- insert(it->getValue());
- ++it;
- }
-
- for (; it != src.end(); ++it)
- {
- const Cell * cell = it.getPtr();
- size_t hash_value = cell->getHash(src);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].insertUniqueNonZero(cell, hash_value);
- }
- }
-
-
- class iterator
- {
+#pragma once
+
+#include <Common/HashTable/HashTable.h>
+
+
+/** Two-level hash table.
+ * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
+ * To determine which one to use, one of the bytes of the hash function is taken.
+ *
+ * Usually works a little slower than a simple hash table.
+ * However, it has advantages in some cases:
+ * - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
+ * - delay during resizes is amortized, since the small hash tables will be resized separately;
+ * - in theory, resizes are cache-local in a larger range of sizes.
+ */
+
+template <size_t initial_size_degree = 8>
+struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree>
+{
+    /// Raise the size degree: by 2 while it is below 15, by 1 once it has reached 15.
+    void increaseSize()
+    {
+        this->size_degree += (this->size_degree < 15) ? 2 : 1;
+    }
+};
+
+template
+<
+ typename Key,
+ typename Cell,
+ typename Hash,
+ typename Grower,
+ typename Allocator,
+ typename ImplTable = HashTable<Key, Cell, Hash, Grower, Allocator>,
+ size_t BITS_FOR_BUCKET = 8
+>
+class TwoLevelHashTable :
+ private boost::noncopyable,
+ protected Hash /// empty base optimization
+{
+protected:
+ friend class const_iterator;
+ friend class iterator;
+
+ using HashValue = size_t;
+ using Self = TwoLevelHashTable;
+public:
+ using Impl = ImplTable;
+
+ static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
+ static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
+
+ size_t hash(const Key & x) const { return Hash::operator()(x); }
+
+ /// Bucket index = the BITS_FOR_BUCKET bits of the hash just below bit 32 (bits 24..31 for the default of 8). NOTE Bad for hash tables with more than 2^32 cells.
+ static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
+
+protected:
+    typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
+    {
+        /// Advance `bucket` (in place, for the caller) to the first non-empty bucket at or after it.
+        for (; bucket != NUM_BUCKETS; ++bucket)
+            if (!impls[bucket].empty())
+                return impls[bucket].begin();
+
+        /// Every remaining bucket is empty: park on the last bucket and return its end().
+        bucket = MAX_BUCKET;
+        return impls[MAX_BUCKET].end();
+    }
+
+    typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
+    {
+        /// Advance `bucket` (in place, for the caller) to the first non-empty bucket at or after it.
+        for (; bucket != NUM_BUCKETS; ++bucket)
+            if (!impls[bucket].empty())
+                return impls[bucket].begin();
+
+        /// Every remaining bucket is empty: park on the last bucket and return its end().
+        bucket = MAX_BUCKET;
+        return impls[MAX_BUCKET].end();
+    }
+
+public:
+ using key_type = typename Impl::key_type;
+ using mapped_type = typename Impl::mapped_type;
+ using value_type = typename Impl::value_type;
+ using cell_type = typename Impl::cell_type;
+
+ using LookupResult = typename Impl::LookupResult;
+ using ConstLookupResult = typename Impl::ConstLookupResult;
+
+ Impl impls[NUM_BUCKETS];
+
+
+ TwoLevelHashTable() {}
+
+ /// Copy the data from another (normal) hash table. It should have the same hash function.
+ template <typename Source>
+ TwoLevelHashTable(const Source & src)
+ {
+ typename Source::const_iterator it = src.begin();
+
+ /// It is assumed that the zero key (stored separately) is first in iteration order.
+ if (it != src.end() && it.getPtr()->isZero(src))
+ {
+ insert(it->getValue());
+ ++it;
+ }
+
+ for (; it != src.end(); ++it)
+ {
+ const Cell * cell = it.getPtr();
+ size_t hash_value = cell->getHash(src);
+ size_t buck = getBucketFromHash(hash_value);
+ impls[buck].insertUniqueNonZero(cell, hash_value);
+ }
+ }
+
+
+ class iterator
+ {
Self * container{};
size_t bucket{};
typename Impl::iterator current_it{};
-
- friend class TwoLevelHashTable;
-
- iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_)
- : container(container_), bucket(bucket_), current_it(current_it_) {}
-
- public:
- iterator() {}
-
- bool operator== (const iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
- bool operator!= (const iterator & rhs) const { return !(*this == rhs); }
-
- iterator & operator++()
- {
- ++current_it;
- if (current_it == container->impls[bucket].end())
- {
- ++bucket;
- current_it = container->beginOfNextNonEmptyBucket(bucket);
- }
-
- return *this;
- }
-
- Cell & operator* () const { return *current_it; }
- Cell * operator->() const { return current_it.getPtr(); }
-
- Cell * getPtr() const { return current_it.getPtr(); }
- size_t getHash() const { return current_it.getHash(); }
- };
-
-
- class const_iterator
- {
+
+ friend class TwoLevelHashTable;
+
+ iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_)
+ : container(container_), bucket(bucket_), current_it(current_it_) {}
+
+ public:
+ iterator() {}
+
+ bool operator== (const iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
+ bool operator!= (const iterator & rhs) const { return !(*this == rhs); }
+
+        iterator & operator++()
+        {
+            ++current_it;
+            if (!(current_it == container->impls[bucket].end()))
+                return *this; /// still inside the current bucket
+
+            /// Current bucket exhausted: continue from the next non-empty bucket, or land on end().
+            ++bucket;
+            current_it = container->beginOfNextNonEmptyBucket(bucket);
+            return *this;
+        }
+
+ Cell & operator* () const { return *current_it; }
+ Cell * operator->() const { return current_it.getPtr(); }
+
+ Cell * getPtr() const { return current_it.getPtr(); }
+ size_t getHash() const { return current_it.getHash(); }
+ };
+
+
+ class const_iterator
+ {
Self * container{};
size_t bucket{};
typename Impl::const_iterator current_it{};
-
- friend class TwoLevelHashTable;
-
- const_iterator(Self * container_, size_t bucket_, typename Impl::const_iterator current_it_)
- : container(container_), bucket(bucket_), current_it(current_it_) {}
-
- public:
- const_iterator() {}
- const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {}
-
- bool operator== (const const_iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
- bool operator!= (const const_iterator & rhs) const { return !(*this == rhs); }
-
- const_iterator & operator++()
- {
- ++current_it;
- if (current_it == container->impls[bucket].end())
- {
- ++bucket;
- current_it = container->beginOfNextNonEmptyBucket(bucket);
- }
-
- return *this;
- }
-
- const Cell & operator* () const { return *current_it; }
- const Cell * operator->() const { return current_it->getPtr(); }
-
- const Cell * getPtr() const { return current_it.getPtr(); }
- size_t getHash() const { return current_it.getHash(); }
- };
-
-
- const_iterator begin() const
- {
- size_t buck = 0;
- typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck);
- return { this, buck, impl_it };
- }
-
- iterator begin()
- {
- size_t buck = 0;
- typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck);
- return { this, buck, impl_it };
- }
-
- const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
- iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
-
-
- /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
- {
- size_t hash_value = hash(Cell::getKey(x));
-
- std::pair<LookupResult, bool> res;
- emplace(Cell::getKey(x), res.first, res.second, hash_value);
-
- if (res.second)
- insertSetMapped(res.first->getMapped(), x);
-
- return res;
- }
-
-
- /** Insert the key,
- * return an iterator to a position that can be used for `placement new` of value,
- * as well as the flag - whether a new key was inserted.
- *
- * You have to make `placement new` values if you inserted a new key,
- * since when destroying a hash table, the destructor will be invoked for it!
- *
- * Example usage:
- *
- * Map::iterator it;
- * bool inserted;
- * map.emplace(key, it, inserted);
- * if (inserted)
- * new(&it->second) Mapped(value);
- */
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- size_t hash_value = hash(keyHolderGetKey(key_holder));
- emplace(key_holder, it, inserted, hash_value);
- }
-
-
- /// Same, but with a precalculated values of hash function.
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it,
- bool & inserted, size_t hash_value)
- {
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].emplace(key_holder, it, inserted, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x, size_t hash_value)
- {
- size_t buck = getBucketFromHash(hash_value);
- return impls[buck].find(x, hash_value);
- }
-
- ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const
- {
- return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); }
-
- ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); }
-
-
- void write(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::writeChar(',', wb);
- impls[i].writeText(wb);
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::assertChar(',', rb);
- impls[i].readText(rb);
- }
- }
-
-
- size_t size() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].size();
-
- return res;
- }
-
- bool empty() const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- if (!impls[i].empty())
- return false;
-
- return true;
- }
-
- size_t getBufferSizeInBytes() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].getBufferSizeInBytes();
-
- return res;
- }
-};
+
+ friend class TwoLevelHashTable;
+
+ const_iterator(Self * container_, size_t bucket_, typename Impl::const_iterator current_it_)
+ : container(container_), bucket(bucket_), current_it(current_it_) {}
+
+ public:
+ const_iterator() {}
+ const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {}
+
+ bool operator== (const const_iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
+ bool operator!= (const const_iterator & rhs) const { return !(*this == rhs); }
+
+        const_iterator & operator++()
+        {
+            ++current_it;
+            if (!(current_it == container->impls[bucket].end()))
+                return *this; /// still inside the current bucket
+
+            /// Current bucket exhausted: continue from the next non-empty bucket, or land on end().
+            ++bucket;
+            current_it = container->beginOfNextNonEmptyBucket(bucket);
+            return *this;
+        }
+
+ const Cell & operator* () const { return *current_it; }
+        const Cell * operator->() const { return current_it.getPtr(); } /// pointer to the current cell; asks the bucket iterator (not the cell) for it, like iterator::operator-> and getPtr()
+
+ const Cell * getPtr() const { return current_it.getPtr(); }
+ size_t getHash() const { return current_it.getHash(); }
+ };
+
+
+ const_iterator begin() const
+ {
+ size_t buck = 0;
+ typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck);
+ return { this, buck, impl_it };
+ }
+
+ iterator begin()
+ {
+ size_t buck = 0;
+ typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck);
+ return { this, buck, impl_it };
+ }
+
+ const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
+ iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
+
+
+ /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
+ std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
+ {
+ size_t hash_value = hash(Cell::getKey(x));
+
+ std::pair<LookupResult, bool> res;
+ emplace(Cell::getKey(x), res.first, res.second, hash_value);
+
+ if (res.second)
+ insertSetMapped(res.first->getMapped(), x);
+
+ return res;
+ }
+
+
+ /** Insert the key,
+ * return an iterator to a position that can be used for `placement new` of value,
+ * as well as the flag - whether a new key was inserted.
+ *
+ * You have to make `placement new` values if you inserted a new key,
+ * since when destroying a hash table, the destructor will be invoked for it!
+ *
+ * Example usage:
+ *
+ * Map::iterator it;
+ * bool inserted;
+ * map.emplace(key, it, inserted);
+ * if (inserted)
+ * new(&it->second) Mapped(value);
+ */
+ template <typename KeyHolder>
+ void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
+ {
+ size_t hash_value = hash(keyHolderGetKey(key_holder));
+ emplace(key_holder, it, inserted, hash_value);
+ }
+
+
+ /// Same, but with a precalculated values of hash function.
+ template <typename KeyHolder>
+ void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it,
+ bool & inserted, size_t hash_value)
+ {
+ size_t buck = getBucketFromHash(hash_value);
+ impls[buck].emplace(key_holder, it, inserted, hash_value);
+ }
+
+ LookupResult ALWAYS_INLINE find(Key x, size_t hash_value)
+ {
+ size_t buck = getBucketFromHash(hash_value);
+ return impls[buck].find(x, hash_value);
+ }
+
+ ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const
+ {
+ return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
+ }
+
+ LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); }
+
+ ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); }
+
+
+    void write(DB::WriteBuffer & wb) const
+    {
+        for (const auto & bucket_table : impls) /// each bucket writes itself to `wb`, in index order
+            bucket_table.write(wb);
+    }
+
+    void writeText(DB::WriteBuffer & wb) const
+    {
+        for (size_t bucket = 0; bucket != NUM_BUCKETS; ++bucket)
+        {
+            if (bucket > 0) /// a ',' goes between buckets, not before the first one
+                DB::writeChar(',', wb);
+            impls[bucket].writeText(wb);
+        }
+    }
+
+    void read(DB::ReadBuffer & rb)
+    {
+        for (auto & bucket_table : impls) /// same bucket order as write()
+            bucket_table.read(rb);
+    }
+
+    void readText(DB::ReadBuffer & rb)
+    {
+        for (size_t bucket = 0; bucket != NUM_BUCKETS; ++bucket)
+        {
+            if (bucket > 0) /// DB::assertChar(',') runs before every bucket except the first, mirroring writeText()
+                DB::assertChar(',', rb);
+            impls[bucket].readText(rb);
+        }
+    }
+
+
+    size_t size() const
+    {
+        size_t total = 0;
+        for (const auto & bucket_table : impls)
+            total += bucket_table.size();
+
+        return total; /// sum of size() over all buckets
+    }
+
+    bool empty() const
+    {
+        for (const auto & bucket_table : impls)
+            if (!bucket_table.empty())
+                return false; /// one non-empty bucket is enough
+
+        return true;
+    }
+
+    size_t getBufferSizeInBytes() const
+    {
+        size_t total_bytes = 0;
+        for (const auto & bucket_table : impls)
+            total_bytes += bucket_table.getBufferSizeInBytes();
+
+        return total_bytes; /// sum of getBufferSizeInBytes() over all buckets
+    }
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h
index 6bd8f74dbd..e87535da2f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h
@@ -1,33 +1,33 @@
-#pragma once
-
-#include <Common/HashTable/StringHashMap.h>
-#include <Common/HashTable/TwoLevelStringHashTable.h>
-
-template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap>
-class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>
-{
-public:
- using Key = StringRef;
- using Self = TwoLevelStringHashMap;
- using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
- template <typename Func>
- void ALWAYS_INLINE forEachMapped(Func && func)
- {
- for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
+#pragma once
+
+#include <Common/HashTable/StringHashMap.h>
+#include <Common/HashTable/TwoLevelStringHashTable.h>
+
+template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap>
+class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>
+{
+public:
+ using Key = StringRef;
+ using Self = TwoLevelStringHashMap;
+    using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>; /// must name the real base class (built from ImplTable), otherwise `using Base::Base` breaks for a non-default ImplTable
+ using LookupResult = typename Base::LookupResult;
+
+ using Base::Base;
+
+ template <typename Func>
+ void ALWAYS_INLINE forEachMapped(Func && func)
+ {
+ for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
this->impls[i].forEachMapped(func);
- }
-
- TMapped & ALWAYS_INLINE operator[](const Key & x)
- {
- bool inserted;
- LookupResult it;
- this->emplace(x, it, inserted);
- if (inserted)
- new (&it->getMapped()) TMapped();
- return it->getMapped();
- }
-};
+ }
+
+    TMapped & ALWAYS_INLINE operator[](const Key & x)
+    {
+        LookupResult cell;
+        bool is_new_key;
+        this->emplace(x, cell, is_new_key);
+        if (is_new_key) /// the key was just inserted: value-initialize its mapped value in place
+            new (&cell->getMapped()) TMapped();
+        return cell->getMapped();
+    }
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h
index 93bbcb2835..ff2a4ab12e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h
@@ -1,235 +1,235 @@
-#pragma once
-
-#include <Common/HashTable/StringHashTable.h>
-
-template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8>
-class TwoLevelStringHashTable : private boost::noncopyable
-{
-protected:
- using HashValue = size_t;
- using Self = TwoLevelStringHashTable;
-
-public:
- using Key = StringRef;
- using Impl = ImplTable;
-
- static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
- static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
-
- // TODO: currently hashing contains redundant computations when doing distributed or external aggregations
- size_t hash(const Key & x) const
- {
- return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; });
- }
-
- size_t operator()(const Key & x) const { return hash(x); }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
-
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
- Impl impls[NUM_BUCKETS];
-
- TwoLevelStringHashTable() {}
-
- template <typename Source>
- TwoLevelStringHashTable(const Source & src)
- {
- if (src.m0.hasZero())
- impls[0].m0.setHasZero(*src.m0.zeroValue());
-
- for (auto & v : src.m1)
- {
- size_t hash_value = v.getHash(src.m1);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].m1.insertUniqueNonZero(&v, hash_value);
- }
- for (auto & v : src.m2)
- {
- size_t hash_value = v.getHash(src.m2);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].m2.insertUniqueNonZero(&v, hash_value);
- }
- for (auto & v : src.m3)
- {
- size_t hash_value = v.getHash(src.m3);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].m3.insertUniqueNonZero(&v, hash_value);
- }
- for (auto & v : src.ms)
- {
- size_t hash_value = v.getHash(src.ms);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].ms.insertUniqueNonZero(&v, hash_value);
- }
- }
-
- // This function is mostly the same as StringHashTable::dispatch, but with
- // added bucket computation. See the comments there.
- template <typename Self, typename Func, typename KeyHolder>
- static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
- {
- StringHashTableHash hash;
- const StringRef & x = keyHolderGetKey(key_holder);
- const size_t sz = x.size;
- if (sz == 0)
- {
- keyHolderDiscardKey(key_holder);
- return func(self.impls[0].m0, VoidKey{}, 0);
- }
-
- if (x.data[x.size - 1] == 0)
- {
- // Strings with trailing zeros are not representable as fixed-size
- // string keys. Put them to the generic table.
- auto res = hash(x);
- auto buck = getBucketFromHash(res);
- return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder),
- res);
- }
-
- const char * p = x.data;
- // pending bits that needs to be shifted out
- const char s = (-sz & 7) * 8;
- union
- {
- StringKey8 k8;
- StringKey16 k16;
- StringKey24 k24;
- UInt64 n[3];
- };
- switch ((sz - 1) >> 3)
- {
- case 0:
- {
- // first half page
- if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
- {
- memcpy(&n[0], p, 8);
- n[0] &= -1ul >> s;
- }
- else
- {
- const char * lp = x.data + x.size - 8;
- memcpy(&n[0], lp, 8);
- n[0] >>= s;
- }
- auto res = hash(k8);
- auto buck = getBucketFromHash(res);
- keyHolderDiscardKey(key_holder);
- return func(self.impls[buck].m1, k8, res);
- }
- case 1:
- {
- memcpy(&n[0], p, 8);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[1], lp, 8);
- n[1] >>= s;
- auto res = hash(k16);
- auto buck = getBucketFromHash(res);
- keyHolderDiscardKey(key_holder);
- return func(self.impls[buck].m2, k16, res);
- }
- case 2:
- {
- memcpy(&n[0], p, 16);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[2], lp, 8);
- n[2] >>= s;
- auto res = hash(k24);
- auto buck = getBucketFromHash(res);
- keyHolderDiscardKey(key_holder);
- return func(self.impls[buck].m3, k24, res);
- }
- default:
- {
- auto res = hash(x);
- auto buck = getBucketFromHash(res);
- return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
- }
- }
- }
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted});
- }
-
- LookupResult ALWAYS_INLINE find(const Key x)
- {
- return dispatch(*this, x, typename Impl::FindCallable{});
- }
-
- ConstLookupResult ALWAYS_INLINE find(const Key x) const
- {
- return dispatch(*this, x, typename Impl::FindCallable{});
- }
-
- void write(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::writeChar(',', wb);
- impls[i].writeText(wb);
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::assertChar(',', rb);
- impls[i].readText(rb);
- }
- }
-
- size_t size() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].size();
-
- return res;
- }
-
- bool empty() const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- if (!impls[i].empty())
- return false;
-
- return true;
- }
-
- size_t getBufferSizeInBytes() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].getBufferSizeInBytes();
-
- return res;
- }
-};
+#pragma once
+
+#include <Common/HashTable/StringHashTable.h>
+
+template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8>
+class TwoLevelStringHashTable : private boost::noncopyable
+{
+protected:
+ using HashValue = size_t;
+ using Self = TwoLevelStringHashTable;
+
+public:
+ using Key = StringRef;
+ using Impl = ImplTable;
+
+ static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
+ static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
+
+ // TODO: currently hashing contains redundant computations when doing distributed or external aggregations
+    size_t hash(const Key & x) const
+    {
+        return Self::dispatch(*this, x, [](const auto &, const auto &, size_t hash_value) { return hash_value; }); /// run the regular dispatch and keep only the hash it passes to the callback
+    }
+
+ size_t operator()(const Key & x) const { return hash(x); }
+
+ /// NOTE Bad for hash tables with more than 2^32 cells.
+ static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
+
+public:
+ using key_type = typename Impl::key_type;
+ using mapped_type = typename Impl::mapped_type;
+ using value_type = typename Impl::value_type;
+ using cell_type = typename Impl::cell_type;
+
+ using LookupResult = typename Impl::LookupResult;
+ using ConstLookupResult = typename Impl::ConstLookupResult;
+
+ Impl impls[NUM_BUCKETS];
+
+ TwoLevelStringHashTable() {}
+
+ template <typename Source>
+ TwoLevelStringHashTable(const Source & src)
+ {
+ if (src.m0.hasZero())
+ impls[0].m0.setHasZero(*src.m0.zeroValue());
+
+ for (auto & v : src.m1)
+ {
+ size_t hash_value = v.getHash(src.m1);
+ size_t buck = getBucketFromHash(hash_value);
+ impls[buck].m1.insertUniqueNonZero(&v, hash_value);
+ }
+ for (auto & v : src.m2)
+ {
+ size_t hash_value = v.getHash(src.m2);
+ size_t buck = getBucketFromHash(hash_value);
+ impls[buck].m2.insertUniqueNonZero(&v, hash_value);
+ }
+ for (auto & v : src.m3)
+ {
+ size_t hash_value = v.getHash(src.m3);
+ size_t buck = getBucketFromHash(hash_value);
+ impls[buck].m3.insertUniqueNonZero(&v, hash_value);
+ }
+ for (auto & v : src.ms)
+ {
+ size_t hash_value = v.getHash(src.ms);
+ size_t buck = getBucketFromHash(hash_value);
+ impls[buck].ms.insertUniqueNonZero(&v, hash_value);
+ }
+ }
+
+    // Mostly the same as StringHashTable::dispatch (see the comments there), with bucket computation added:
+    // calls func(sub-table, key, hash) on sub-table m0/m1/m2/m3/ms of the bucket selected by the key's hash.
+    template <typename Self, typename Func, typename KeyHolder>
+    static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
+    {
+        StringHashTableHash hash;
+        const StringRef & x = keyHolderGetKey(key_holder);
+        const size_t sz = x.size;
+        if (sz == 0)
+        {
+            keyHolderDiscardKey(key_holder);
+            return func(self.impls[0].m0, VoidKey{}, 0); /// the empty key always goes to bucket 0 with hash 0
+        }
+
+        if (x.data[x.size - 1] == 0)
+        {
+            // Strings with trailing zeros are not representable as fixed-size
+            // string keys. Put them to the generic table.
+            auto res = hash(x);
+            auto buck = getBucketFromHash(res);
+            return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder),
+                res);
+        }
+
+        const char * p = x.data;
+        // pending bits that needs to be shifted out
+        const char s = (-sz & 7) * 8; /// 0..56: number of padding bits in the last 8-byte word
+        union
+        {
+            StringKey8 k8;
+            StringKey16 k16;
+            StringKey24 k24;
+            UInt64 n[3];
+        };
+        switch ((sz - 1) >> 3) /// 0: 1..8 bytes, 1: 9..16 bytes, 2: 17..24 bytes, otherwise longer
+        {
+            case 0:
+            {
+                // first half page
+                if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
+                {
+                    memcpy(&n[0], p, 8);
+                    n[0] &= -1ULL >> s; /// 64-bit mask on every target: `unsigned long` is only 32 bits on LLP64, where `-1ul >> s` is wrong (and UB for s >= 32)
+                }
+                else
+                {
+                    const char * lp = x.data + x.size - 8;
+                    memcpy(&n[0], lp, 8);
+                    n[0] >>= s;
+                }
+                auto res = hash(k8);
+                auto buck = getBucketFromHash(res);
+                keyHolderDiscardKey(key_holder);
+                return func(self.impls[buck].m1, k8, res);
+            }
+            case 1:
+            {
+                memcpy(&n[0], p, 8);
+                const char * lp = x.data + x.size - 8; /// last 8 bytes of the key, overlapping the first word when sz < 16
+                memcpy(&n[1], lp, 8);
+                n[1] >>= s;
+                auto res = hash(k16);
+                auto buck = getBucketFromHash(res);
+                keyHolderDiscardKey(key_holder);
+                return func(self.impls[buck].m2, k16, res);
+            }
+            case 2:
+            {
+                memcpy(&n[0], p, 16);
+                const char * lp = x.data + x.size - 8; /// last 8 bytes of the key, overlapping the second word when sz < 24
+                memcpy(&n[2], lp, 8);
+                n[2] >>= s;
+                auto res = hash(k24);
+                auto buck = getBucketFromHash(res);
+                keyHolderDiscardKey(key_holder);
+                return func(self.impls[buck].m3, k24, res);
+            }
+            default:
+            {
+                auto res = hash(x); /// keys longer than 24 bytes go to the generic table, keeping the key holder
+                auto buck = getBucketFromHash(res);
+                return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
+            }
+        }
+    }
+
+ template <typename KeyHolder>
+ void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
+ {
+ dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted});
+ }
+
+ LookupResult ALWAYS_INLINE find(const Key x)
+ {
+ return dispatch(*this, x, typename Impl::FindCallable{});
+ }
+
+ ConstLookupResult ALWAYS_INLINE find(const Key x) const
+ {
+ return dispatch(*this, x, typename Impl::FindCallable{});
+ }
+
+    void write(DB::WriteBuffer & wb) const
+    {
+        for (const auto & bucket_table : impls) /// each bucket writes itself to `wb`, in index order
+            bucket_table.write(wb);
+    }
+
+    void writeText(DB::WriteBuffer & wb) const
+    {
+        for (size_t bucket = 0; bucket != NUM_BUCKETS; ++bucket)
+        {
+            if (bucket > 0) /// a ',' goes between buckets, not before the first one
+                DB::writeChar(',', wb);
+            impls[bucket].writeText(wb);
+        }
+    }
+
+    void read(DB::ReadBuffer & rb)
+    {
+        for (auto & bucket_table : impls) /// same bucket order as write()
+            bucket_table.read(rb);
+    }
+
+    void readText(DB::ReadBuffer & rb)
+    {
+        for (size_t bucket = 0; bucket != NUM_BUCKETS; ++bucket)
+        {
+            if (bucket > 0) /// DB::assertChar(',') runs before every bucket except the first, mirroring writeText()
+                DB::assertChar(',', rb);
+            impls[bucket].readText(rb);
+        }
+    }
+
+    size_t size() const
+    {
+        size_t total = 0;
+        for (const auto & bucket_table : impls)
+            total += bucket_table.size();
+
+        return total; /// sum of size() over all buckets
+    }
+
+    bool empty() const
+    {
+        for (const auto & bucket_table : impls)
+            if (!bucket_table.empty())
+                return false; /// one non-empty bucket is enough
+
+        return true;
+    }
+
+    size_t getBufferSizeInBytes() const
+    {
+        size_t total_bytes = 0;
+        for (const auto & bucket_table : impls)
+            total_bytes += bucket_table.getBufferSizeInBytes();
+
+        return total_bytes; /// sum of getBufferSizeInBytes() over all buckets
+    }
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp
index a8363a46de..7ba91838f2 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.cpp
@@ -1,45 +1,45 @@
-#include "IPv6ToBinary.h"
-#include <Poco/Net/IPAddress.h>
+#include "IPv6ToBinary.h"
+#include <Poco/Net/IPAddress.h>
#include <Poco/ByteOrder.h>
#include <Common/formatIPv6.h>
-#include <cstring>
-
-
-namespace DB
-{
-
+#include <cstring>
+
+
+namespace DB
+{
+
/// Result array could be indexed with all possible uint8 values without extra check.
/// For values greater than 128 we will store same value as for 128 (all bits set).
constexpr size_t IPV6_MASKS_COUNT = 256;
using RawMaskArrayV6 = std::array<uint8_t, IPV6_BINARY_LENGTH>;
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res)
-{
- if (Poco::Net::IPAddress::IPv6 == address.family())
- {
+{
+ if (Poco::Net::IPAddress::IPv6 == address.family())
+ {
memcpy(res, address.addr(), 16);
- }
- else if (Poco::Net::IPAddress::IPv4 == address.family())
- {
- /// Convert to IPv6-mapped address.
+ }
+ else if (Poco::Net::IPAddress::IPv4 == address.family())
+ {
+ /// Convert to IPv6-mapped address.
memset(res, 0, 10);
- res[10] = '\xFF';
- res[11] = '\xFF';
- memcpy(&res[12], address.addr(), 4);
- }
- else
+ res[10] = '\xFF';
+ res[11] = '\xFF';
+ memcpy(&res[12], address.addr(), 4);
+ }
+ else
memset(res, 0, 16);
}
-
+
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
{
std::array<char, 16> res;
IPv6ToRawBinary(address, res.data());
- return res;
-}
-
+ return res;
+}
+
template <typename RawMaskArrayT>
static constexpr RawMaskArrayT generateBitMask(size_t prefix)
{
@@ -54,7 +54,7 @@ static constexpr RawMaskArrayT generateBitMask(size_t prefix)
while (i < arr.size())
arr[i++] = 0x00;
return arr;
-}
+}
template <typename RawMaskArrayT, size_t masksCount>
static constexpr std::array<RawMaskArrayT, masksCount> generateBitMasks()
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h
index d766d40835..7d432faa00 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/IPv6ToBinary.h
@@ -1,19 +1,19 @@
#pragma once
-#include <array>
+#include <array>
#include <common/types.h>
-
-namespace Poco { namespace Net { class IPAddress; }}
-
-namespace DB
-{
-
+
+namespace Poco { namespace Net { class IPAddress; }}
+
+namespace DB
+{
+
/// Convert IP address to raw binary with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
/// Saves result into the first 16 bytes of `res`.
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res);
-/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
-std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);
-
+/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
+std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);
+
/// Returns a reference to 16-byte array containing mask with first `prefix_len` bits set to `1` and `128 - prefix_len` to `0`.
/// The reference is valid during all program execution time.
/// Values of prefix_len greater than 128 interpreted as 128 exactly.
@@ -23,4 +23,4 @@ const std::array<uint8_t, 16> & getCIDRMaskIPv6(UInt8 prefix_len);
bool matchIPv4Subnet(UInt32 addr, UInt32 cidr_addr, UInt8 prefix);
bool matchIPv6Subnet(const uint8_t * addr, const uint8_t * cidr_addr, UInt8 prefix);
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h
index ff7d7c5c5f..3441d79a37 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/NetException.h
@@ -1,22 +1,22 @@
-#pragma once
-
-#include <Common/Exception.h>
-
-
-namespace DB
-{
-
-class NetException : public Exception
-{
-public:
- NetException(const std::string & msg, int code) : Exception(msg, code) {}
-
- NetException * clone() const override { return new NetException(*this); }
- void rethrow() const override { throw *this; }
-
-private:
- const char * name() const throw() override { return "DB::NetException"; }
- const char * className() const throw() override { return "DB::NetException"; }
-};
-
-}
+#pragma once
+
+#include <Common/Exception.h>
+
+
+namespace DB
+{
+
+class NetException : public Exception
+{
+public:
+ NetException(const std::string & msg, int code) : Exception(msg, code) {}
+
+ NetException * clone() const override { return new NetException(*this); }
+ void rethrow() const override { throw *this; }
+
+private:
+ const char * name() const throw() override { return "DB::NetException"; }
+ const char * className() const throw() override { return "DB::NetException"; }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp
index bf8ee6eedf..0339b46940 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.cpp
@@ -1,13 +1,13 @@
-#if USE_SSL
-#include "OpenSSLHelpers.h"
+#if USE_SSL
+#include "OpenSSLHelpers.h"
#include <common/scope_guard.h>
-#include <openssl/err.h>
-#include <openssl/sha.h>
-
-namespace DB
-{
-#pragma GCC diagnostic warning "-Wold-style-cast"
-
+#include <openssl/err.h>
+#include <openssl/sha.h>
+
+namespace DB
+{
+#pragma GCC diagnostic warning "-Wold-style-cast"
+
std::string encodeSHA256(const std::string_view & text)
{
return encodeSHA256(text.data(), text.size());
@@ -19,20 +19,20 @@ std::string encodeSHA256(const void * text, size_t size)
encodeSHA256(text, size, reinterpret_cast<unsigned char *>(out.data()));
return out;
}
-void encodeSHA256(const std::string_view & text, unsigned char * out)
-{
+void encodeSHA256(const std::string_view & text, unsigned char * out)
+{
encodeSHA256(text.data(), text.size(), out);
}
void encodeSHA256(const void * text, size_t size, unsigned char * out)
{
- SHA256_CTX ctx;
- SHA256_Init(&ctx);
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
SHA256_Update(&ctx, reinterpret_cast<const UInt8 *>(text), size);
- SHA256_Final(out, &ctx);
-}
-
-String getOpenSSLErrors()
-{
+ SHA256_Final(out, &ctx);
+}
+
+String getOpenSSLErrors()
+{
String res;
ERR_print_errors_cb([](const char * str, size_t len, void * ctx)
{
@@ -43,7 +43,7 @@ String getOpenSSLErrors()
return 1;
}, &res);
return res;
-}
-
-}
-#endif
+}
+
+}
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h
index 192631ac6d..9b2754ce5e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/OpenSSLHelpers.h
@@ -1,22 +1,22 @@
-#pragma once
-
-
-#if USE_SSL
+#pragma once
+
+
+#if USE_SSL
# include <common/types.h>
-
-
-namespace DB
-{
+
+
+namespace DB
+{
/// Encodes `text` and returns it.
std::string encodeSHA256(const std::string_view & text);
std::string encodeSHA256(const void * text, size_t size);
/// `out` must be at least 32 bytes long.
-void encodeSHA256(const std::string_view & text, unsigned char * out);
+void encodeSHA256(const std::string_view & text, unsigned char * out);
void encodeSHA256(const void * text, size_t size, unsigned char * out);
-
-/// Returns concatenation of error strings for all errors that OpenSSL has recorded, emptying the error queue.
-String getOpenSSLErrors();
-
-}
-#endif
+
+/// Returns concatenation of error strings for all errors that OpenSSL has recorded, emptying the error queue.
+String getOpenSSLErrors();
+
+}
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp
index a5c21e3d87..b08c956bd9 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.cpp
@@ -1,84 +1,84 @@
-#include <Common/PipeFDs.h>
-#include <Common/Exception.h>
-#include <Common/formatReadable.h>
-
-#include <common/logger_useful.h>
-#include <common/errnoToString.h>
-
-#include <unistd.h>
-#include <fcntl.h>
-#include <string>
-#include <algorithm>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int CANNOT_PIPE;
- extern const int CANNOT_FCNTL;
- extern const int LOGICAL_ERROR;
-}
-
-void LazyPipeFDs::open()
-{
- for (int & fd : fds_rw)
- if (fd >= 0)
- throw Exception("Pipe is already opened", ErrorCodes::LOGICAL_ERROR);
-
-#ifndef __APPLE__
- if (0 != pipe2(fds_rw, O_CLOEXEC))
- throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE);
-#else
- if (0 != pipe(fds_rw))
- throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE);
- if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC))
- throwFromErrno("Cannot setup auto-close on exec for read end of pipe", ErrorCodes::CANNOT_FCNTL);
- if (0 != fcntl(fds_rw[1], F_SETFD, FD_CLOEXEC))
- throwFromErrno("Cannot setup auto-close on exec for write end of pipe", ErrorCodes::CANNOT_FCNTL);
-#endif
-}
-
-void LazyPipeFDs::close()
-{
- for (int & fd : fds_rw)
- {
- if (fd < 0)
- continue;
- if (0 != ::close(fd))
- throwFromErrno("Cannot close pipe", ErrorCodes::CANNOT_PIPE);
- fd = -1;
- }
-}
-
-PipeFDs::PipeFDs()
-{
- open();
-}
-
-LazyPipeFDs::~LazyPipeFDs()
-{
- try
- {
- close();
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-
+#include <Common/PipeFDs.h>
+#include <Common/Exception.h>
+#include <Common/formatReadable.h>
+
+#include <common/logger_useful.h>
+#include <common/errnoToString.h>
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <string>
+#include <algorithm>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_PIPE;
+ extern const int CANNOT_FCNTL;
+ extern const int LOGICAL_ERROR;
+}
+
+void LazyPipeFDs::open()
+{
+ for (int & fd : fds_rw)
+ if (fd >= 0)
+ throw Exception("Pipe is already opened", ErrorCodes::LOGICAL_ERROR);
+
+#ifndef __APPLE__
+ if (0 != pipe2(fds_rw, O_CLOEXEC))
+ throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE);
+#else
+ if (0 != pipe(fds_rw))
+ throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE);
+ if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC))
+ throwFromErrno("Cannot setup auto-close on exec for read end of pipe", ErrorCodes::CANNOT_FCNTL);
+ if (0 != fcntl(fds_rw[1], F_SETFD, FD_CLOEXEC))
+ throwFromErrno("Cannot setup auto-close on exec for write end of pipe", ErrorCodes::CANNOT_FCNTL);
+#endif
+}
+
+void LazyPipeFDs::close()
+{
+ for (int & fd : fds_rw)
+ {
+ if (fd < 0)
+ continue;
+ if (0 != ::close(fd))
+ throwFromErrno("Cannot close pipe", ErrorCodes::CANNOT_PIPE);
+ fd = -1;
+ }
+}
+
+PipeFDs::PipeFDs()
+{
+ open();
+}
+
+LazyPipeFDs::~LazyPipeFDs()
+{
+ try
+ {
+ close();
+ }
+ catch (...)
+ {
+ tryLogCurrentException(__PRETTY_FUNCTION__);
+ }
+}
+
+
void LazyPipeFDs::setNonBlockingWrite()
-{
- int flags = fcntl(fds_rw[1], F_GETFL, 0);
- if (-1 == flags)
- throwFromErrno("Cannot get file status flags of pipe", ErrorCodes::CANNOT_FCNTL);
- if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK))
- throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL);
-}
-
+{
+ int flags = fcntl(fds_rw[1], F_GETFL, 0);
+ if (-1 == flags)
+ throwFromErrno("Cannot get file status flags of pipe", ErrorCodes::CANNOT_FCNTL);
+ if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK))
+ throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL);
+}
+
void LazyPipeFDs::setNonBlockingRead()
{
int flags = fcntl(fds_rw[0], F_GETFL, 0);
@@ -94,35 +94,35 @@ void LazyPipeFDs::setNonBlockingReadWrite()
setNonBlockingWrite();
}
-void LazyPipeFDs::tryIncreaseSize(int desired_size)
-{
-#if defined(OS_LINUX)
- Poco::Logger * log = &Poco::Logger::get("Pipe");
-
- /** Increase pipe size to avoid slowdown during fine-grained trace collection.
- */
- int pipe_size = fcntl(fds_rw[1], F_GETPIPE_SZ);
- if (-1 == pipe_size)
- {
- if (errno == EINVAL)
- {
+void LazyPipeFDs::tryIncreaseSize(int desired_size)
+{
+#if defined(OS_LINUX)
+ Poco::Logger * log = &Poco::Logger::get("Pipe");
+
+ /** Increase pipe size to avoid slowdown during fine-grained trace collection.
+ */
+ int pipe_size = fcntl(fds_rw[1], F_GETPIPE_SZ);
+ if (-1 == pipe_size)
+ {
+ if (errno == EINVAL)
+ {
LOG_INFO(log, "Cannot get pipe capacity, {}. Very old Linux kernels have no support for this fcntl.", errnoToString(ErrorCodes::CANNOT_FCNTL));
- /// It will work nevertheless.
- }
- else
- throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL);
- }
- else
- {
- for (errno = 0; errno != EPERM && pipe_size < desired_size; pipe_size *= 2)
- if (-1 == fcntl(fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM)
- throwFromErrno("Cannot increase pipe capacity to " + std::to_string(pipe_size * 2), ErrorCodes::CANNOT_FCNTL);
-
- LOG_TRACE(log, "Pipe capacity is {}", ReadableSize(std::min(pipe_size, desired_size)));
- }
-#else
- (void)desired_size;
-#endif
-}
-
-}
+ /// It will work nevertheless.
+ }
+ else
+ throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL);
+ }
+ else
+ {
+ for (errno = 0; errno != EPERM && pipe_size < desired_size; pipe_size *= 2)
+ if (-1 == fcntl(fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM)
+ throwFromErrno("Cannot increase pipe capacity to " + std::to_string(pipe_size * 2), ErrorCodes::CANNOT_FCNTL);
+
+ LOG_TRACE(log, "Pipe capacity is {}", ReadableSize(std::min(pipe_size, desired_size)));
+ }
+#else
+ (void)desired_size;
+#endif
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h
index 20bd847c07..8f72bf9e54 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PipeFDs.h
@@ -1,40 +1,40 @@
-#pragma once
-
-#include <cstddef>
-
-
-namespace DB
-{
-
-/** Struct containing a pipe with lazy initialization.
- * Use `open` and `close` methods to manipulate pipe and `fds_rw` field to access
- * pipe's file descriptors.
- */
-struct LazyPipeFDs
-{
- int fds_rw[2] = {-1, -1};
-
- void open();
- void close();
-
+#pragma once
+
+#include <cstddef>
+
+
+namespace DB
+{
+
+/** Struct containing a pipe with lazy initialization.
+ * Use `open` and `close` methods to manipulate pipe and `fds_rw` field to access
+ * pipe's file descriptors.
+ */
+struct LazyPipeFDs
+{
+ int fds_rw[2] = {-1, -1};
+
+ void open();
+ void close();
+
/// Set O_NONBLOCK to different ends of pipe preserving existing flags.
/// Throws an exception if fcntl was not successful.
void setNonBlockingWrite();
void setNonBlockingRead();
void setNonBlockingReadWrite();
- void tryIncreaseSize(int desired_size);
-
- ~LazyPipeFDs();
-};
-
-
-/** Struct which opens new pipe on creation and closes it on destruction.
- * Use `fds_rw` field to access pipe's file descriptors.
- */
-struct PipeFDs : public LazyPipeFDs
-{
- PipeFDs();
-};
-
-}
+ void tryIncreaseSize(int desired_size);
+
+ ~LazyPipeFDs();
+};
+
+
+/** Struct which opens new pipe on creation and closes it on destruction.
+ * Use `fds_rw` field to access pipe's file descriptors.
+ */
+struct PipeFDs : public LazyPipeFDs
+{
+ PipeFDs();
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h
index e84e249d17..7f14a4b0d4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h
@@ -1,98 +1,98 @@
-#pragma once
-
-#include <time.h>
-#include <cstdlib>
-#include <climits>
-#include <random>
-#include <functional>
-#include <common/types.h>
+#pragma once
+
+#include <time.h>
+#include <cstdlib>
+#include <climits>
+#include <random>
+#include <functional>
+#include <common/types.h>
#include <common/scope_guard.h>
-#include <Common/PoolBase.h>
-#include <Common/ProfileEvents.h>
-#include <Common/NetException.h>
-#include <Common/Exception.h>
-#include <Common/randomSeed.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int ALL_CONNECTION_TRIES_FAILED;
- extern const int ALL_REPLICAS_ARE_STALE;
- extern const int LOGICAL_ERROR;
-}
-}
-
-namespace ProfileEvents
-{
- extern const Event DistributedConnectionFailTry;
- extern const Event DistributedConnectionFailAtAll;
-}
-
-/// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB.
-/// Initialized by several PoolBase objects.
-/// When a connection is requested, tries to create or choose an alive connection from one of the nested pools.
-/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples.
-/// Number of tries for a single pool is limited by max_tries parameter.
-/// The client can set nested pool priority by passing a GetPriority functor.
-///
-/// NOTE: if one of the nested pools blocks because it is empty, this pool will also block.
-///
-/// The client must provide a TryGetEntryFunc functor, which should perform a single try to get a connection from a nested pool.
-/// This functor can also check if the connection satisfies some eligibility criterion (e.g. check if
-/// the replica is up-to-date).
-
-template <typename TNestedPool>
-class PoolWithFailoverBase : private boost::noncopyable
-{
-public:
- using NestedPool = TNestedPool;
- using NestedPoolPtr = std::shared_ptr<NestedPool>;
- using Entry = typename NestedPool::Entry;
- using NestedPools = std::vector<NestedPoolPtr>;
-
- PoolWithFailoverBase(
- NestedPools nested_pools_,
- time_t decrease_error_period_,
- size_t max_error_cap_,
- Poco::Logger * log_)
- : nested_pools(std::move(nested_pools_))
- , decrease_error_period(decrease_error_period_)
- , max_error_cap(max_error_cap_)
- , shared_pool_states(nested_pools.size())
- , log(log_)
- {
- for (size_t i = 0;i < nested_pools.size(); ++i)
- shared_pool_states[i].config_priority = nested_pools[i]->getPriority();
- }
-
- struct TryResult
- {
- TryResult() = default;
-
- explicit TryResult(Entry entry_)
- : entry(std::move(entry_))
- , is_usable(true)
- , is_up_to_date(true)
- {
- }
-
- void reset()
- {
- entry = Entry();
- is_usable = false;
- is_up_to_date = false;
- staleness = 0.0;
- }
-
- Entry entry;
- bool is_usable = false; /// If false, the entry is unusable for current request
- /// (but may be usable for other requests, so error counts are not incremented)
- bool is_up_to_date = false; /// If true, the entry is a connection to up-to-date replica.
- double staleness = 0.0; /// Helps choosing the "least stale" option when all replicas are stale.
- };
-
+#include <Common/PoolBase.h>
+#include <Common/ProfileEvents.h>
+#include <Common/NetException.h>
+#include <Common/Exception.h>
+#include <Common/randomSeed.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int ALL_CONNECTION_TRIES_FAILED;
+ extern const int ALL_REPLICAS_ARE_STALE;
+ extern const int LOGICAL_ERROR;
+}
+}
+
+namespace ProfileEvents
+{
+ extern const Event DistributedConnectionFailTry;
+ extern const Event DistributedConnectionFailAtAll;
+}
+
+/// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB.
+/// Initialized by several PoolBase objects.
+/// When a connection is requested, tries to create or choose an alive connection from one of the nested pools.
+/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples.
+/// Number of tries for a single pool is limited by max_tries parameter.
+/// The client can set nested pool priority by passing a GetPriority functor.
+///
+/// NOTE: if one of the nested pools blocks because it is empty, this pool will also block.
+///
+/// The client must provide a TryGetEntryFunc functor, which should perform a single try to get a connection from a nested pool.
+/// This functor can also check if the connection satisfies some eligibility criterion (e.g. check if
+/// the replica is up-to-date).
+
+template <typename TNestedPool>
+class PoolWithFailoverBase : private boost::noncopyable
+{
+public:
+ using NestedPool = TNestedPool;
+ using NestedPoolPtr = std::shared_ptr<NestedPool>;
+ using Entry = typename NestedPool::Entry;
+ using NestedPools = std::vector<NestedPoolPtr>;
+
+ PoolWithFailoverBase(
+ NestedPools nested_pools_,
+ time_t decrease_error_period_,
+ size_t max_error_cap_,
+ Poco::Logger * log_)
+ : nested_pools(std::move(nested_pools_))
+ , decrease_error_period(decrease_error_period_)
+ , max_error_cap(max_error_cap_)
+ , shared_pool_states(nested_pools.size())
+ , log(log_)
+ {
+ for (size_t i = 0;i < nested_pools.size(); ++i)
+ shared_pool_states[i].config_priority = nested_pools[i]->getPriority();
+ }
+
+ struct TryResult
+ {
+ TryResult() = default;
+
+ explicit TryResult(Entry entry_)
+ : entry(std::move(entry_))
+ , is_usable(true)
+ , is_up_to_date(true)
+ {
+ }
+
+ void reset()
+ {
+ entry = Entry();
+ is_usable = false;
+ is_up_to_date = false;
+ staleness = 0.0;
+ }
+
+ Entry entry;
+ bool is_usable = false; /// If false, the entry is unusable for current request
+ /// (but may be usable for other requests, so error counts are not incremented)
+ bool is_up_to_date = false; /// If true, the entry is a connection to up-to-date replica.
+ double staleness = 0.0; /// Helps choosing the "least stale" option when all replicas are stale.
+ };
+
struct PoolState;
using PoolStates = std::vector<PoolState>;
@@ -106,34 +106,34 @@ public:
size_t slowdown_count = 0;
};
- /// This functor must be provided by a client. It must perform a single try that takes a connection
- /// from the provided pool and checks that it is good.
- using TryGetEntryFunc = std::function<TryResult(NestedPool & pool, std::string & fail_message)>;
-
- /// The client can provide this functor to affect load balancing - the index of a pool is passed to
- /// this functor. The pools with lower result value will be tried first.
- using GetPriorityFunc = std::function<size_t(size_t index)>;
-
-
- /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
- /// The method will throw if it is unable to get min_entries alive connections or
- /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas.
- std::vector<TryResult> getMany(
- size_t min_entries, size_t max_entries, size_t max_tries,
- size_t max_ignored_errors,
- bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry,
- const GetPriorityFunc & get_priority = GetPriorityFunc());
-
-protected:
-
- /// Returns a single connection.
- Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc());
-
- /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states.
- PoolStates updatePoolStates(size_t max_ignored_errors);
-
+ /// This functor must be provided by a client. It must perform a single try that takes a connection
+ /// from the provided pool and checks that it is good.
+ using TryGetEntryFunc = std::function<TryResult(NestedPool & pool, std::string & fail_message)>;
+
+ /// The client can provide this functor to affect load balancing - the index of a pool is passed to
+ /// this functor. The pools with lower result value will be tried first.
+ using GetPriorityFunc = std::function<size_t(size_t index)>;
+
+
+ /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
+ /// The method will throw if it is unable to get min_entries alive connections or
+ /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas.
+ std::vector<TryResult> getMany(
+ size_t min_entries, size_t max_entries, size_t max_tries,
+ size_t max_ignored_errors,
+ bool fallback_to_stale_replicas,
+ const TryGetEntryFunc & try_get_entry,
+ const GetPriorityFunc & get_priority = GetPriorityFunc());
+
+protected:
+
+ /// Returns a single connection.
+ Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
+ const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc());
+
+ /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states.
+ PoolStates updatePoolStates(size_t max_ignored_errors);
+
void updateErrorCounts(PoolStates & states, time_t & last_decrease_time) const;
std::vector<ShuffledPool> getShuffledPools(size_t max_ignored_errors, const GetPriorityFunc & get_priority);
@@ -146,21 +146,21 @@ protected:
return std::make_tuple(shared_pool_states, nested_pools, last_error_decrease_time);
}
- NestedPools nested_pools;
-
- const time_t decrease_error_period;
- const size_t max_error_cap;
-
- mutable std::mutex pool_states_mutex;
- PoolStates shared_pool_states;
- /// The time when error counts were last decreased.
- time_t last_error_decrease_time = 0;
-
- Poco::Logger * log;
-};
-
-
-template <typename TNestedPool>
+ NestedPools nested_pools;
+
+ const time_t decrease_error_period;
+ const size_t max_error_cap;
+
+ mutable std::mutex pool_states_mutex;
+ PoolStates shared_pool_states;
+ /// The time when error counts were last decreased.
+ time_t last_error_decrease_time = 0;
+
+ Poco::Logger * log;
+};
+
+
+template <typename TNestedPool>
std::vector<typename PoolWithFailoverBase<TNestedPool>::ShuffledPool>
PoolWithFailoverBase<TNestedPool>::getShuffledPools(
size_t max_ignored_errors, const PoolWithFailoverBase::GetPriorityFunc & get_priority)
@@ -201,181 +201,181 @@ inline void PoolWithFailoverBase<TNestedPool>::updateSharedErrorCounts(std::vect
}
template <typename TNestedPool>
-typename TNestedPool::Entry
-PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
-{
- std::vector<TryResult> results = getMany(
- 1 /* min entries */, 1 /* max entries */, 1 /* max tries */,
- max_ignored_errors, fallback_to_stale_replicas,
- try_get_entry, get_priority);
- if (results.empty() || results[0].entry.isNull())
- throw DB::Exception(
- "PoolWithFailoverBase::getMany() returned less than min_entries entries.",
- DB::ErrorCodes::LOGICAL_ERROR);
- return results[0].entry;
-}
-
-template <typename TNestedPool>
-std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult>
-PoolWithFailoverBase<TNestedPool>::getMany(
- size_t min_entries, size_t max_entries, size_t max_tries,
- size_t max_ignored_errors,
- bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry,
- const GetPriorityFunc & get_priority)
-{
+typename TNestedPool::Entry
+PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
+ const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
+{
+ std::vector<TryResult> results = getMany(
+ 1 /* min entries */, 1 /* max entries */, 1 /* max tries */,
+ max_ignored_errors, fallback_to_stale_replicas,
+ try_get_entry, get_priority);
+ if (results.empty() || results[0].entry.isNull())
+ throw DB::Exception(
+ "PoolWithFailoverBase::getMany() returned less than min_entries entries.",
+ DB::ErrorCodes::LOGICAL_ERROR);
+ return results[0].entry;
+}
+
+template <typename TNestedPool>
+std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult>
+PoolWithFailoverBase<TNestedPool>::getMany(
+ size_t min_entries, size_t max_entries, size_t max_tries,
+ size_t max_ignored_errors,
+ bool fallback_to_stale_replicas,
+ const TryGetEntryFunc & try_get_entry,
+ const GetPriorityFunc & get_priority)
+{
std::vector<ShuffledPool> shuffled_pools = getShuffledPools(max_ignored_errors, get_priority);
-
- /// We will try to get a connection from each pool until a connection is produced or max_tries is reached.
- std::vector<TryResult> try_results(shuffled_pools.size());
- size_t entries_count = 0;
- size_t usable_count = 0;
- size_t up_to_date_count = 0;
- size_t failed_pools_count = 0;
-
- /// At exit update shared error counts with error counts occurred during this call.
- SCOPE_EXIT(
- {
+
+ /// We will try to get a connection from each pool until a connection is produced or max_tries is reached.
+ std::vector<TryResult> try_results(shuffled_pools.size());
+ size_t entries_count = 0;
+ size_t usable_count = 0;
+ size_t up_to_date_count = 0;
+ size_t failed_pools_count = 0;
+
+ /// At exit update shared error counts with error counts occurred during this call.
+ SCOPE_EXIT(
+ {
updateSharedErrorCounts(shuffled_pools);
- });
-
- std::string fail_messages;
- bool finished = false;
- while (!finished)
- {
- for (size_t i = 0; i < shuffled_pools.size(); ++i)
- {
- if (up_to_date_count >= max_entries /// Already enough good entries.
- || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced.
- {
- finished = true;
- break;
- }
-
- ShuffledPool & shuffled_pool = shuffled_pools[i];
- TryResult & result = try_results[i];
+ });
+
+ std::string fail_messages;
+ bool finished = false;
+ while (!finished)
+ {
+ for (size_t i = 0; i < shuffled_pools.size(); ++i)
+ {
+ if (up_to_date_count >= max_entries /// Already enough good entries.
+ || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced.
+ {
+ finished = true;
+ break;
+ }
+
+ ShuffledPool & shuffled_pool = shuffled_pools[i];
+ TryResult & result = try_results[i];
if (max_tries && (shuffled_pool.error_count >= max_tries || !result.entry.isNull()))
- continue;
-
- std::string fail_message;
- result = try_get_entry(*shuffled_pool.pool, fail_message);
-
- if (!fail_message.empty())
- fail_messages += fail_message + '\n';
-
- if (!result.entry.isNull())
- {
- ++entries_count;
- if (result.is_usable)
- {
- ++usable_count;
- if (result.is_up_to_date)
- ++up_to_date_count;
- }
- }
- else
- {
+ continue;
+
+ std::string fail_message;
+ result = try_get_entry(*shuffled_pool.pool, fail_message);
+
+ if (!fail_message.empty())
+ fail_messages += fail_message + '\n';
+
+ if (!result.entry.isNull())
+ {
+ ++entries_count;
+ if (result.is_usable)
+ {
+ ++usable_count;
+ if (result.is_up_to_date)
+ ++up_to_date_count;
+ }
+ }
+ else
+ {
LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
- ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
-
- shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1);
-
- if (shuffled_pool.error_count >= max_tries)
- {
- ++failed_pools_count;
- ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll);
- }
- }
- }
- }
-
- if (usable_count < min_entries)
- throw DB::NetException(
- "All connection tries failed. Log: \n\n" + fail_messages + "\n",
- DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED);
-
- try_results.erase(
- std::remove_if(
- try_results.begin(), try_results.end(),
- [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; }),
- try_results.end());
-
- /// Sort so that preferred items are near the beginning.
- std::stable_sort(
- try_results.begin(), try_results.end(),
- [](const TryResult & left, const TryResult & right)
- {
- return std::forward_as_tuple(!left.is_up_to_date, left.staleness)
- < std::forward_as_tuple(!right.is_up_to_date, right.staleness);
- });
-
- if (fallback_to_stale_replicas)
- {
- /// There is not enough up-to-date entries but we are allowed to return stale entries.
- /// Gather all up-to-date ones and least-bad stale ones.
-
- size_t size = std::min(try_results.size(), max_entries);
- try_results.resize(size);
- }
- else if (up_to_date_count >= min_entries)
- {
- /// There is enough up-to-date entries.
- try_results.resize(up_to_date_count);
- }
- else
- throw DB::Exception(
- "Could not find enough connections to up-to-date replicas. Got: " + std::to_string(up_to_date_count)
- + ", needed: " + std::to_string(min_entries),
- DB::ErrorCodes::ALL_REPLICAS_ARE_STALE);
-
- return try_results;
-}
-
-template <typename TNestedPool>
-struct PoolWithFailoverBase<TNestedPool>::PoolState
-{
- UInt64 error_count = 0;
+ ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
+
+ shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1);
+
+ if (shuffled_pool.error_count >= max_tries)
+ {
+ ++failed_pools_count;
+ ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll);
+ }
+ }
+ }
+ }
+
+ if (usable_count < min_entries)
+ throw DB::NetException(
+ "All connection tries failed. Log: \n\n" + fail_messages + "\n",
+ DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED);
+
+ try_results.erase(
+ std::remove_if(
+ try_results.begin(), try_results.end(),
+ [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; }),
+ try_results.end());
+
+ /// Sort so that preferred items are near the beginning.
+ std::stable_sort(
+ try_results.begin(), try_results.end(),
+ [](const TryResult & left, const TryResult & right)
+ {
+ return std::forward_as_tuple(!left.is_up_to_date, left.staleness)
+ < std::forward_as_tuple(!right.is_up_to_date, right.staleness);
+ });
+
+ if (fallback_to_stale_replicas)
+ {
+ /// There is not enough up-to-date entries but we are allowed to return stale entries.
+ /// Gather all up-to-date ones and least-bad stale ones.
+
+ size_t size = std::min(try_results.size(), max_entries);
+ try_results.resize(size);
+ }
+ else if (up_to_date_count >= min_entries)
+ {
+ /// There is enough up-to-date entries.
+ try_results.resize(up_to_date_count);
+ }
+ else
+ throw DB::Exception(
+ "Could not find enough connections to up-to-date replicas. Got: " + std::to_string(up_to_date_count)
+ + ", needed: " + std::to_string(min_entries),
+ DB::ErrorCodes::ALL_REPLICAS_ARE_STALE);
+
+ return try_results;
+}
+
+template <typename TNestedPool>
+struct PoolWithFailoverBase<TNestedPool>::PoolState
+{
+ UInt64 error_count = 0;
/// The number of slowdowns that led to changing replica in HedgedRequestsFactory
UInt64 slowdown_count = 0;
- /// Priority from the <remote_server> configuration.
- Int64 config_priority = 1;
- /// Priority from the GetPriorityFunc.
- Int64 priority = 0;
- UInt32 random = 0;
-
- void randomize()
- {
- random = rng();
- }
-
- static bool compare(const PoolState & lhs, const PoolState & rhs)
- {
+ /// Priority from the <remote_server> configuration.
+ Int64 config_priority = 1;
+ /// Priority from the GetPriorityFunc.
+ Int64 priority = 0;
+ UInt32 random = 0;
+
+ void randomize()
+ {
+ random = rng();
+ }
+
+ static bool compare(const PoolState & lhs, const PoolState & rhs)
+ {
return std::forward_as_tuple(lhs.error_count, lhs.slowdown_count, lhs.config_priority, lhs.priority, lhs.random)
< std::forward_as_tuple(rhs.error_count, rhs.slowdown_count, rhs.config_priority, rhs.priority, rhs.random);
- }
-
-private:
- std::minstd_rand rng = std::minstd_rand(randomSeed());
-};
-
-template <typename TNestedPool>
-typename PoolWithFailoverBase<TNestedPool>::PoolStates
-PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors)
-{
- PoolStates result;
- result.reserve(nested_pools.size());
-
- {
- std::lock_guard lock(pool_states_mutex);
-
- for (auto & state : shared_pool_states)
- state.randomize();
-
+ }
+
+private:
+ std::minstd_rand rng = std::minstd_rand(randomSeed());
+};
+
+template <typename TNestedPool>
+typename PoolWithFailoverBase<TNestedPool>::PoolStates
+PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors)
+{
+ PoolStates result;
+ result.reserve(nested_pools.size());
+
+ {
+ std::lock_guard lock(pool_states_mutex);
+
+ for (auto & state : shared_pool_states)
+ state.randomize();
+
updateErrorCounts(shared_pool_states, last_error_decrease_time);
result.assign(shared_pool_states.begin(), shared_pool_states.end());
}
-
+
/// distributed_replica_max_ignored_errors
for (auto & state : result)
state.error_count = std::max<UInt64>(0, state.error_count - max_ignored_errors);
@@ -393,7 +393,7 @@ void PoolWithFailoverBase<TNestedPool>::updateErrorCounts(PoolWithFailoverBase<T
time_t delta = current_time - last_decrease_time;
if (delta >= 0)
- {
+ {
const UInt64 MAX_BITS = sizeof(UInt64) * CHAR_BIT;
size_t shift_amount = MAX_BITS;
/// Divide error counts by 2 every decrease_error_period seconds.
@@ -403,25 +403,25 @@ void PoolWithFailoverBase<TNestedPool>::updateErrorCounts(PoolWithFailoverBase<T
/// Else if the function is called often enough, error count will never decrease.
if (shift_amount)
last_decrease_time = current_time;
-
+
if (shift_amount >= MAX_BITS)
- {
+ {
for (auto & state : states)
- {
+ {
state.error_count = 0;
state.slowdown_count = 0;
- }
+ }
}
else if (shift_amount)
{
for (auto & state : states)
- {
+ {
state.error_count >>= shift_amount;
state.slowdown_count >>= shift_amount;
- }
- }
- }
- }
+ }
+ }
+ }
+ }
else
last_decrease_time = current_time;
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp
index fcc4124aa8..3dce47b34c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.cpp
@@ -1,198 +1,198 @@
-#include "ProcfsMetricsProvider.h"
-
-#if defined(__linux__)
-
-#include <Common/Exception.h>
-#include <IO/ReadBufferFromMemory.h>
-#include <IO/ReadHelpers.h>
-
-#include <common/find_symbols.h>
-#include <common/logger_useful.h>
-
-#include <cassert>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <linux/taskstats.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int FILE_DOESNT_EXIST;
- extern const int CANNOT_OPEN_FILE;
- extern const int CANNOT_CLOSE_FILE;
- extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
-}
-
-static constexpr auto thread_schedstat = "/proc/thread-self/schedstat";
-static constexpr auto thread_stat = "/proc/thread-self/stat";
-static constexpr auto thread_io = "/proc/thread-self/io";
-
-
-namespace
-{
-[[noreturn]] inline void throwWithFailedToOpenFile(const std::string & filename)
-{
- throwFromErrno(
- "Cannot open file " + filename,
- errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
-}
-
-inline void emitErrorMsgWithFailedToCloseFile(const std::string & filename)
-{
- try
- {
- throwFromErrno(
- "File descriptor for \"" + filename + "\" could not be closed. "
- "Something seems to have gone wrong. Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE);
- }
- catch (const ErrnoException &)
- {
- DB::tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-ssize_t readFromFD(const int fd, const char * filename, char * buf, size_t buf_size)
-{
- ssize_t res = 0;
-
- do
- {
- res = ::pread(fd, buf, buf_size, 0);
-
- if (-1 == res)
- {
- if (errno == EINTR)
- continue;
-
- throwFromErrno(
- "Cannot read from file " + std::string(filename),
- ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
- }
-
- assert(res >= 0);
- break;
- } while (true);
-
- return res;
-}
-}
-
-
-bool ProcfsMetricsProvider::isAvailable() noexcept
-{
- struct stat sb;
- int res = ::stat(thread_schedstat, &sb);
-
- /// Verify that procfs is mounted, one of the stats file exists and is a regular file
- return res != -1 && (sb.st_mode & S_IFMT) == S_IFREG;
-}
-
-
-ProcfsMetricsProvider::ProcfsMetricsProvider(const pid_t /*tid*/)
-{
- thread_schedstat_fd = ::open(thread_schedstat, O_RDONLY | O_CLOEXEC);
- if (-1 == thread_schedstat_fd)
- {
- throwWithFailedToOpenFile(thread_schedstat);
- }
- thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC);
- if (-1 == thread_stat_fd)
- {
- ::close(thread_schedstat_fd);
- throwWithFailedToOpenFile(thread_stat);
- }
- thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC);
- if (-1 != thread_io_fd)
- {
- stats_version = 3;
- }
-}
-
-
-ProcfsMetricsProvider::~ProcfsMetricsProvider()
-{
- if (stats_version >= 3 && 0 != ::close(thread_io_fd))
- emitErrorMsgWithFailedToCloseFile(thread_io);
- if (0 != ::close(thread_stat_fd))
- emitErrorMsgWithFailedToCloseFile(thread_stat);
- if (0 != ::close(thread_schedstat_fd))
- emitErrorMsgWithFailedToCloseFile(thread_schedstat);
-}
-
-
-void ProcfsMetricsProvider::getTaskStats(::taskstats & out_stats) const
-{
- constexpr size_t buf_size = 1024;
- char buf[buf_size];
-
- out_stats.version = stats_version;
-
- readParseAndSetThreadCPUStat(out_stats, buf, buf_size);
- readParseAndSetThreadBlkIOStat(out_stats, buf, buf_size);
-
- if (stats_version >= 3)
- {
- readParseAndSetThreadIOStat(out_stats, buf, buf_size);
- }
-}
-
-
-void ProcfsMetricsProvider::readParseAndSetThreadCPUStat(::taskstats & out_stats, char * buf, size_t buf_size) const
-{
- ssize_t res = readFromFD(thread_schedstat_fd, thread_schedstat, buf, buf_size);
- ReadBufferFromMemory in_schedstat(buf, res);
-
- readIntText(out_stats.cpu_run_virtual_total, in_schedstat);
- skipWhitespaceIfAny(in_schedstat);
- readIntText(out_stats.cpu_delay_total, in_schedstat);
-}
-
-
-void ProcfsMetricsProvider::readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const
-{
- ssize_t res = readFromFD(thread_stat_fd, thread_stat, buf, buf_size - 1);
- ReadBufferFromMemory in_stat(buf, res);
-
- /// We need to skip the first 41 fields of the string read from /proc/thread-self/stat.
- for (int i = 0; i < 41; ++i)
- {
- in_stat.position() = find_first_symbols<' ', '\t'>(in_stat.position(), in_stat.buffer().end());
- skipWhitespaceIfAny(in_stat);
- }
-
- /// Read field #42 - Aggregated block I/O delays, measured in clock ticks (centiseconds)
- readIntText(out_stats.blkio_delay_total, in_stat);
- out_stats.blkio_delay_total *= 10000000ul; /// We need to return time in nanoseconds
-}
-
-
-void ProcfsMetricsProvider::readParseAndSetThreadIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const
-{
- ssize_t res = readFromFD(thread_io_fd, thread_io, buf, buf_size);
- ReadBufferFromMemory in_thread_io(buf, res);
-
- assertString("rchar:", in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- readIntText(out_stats.read_char, in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- assertString("wchar:", in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- readIntText(out_stats.write_char, in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- skipToNextLineOrEOF(in_thread_io);
- skipToNextLineOrEOF(in_thread_io);
- assertString("read_bytes:", in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- readIntText(out_stats.read_bytes, in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- assertString("write_bytes:", in_thread_io);
- skipWhitespaceIfAny(in_thread_io);
- readIntText(out_stats.write_bytes, in_thread_io);
-}
-}
-
-#endif
+#include "ProcfsMetricsProvider.h"
+
+#if defined(__linux__)
+
+#include <Common/Exception.h>
+#include <IO/ReadBufferFromMemory.h>
+#include <IO/ReadHelpers.h>
+
+#include <common/find_symbols.h>
+#include <common/logger_useful.h>
+
+#include <cassert>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/taskstats.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int FILE_DOESNT_EXIST;
+ extern const int CANNOT_OPEN_FILE;
+ extern const int CANNOT_CLOSE_FILE;
+ extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
+}
+
+static constexpr auto thread_schedstat = "/proc/thread-self/schedstat";
+static constexpr auto thread_stat = "/proc/thread-self/stat";
+static constexpr auto thread_io = "/proc/thread-self/io";
+
+
+namespace
+{
+[[noreturn]] inline void throwWithFailedToOpenFile(const std::string & filename)
+{
+ throwFromErrno(
+ "Cannot open file " + filename,
+ errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
+}
+
+inline void emitErrorMsgWithFailedToCloseFile(const std::string & filename)
+{
+ try
+ {
+ throwFromErrno(
+ "File descriptor for \"" + filename + "\" could not be closed. "
+ "Something seems to have gone wrong. Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE);
+ }
+ catch (const ErrnoException &)
+ {
+ DB::tryLogCurrentException(__PRETTY_FUNCTION__);
+ }
+}
+
+ssize_t readFromFD(const int fd, const char * filename, char * buf, size_t buf_size)
+{
+ ssize_t res = 0;
+
+ do
+ {
+ res = ::pread(fd, buf, buf_size, 0);
+
+ if (-1 == res)
+ {
+ if (errno == EINTR)
+ continue;
+
+ throwFromErrno(
+ "Cannot read from file " + std::string(filename),
+ ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
+ }
+
+ assert(res >= 0);
+ break;
+ } while (true);
+
+ return res;
+}
+}
+
+
+bool ProcfsMetricsProvider::isAvailable() noexcept
+{
+ struct stat sb;
+ int res = ::stat(thread_schedstat, &sb);
+
+ /// Verify that procfs is mounted and that one of the stats files exists and is a regular file
+ return res != -1 && (sb.st_mode & S_IFMT) == S_IFREG;
+}
+
+
+ProcfsMetricsProvider::ProcfsMetricsProvider(const pid_t /*tid*/)
+{
+ thread_schedstat_fd = ::open(thread_schedstat, O_RDONLY | O_CLOEXEC);
+ if (-1 == thread_schedstat_fd)
+ {
+ throwWithFailedToOpenFile(thread_schedstat);
+ }
+ thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC);
+ if (-1 == thread_stat_fd)
+ {
+ ::close(thread_schedstat_fd);
+ throwWithFailedToOpenFile(thread_stat);
+ }
+ thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC);
+ if (-1 != thread_io_fd)
+ {
+ stats_version = 3;
+ }
+}
+
+
+ProcfsMetricsProvider::~ProcfsMetricsProvider()
+{
+ if (stats_version >= 3 && 0 != ::close(thread_io_fd))
+ emitErrorMsgWithFailedToCloseFile(thread_io);
+ if (0 != ::close(thread_stat_fd))
+ emitErrorMsgWithFailedToCloseFile(thread_stat);
+ if (0 != ::close(thread_schedstat_fd))
+ emitErrorMsgWithFailedToCloseFile(thread_schedstat);
+}
+
+
+void ProcfsMetricsProvider::getTaskStats(::taskstats & out_stats) const
+{
+ constexpr size_t buf_size = 1024;
+ char buf[buf_size];
+
+ out_stats.version = stats_version;
+
+ readParseAndSetThreadCPUStat(out_stats, buf, buf_size);
+ readParseAndSetThreadBlkIOStat(out_stats, buf, buf_size);
+
+ if (stats_version >= 3)
+ {
+ readParseAndSetThreadIOStat(out_stats, buf, buf_size);
+ }
+}
+
+
+void ProcfsMetricsProvider::readParseAndSetThreadCPUStat(::taskstats & out_stats, char * buf, size_t buf_size) const
+{
+ ssize_t res = readFromFD(thread_schedstat_fd, thread_schedstat, buf, buf_size);
+ ReadBufferFromMemory in_schedstat(buf, res);
+
+ readIntText(out_stats.cpu_run_virtual_total, in_schedstat);
+ skipWhitespaceIfAny(in_schedstat);
+ readIntText(out_stats.cpu_delay_total, in_schedstat);
+}
+
+
+void ProcfsMetricsProvider::readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const
+{
+ ssize_t res = readFromFD(thread_stat_fd, thread_stat, buf, buf_size - 1);
+ ReadBufferFromMemory in_stat(buf, res);
+
+ /// We need to skip the first 41 fields of the string read from /proc/thread-self/stat.
+ for (int i = 0; i < 41; ++i)
+ {
+ in_stat.position() = find_first_symbols<' ', '\t'>(in_stat.position(), in_stat.buffer().end());
+ skipWhitespaceIfAny(in_stat);
+ }
+
+ /// Read field #42 - Aggregated block I/O delays, measured in clock ticks (centiseconds)
+ readIntText(out_stats.blkio_delay_total, in_stat);
+ out_stats.blkio_delay_total *= 10000000ul; /// We need to return time in nanoseconds
+}
+
+
+void ProcfsMetricsProvider::readParseAndSetThreadIOStat(::taskstats & out_stats, char * buf, size_t buf_size) const
+{
+ ssize_t res = readFromFD(thread_io_fd, thread_io, buf, buf_size);
+ ReadBufferFromMemory in_thread_io(buf, res);
+
+ assertString("rchar:", in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ readIntText(out_stats.read_char, in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ assertString("wchar:", in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ readIntText(out_stats.write_char, in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ skipToNextLineOrEOF(in_thread_io);
+ skipToNextLineOrEOF(in_thread_io);
+ assertString("read_bytes:", in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ readIntText(out_stats.read_bytes, in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ assertString("write_bytes:", in_thread_io);
+ skipWhitespaceIfAny(in_thread_io);
+ readIntText(out_stats.write_bytes, in_thread_io);
+}
+}
+
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h
index 60eb94bfcc..475a16af5e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ProcfsMetricsProvider.h
@@ -1,44 +1,44 @@
-#pragma once
-
-#include <sys/types.h>
-#include <boost/noncopyable.hpp>
-
-
-#if defined(__linux__)
-struct taskstats;
-
-namespace DB
-{
-/// Provides several essential per-task metrics by reading data from Procfs (when available).
-class ProcfsMetricsProvider : private boost::noncopyable
-{
-public:
- ProcfsMetricsProvider(const pid_t /*tid*/);
- ~ProcfsMetricsProvider();
-
- /// Updates only a part of taskstats struct's fields:
- /// - cpu_run_virtual_total, cpu_delay_total (when /proc/thread-self/schedstat is available)
- /// - blkio_delay_total (when /proc/thread-self/stat is available)
- /// - rchar, wchar, read_bytes, write_bytes (when /prod/thread-self/io is available)
- /// See: man procfs
- void getTaskStats(::taskstats & out_stats) const;
-
- /// Tells whether this metrics (via Procfs) is provided on the current platform
- static bool isAvailable() noexcept;
-
-private:
- void readParseAndSetThreadCPUStat(::taskstats & out_stats, char *, size_t) const;
- void readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char *, size_t) const;
- void readParseAndSetThreadIOStat(::taskstats & out_stats, char *, size_t) const;
-
-private:
- int thread_schedstat_fd = -1;
- int thread_stat_fd = -1;
- int thread_io_fd = -1;
-
- /// This field is used for compatibility with TasksStatsCounters::incrementProfileEvents()
- unsigned short stats_version = 1;
-};
-
-}
-#endif
+#pragma once
+
+#include <sys/types.h>
+#include <boost/noncopyable.hpp>
+
+
+#if defined(__linux__)
+struct taskstats;
+
+namespace DB
+{
+/// Provides several essential per-task metrics by reading data from Procfs (when available).
+class ProcfsMetricsProvider : private boost::noncopyable
+{
+public:
+ ProcfsMetricsProvider(const pid_t /*tid*/);
+ ~ProcfsMetricsProvider();
+
+ /// Updates only a part of taskstats struct's fields:
+ /// - cpu_run_virtual_total, cpu_delay_total (when /proc/thread-self/schedstat is available)
+ /// - blkio_delay_total (when /proc/thread-self/stat is available)
+ /// - rchar, wchar, read_bytes, write_bytes (when /proc/thread-self/io is available)
+ /// See: man procfs
+ void getTaskStats(::taskstats & out_stats) const;
+
+ /// Tells whether these metrics (via Procfs) are available on the current platform
+ static bool isAvailable() noexcept;
+
+private:
+ void readParseAndSetThreadCPUStat(::taskstats & out_stats, char *, size_t) const;
+ void readParseAndSetThreadBlkIOStat(::taskstats & out_stats, char *, size_t) const;
+ void readParseAndSetThreadIOStat(::taskstats & out_stats, char *, size_t) const;
+
+private:
+ int thread_schedstat_fd = -1;
+ int thread_stat_fd = -1;
+ int thread_io_fd = -1;
+
+ /// This field is used for compatibility with TasksStatsCounters::incrementProfileEvents()
+ unsigned short stats_version = 1;
+};
+
+}
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h
index 8830945957..b370272302 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h
@@ -1,72 +1,72 @@
-#pragma once
-
-#include <memory>
-#include <vector>
-#include <cstdint>
-
-namespace Poco
-{
-namespace Util
-{
- class AbstractConfiguration;
-}
-}
-
-/// SensitiveDataMasker allows to remove sensitive data from queries using set of regexp-based rules
-
-/// It's used as a singleton via getInstance method
-
-/// Initially it's empty (nullptr) and after manual initialization
-/// (one-time, done by setInstance call) it takes the proper value which
-/// is stored in unique_ptr.
-
-/// It looks like the singleton is the best option here, as
-/// two users of that object (OwnSplitChannel & Interpreters/executeQuery)
+#pragma once
+
+#include <memory>
+#include <vector>
+#include <cstdint>
+
+namespace Poco
+{
+namespace Util
+{
+ class AbstractConfiguration;
+}
+}
+
+/// SensitiveDataMasker allows removing sensitive data from queries using a set of regexp-based rules
+
+/// It's used as a singleton via getInstance method
+
+/// Initially it's empty (nullptr) and after manual initialization
+/// (one-time, done by setInstance call) it takes the proper value which
+/// is stored in unique_ptr.
+
+/// It looks like the singleton is the best option here, as
+/// two users of that object (OwnSplitChannel & Interpreters/executeQuery)
/// can't own/share that Masker properly without synchronization & locks,
-/// and we can't afford setting global locks for each logged line.
-
-/// I've considered singleton alternatives, but it's unclear who should own the object,
-/// and it introduce unnecessary complexity in implementation (passing references back and forward):
-///
-/// context can't own, as Context is destroyed before logger,
-/// and logger lives longer and logging can still happen after Context destruction.
-/// resetting masker in the logger at the moment of
-/// context destruction can't be done w/o synchronization / locks in a safe manner.
-///
-/// logger is Poco derived and i didn't want to brake it's interface,
-/// also logger can be dynamically reconfigured without server restart,
-/// and it actually recreates OwnSplitChannel when reconfiguration happen,
-/// so that makes it's quite tricky. So it a bad candidate for owning masker too.
-
-namespace DB
-{
-class SensitiveDataMasker
-{
-private:
- class MaskingRule;
- std::vector<std::unique_ptr<MaskingRule>> all_masking_rules;
- static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker;
-
-public:
- SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
- ~SensitiveDataMasker();
-
- /// Returns the number of matched rules.
- size_t wipeSensitiveData(std::string & data) const;
-
- /// setInstance is not thread-safe and should be called once in single-thread mode.
- /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367
- static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_);
- static SensitiveDataMasker * getInstance();
-
- /// Used in tests.
- void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string);
-
-#ifndef NDEBUG
- void printStats();
-#endif
-
- size_t rulesCount() const;
-};
-
-};
+/// and we can't afford setting global locks for each logged line.
+
+/// I've considered singleton alternatives, but it's unclear who should own the object,
+/// and they introduce unnecessary complexity in the implementation (passing references back and forth):
+///
+/// Context can't own it, as Context is destroyed before the logger,
+/// and logger lives longer and logging can still happen after Context destruction.
+/// resetting masker in the logger at the moment of
+/// context destruction can't be done w/o synchronization / locks in a safe manner.
+///
+/// logger is Poco-derived and I didn't want to break its interface;
+/// also, the logger can be dynamically reconfigured without a server restart,
+/// and it actually recreates OwnSplitChannel when reconfiguration happens,
+/// which makes it quite tricky. So it is a bad candidate for owning the masker too.
+
+namespace DB
+{
+class SensitiveDataMasker
+{
+private:
+ class MaskingRule;
+ std::vector<std::unique_ptr<MaskingRule>> all_masking_rules;
+ static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker;
+
+public:
+ SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
+ ~SensitiveDataMasker();
+
+ /// Returns the number of matched rules.
+ size_t wipeSensitiveData(std::string & data) const;
+
+ /// setInstance is not thread-safe and should be called once in single-thread mode.
+ /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367
+ static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_);
+ static SensitiveDataMasker * getInstance();
+
+ /// Used in tests.
+ void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string);
+
+#ifndef NDEBUG
+ void printStats();
+#endif
+
+ size_t rulesCount() const;
+};
+
+};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp
index 92978a0ad8..6b09a9741a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.cpp
@@ -1,319 +1,319 @@
-#include "TaskStatsInfoGetter.h"
-#include <Common/Exception.h>
+#include "TaskStatsInfoGetter.h"
+#include <Common/Exception.h>
#include <common/types.h>
-
-#include <unistd.h>
-
-#if defined(OS_LINUX)
-
-#include "hasLinuxCapability.h"
-#include <common/unaligned.h>
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <linux/genetlink.h>
-#include <linux/netlink.h>
-#include <linux/taskstats.h>
-#include <linux/capability.h>
-
-#if defined(__clang__)
- #pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
-#endif
-
-/// Basic idea is motivated by "iotop" tool.
-/// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int NETLINK_ERROR;
-}
-
-// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed.
-static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len)
-{
- return len >= static_cast<ssize_t>(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast<size_t>(len) >= nlh->nlmsg_len;
-}
-
-namespace
-{
-
-
-/** The message contains:
- * - Netlink protocol header;
- * - Generic Netlink (is a sub-protocol of Netlink that we use) protocol header;
- * - Payload
- * -- that itself is a list of "Attributes" (sub-messages), each of them contains length (including header), type, and its own payload.
- * -- and attribute payload may be represented by the list of embedded attributes.
- */
-struct NetlinkMessage
-{
- static size_t constexpr MAX_MSG_SIZE = 1024;
-
- alignas(NLMSG_ALIGNTO) ::nlmsghdr header;
-
- struct Attribute
- {
- ::nlattr header;
-
- alignas(NLMSG_ALIGNTO) char payload[0];
-
- const Attribute * next() const
- {
- return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + NLA_ALIGN(header.nla_len));
- }
- };
-
- union alignas(NLMSG_ALIGNTO)
- {
- struct
- {
- ::genlmsghdr generic_header;
-
- union alignas(NLMSG_ALIGNTO)
- {
- char buf[MAX_MSG_SIZE];
- Attribute attribute; /// First attribute. There may be more.
- } payload;
- };
-
- ::nlmsgerr error;
- };
-
- const Attribute * end() const
- {
- return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + header.nlmsg_len);
- }
-
- void send(int fd) const
- {
- const char * request_buf = reinterpret_cast<const char *>(this);
- ssize_t request_size = header.nlmsg_len;
-
- union
- {
- ::sockaddr_nl nladdr{};
- ::sockaddr sockaddr;
- };
-
- nladdr.nl_family = AF_NETLINK;
-
- while (true)
- {
- ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, &sockaddr, sizeof(nladdr));
-
- if (bytes_sent <= 0)
- {
- if (errno == EAGAIN)
- continue;
- else
- throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR);
- }
-
- if (bytes_sent > request_size)
- throw Exception("Wrong result of sendto system call: bytes_sent is greater than request size", ErrorCodes::NETLINK_ERROR);
-
- if (bytes_sent == request_size)
- break;
-
- request_buf += bytes_sent;
- request_size -= bytes_sent;
- }
- }
-
- void receive(int fd)
- {
- ssize_t bytes_received = ::recv(fd, this, sizeof(*this), 0);
-
- if (header.nlmsg_type == NLMSG_ERROR)
- throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR);
-
- if (!is_nlmsg_ok(&header, bytes_received))
- throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR);
- }
-};
-
-
-NetlinkMessage query(
- int fd,
- UInt16 type,
- UInt32 pid,
- UInt8 command,
- UInt16 attribute_type,
- const void * attribute_data,
- int attribute_size)
-{
- NetlinkMessage request{};
-
- request.header.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); /// Length of both headers.
- request.header.nlmsg_type = type;
- request.header.nlmsg_flags = NLM_F_REQUEST; /// A request.
- request.header.nlmsg_seq = 0;
- request.header.nlmsg_pid = pid;
-
- request.generic_header.cmd = command;
- request.generic_header.version = 1;
-
- request.payload.attribute.header.nla_type = attribute_type;
- request.payload.attribute.header.nla_len = attribute_size + NLA_HDRLEN;
-
- memcpy(&request.payload.attribute.payload, attribute_data, attribute_size);
-
- request.header.nlmsg_len += NLMSG_ALIGN(request.payload.attribute.header.nla_len);
-
- request.send(fd);
-
- NetlinkMessage response;
- response.receive(fd);
-
- return response;
-}
-
-
-UInt16 getFamilyIdImpl(int fd)
-{
- NetlinkMessage answer = query(fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, TASKSTATS_GENL_NAME, strlen(TASKSTATS_GENL_NAME) + 1);
-
- /// NOTE Why the relevant info is located in the second attribute?
- const NetlinkMessage::Attribute * attr = answer.payload.attribute.next();
-
- if (attr->header.nla_type != CTRL_ATTR_FAMILY_ID)
- throw Exception("Received wrong attribute as an answer to GET_FAMILY Netlink command", ErrorCodes::NETLINK_ERROR);
-
- return unalignedLoad<UInt16>(attr->payload);
-}
-
-
-bool checkPermissionsImpl()
-{
- static bool res = hasLinuxCapability(CAP_NET_ADMIN);
- if (!res)
- return false;
-
- /// Check that we can successfully initialize TaskStatsInfoGetter.
- /// It will ask about family id through Netlink.
- /// On some LXC containers we have capability but we still cannot use Netlink.
-
- try
- {
- TaskStatsInfoGetter();
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- return false;
- }
-
- return true;
-}
-
-
-UInt16 getFamilyId(int fd)
-{
- /// It is thread and exception safe since C++11 and even before.
- static UInt16 res = getFamilyIdImpl(fd);
- return res;
-}
-
-}
-
-
-bool TaskStatsInfoGetter::checkPermissions()
-{
- static bool res = checkPermissionsImpl();
- return res;
-}
-
-
-TaskStatsInfoGetter::TaskStatsInfoGetter()
-{
- netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
- if (netlink_socket_fd < 0)
- throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR);
-
- /// On some containerized environments, operation on Netlink socket could hang forever.
- /// We set reasonably small timeout to overcome this issue.
-
- struct timeval tv;
- tv.tv_sec = 0;
- tv.tv_usec = 50000;
-
- if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char *>(&tv), sizeof(tv)))
- throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR);
-
- union
- {
- ::sockaddr_nl addr{};
- ::sockaddr sockaddr;
- };
- addr.nl_family = AF_NETLINK;
-
- if (::bind(netlink_socket_fd, &sockaddr, sizeof(addr)) < 0)
- throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR);
-
- taskstats_family_id = getFamilyId(netlink_socket_fd);
-}
-
-
-void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
-{
- NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid));
-
- for (const NetlinkMessage::Attribute * attr = &answer.payload.attribute;
- attr < answer.end();
- attr = attr->next())
- {
- if (attr->header.nla_type == TASKSTATS_TYPE_AGGR_TGID || attr->header.nla_type == TASKSTATS_TYPE_AGGR_PID)
- {
- for (const NetlinkMessage::Attribute * nested_attr = reinterpret_cast<const NetlinkMessage::Attribute *>(attr->payload);
- nested_attr < attr->next();
- nested_attr = nested_attr->next())
- {
- if (nested_attr->header.nla_type == TASKSTATS_TYPE_STATS)
- {
- out_stats = unalignedLoad<::taskstats>(nested_attr->payload);
- return;
- }
- }
- }
- }
-
- throw Exception("There is no TASKSTATS_TYPE_STATS attribute in the Netlink response", ErrorCodes::NETLINK_ERROR);
-}
-
-
-TaskStatsInfoGetter::~TaskStatsInfoGetter()
-{
- if (netlink_socket_fd >= 0)
- close(netlink_socket_fd);
-}
-
-}
-
-
-#else
-
-namespace DB
-{
-
-bool TaskStatsInfoGetter::checkPermissions()
-{
- return false;
-}
-
-TaskStatsInfoGetter::TaskStatsInfoGetter() = default;
-TaskStatsInfoGetter::~TaskStatsInfoGetter() = default;
-
-void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const
-{
-}
-
-}
-
-#endif
+
+#include <unistd.h>
+
+#if defined(OS_LINUX)
+
+#include "hasLinuxCapability.h"
+#include <common/unaligned.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <linux/genetlink.h>
+#include <linux/netlink.h>
+#include <linux/taskstats.h>
+#include <linux/capability.h>
+
+#if defined(__clang__)
+ #pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
+#endif
+
+/// Basic idea is motivated by "iotop" tool.
+/// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NETLINK_ERROR;
+}
+
+// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed.
+static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len)
+{
+ return len >= static_cast<ssize_t>(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast<size_t>(len) >= nlh->nlmsg_len;
+}
+
+namespace
+{
+
+
+/** The message contains:
+ * - Netlink protocol header;
+ * - Generic Netlink (is a sub-protocol of Netlink that we use) protocol header;
+ * - Payload
+ * -- that itself is a list of "Attributes" (sub-messages), each of them contains length (including header), type, and its own payload.
+ * -- and attribute payload may be represented by the list of embedded attributes.
+ */
+struct NetlinkMessage
+{
+ static size_t constexpr MAX_MSG_SIZE = 1024;
+
+ alignas(NLMSG_ALIGNTO) ::nlmsghdr header;
+
+ struct Attribute
+ {
+ ::nlattr header;
+
+ alignas(NLMSG_ALIGNTO) char payload[0];
+
+ const Attribute * next() const
+ {
+ return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + NLA_ALIGN(header.nla_len));
+ }
+ };
+
+ union alignas(NLMSG_ALIGNTO)
+ {
+ struct
+ {
+ ::genlmsghdr generic_header;
+
+ union alignas(NLMSG_ALIGNTO)
+ {
+ char buf[MAX_MSG_SIZE];
+ Attribute attribute; /// First attribute. There may be more.
+ } payload;
+ };
+
+ ::nlmsgerr error;
+ };
+
+ const Attribute * end() const
+ {
+ return reinterpret_cast<const Attribute *>(reinterpret_cast<const char *>(this) + header.nlmsg_len);
+ }
+
+ void send(int fd) const
+ {
+ const char * request_buf = reinterpret_cast<const char *>(this);
+ ssize_t request_size = header.nlmsg_len;
+
+ union
+ {
+ ::sockaddr_nl nladdr{};
+ ::sockaddr sockaddr;
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+
+ while (true)
+ {
+ ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, &sockaddr, sizeof(nladdr));
+
+ if (bytes_sent <= 0)
+ {
+ if (errno == EAGAIN)
+ continue;
+ else
+ throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR);
+ }
+
+ if (bytes_sent > request_size)
+ throw Exception("Wrong result of sendto system call: bytes_sent is greater than request size", ErrorCodes::NETLINK_ERROR);
+
+ if (bytes_sent == request_size)
+ break;
+
+ request_buf += bytes_sent;
+ request_size -= bytes_sent;
+ }
+ }
+
+ void receive(int fd)
+ {
+ ssize_t bytes_received = ::recv(fd, this, sizeof(*this), 0);
+
+ if (header.nlmsg_type == NLMSG_ERROR)
+ throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR);
+
+ if (!is_nlmsg_ok(&header, bytes_received))
+ throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR);
+ }
+};
+
+
+NetlinkMessage query(
+ int fd,
+ UInt16 type,
+ UInt32 pid,
+ UInt8 command,
+ UInt16 attribute_type,
+ const void * attribute_data,
+ int attribute_size)
+{
+ NetlinkMessage request{};
+
+ request.header.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); /// Length of both headers.
+ request.header.nlmsg_type = type;
+ request.header.nlmsg_flags = NLM_F_REQUEST; /// A request.
+ request.header.nlmsg_seq = 0;
+ request.header.nlmsg_pid = pid;
+
+ request.generic_header.cmd = command;
+ request.generic_header.version = 1;
+
+ request.payload.attribute.header.nla_type = attribute_type;
+ request.payload.attribute.header.nla_len = attribute_size + NLA_HDRLEN;
+
+ memcpy(&request.payload.attribute.payload, attribute_data, attribute_size);
+
+ request.header.nlmsg_len += NLMSG_ALIGN(request.payload.attribute.header.nla_len);
+
+ request.send(fd);
+
+ NetlinkMessage response;
+ response.receive(fd);
+
+ return response;
+}
+
+
+UInt16 getFamilyIdImpl(int fd)
+{
+ NetlinkMessage answer = query(fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, TASKSTATS_GENL_NAME, strlen(TASKSTATS_GENL_NAME) + 1);
+
+ /// NOTE Why the relevant info is located in the second attribute?
+ const NetlinkMessage::Attribute * attr = answer.payload.attribute.next();
+
+ if (attr->header.nla_type != CTRL_ATTR_FAMILY_ID)
+ throw Exception("Received wrong attribute as an answer to GET_FAMILY Netlink command", ErrorCodes::NETLINK_ERROR);
+
+ return unalignedLoad<UInt16>(attr->payload);
+}
+
+
+bool checkPermissionsImpl()
+{
+ static bool res = hasLinuxCapability(CAP_NET_ADMIN);
+ if (!res)
+ return false;
+
+ /// Check that we can successfully initialize TaskStatsInfoGetter.
+ /// It will ask about family id through Netlink.
+ /// On some LXC containers we have capability but we still cannot use Netlink.
+
+ try
+ {
+ TaskStatsInfoGetter();
+ }
+ catch (...)
+ {
+ tryLogCurrentException(__PRETTY_FUNCTION__);
+ return false;
+ }
+
+ return true;
+}
+
+
+UInt16 getFamilyId(int fd)
+{
+ /// It is thread and exception safe since C++11 and even before.
+ static UInt16 res = getFamilyIdImpl(fd);
+ return res;
+}
+
+}
+
+
+bool TaskStatsInfoGetter::checkPermissions()
+{
+ static bool res = checkPermissionsImpl();
+ return res;
+}
+
+
+TaskStatsInfoGetter::TaskStatsInfoGetter()
+{
+ netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (netlink_socket_fd < 0)
+ throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR);
+
+ /// On some containerized environments, operation on Netlink socket could hang forever.
+ /// We set reasonably small timeout to overcome this issue.
+
+ struct timeval tv;
+ tv.tv_sec = 0;
+ tv.tv_usec = 50000;
+
+ if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char *>(&tv), sizeof(tv)))
+ throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR);
+
+ union
+ {
+ ::sockaddr_nl addr{};
+ ::sockaddr sockaddr;
+ };
+ addr.nl_family = AF_NETLINK;
+
+ if (::bind(netlink_socket_fd, &sockaddr, sizeof(addr)) < 0)
+ throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR);
+
+ taskstats_family_id = getFamilyId(netlink_socket_fd);
+}
+
+
+void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
+{
+ NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid));
+
+ for (const NetlinkMessage::Attribute * attr = &answer.payload.attribute;
+ attr < answer.end();
+ attr = attr->next())
+ {
+ if (attr->header.nla_type == TASKSTATS_TYPE_AGGR_TGID || attr->header.nla_type == TASKSTATS_TYPE_AGGR_PID)
+ {
+ for (const NetlinkMessage::Attribute * nested_attr = reinterpret_cast<const NetlinkMessage::Attribute *>(attr->payload);
+ nested_attr < attr->next();
+ nested_attr = nested_attr->next())
+ {
+ if (nested_attr->header.nla_type == TASKSTATS_TYPE_STATS)
+ {
+ out_stats = unalignedLoad<::taskstats>(nested_attr->payload);
+ return;
+ }
+ }
+ }
+ }
+
+ throw Exception("There is no TASKSTATS_TYPE_STATS attribute in the Netlink response", ErrorCodes::NETLINK_ERROR);
+}
+
+
+TaskStatsInfoGetter::~TaskStatsInfoGetter()
+{
+ if (netlink_socket_fd >= 0)
+ close(netlink_socket_fd);
+}
+
+}
+
+
+#else
+
+namespace DB
+{
+
+bool TaskStatsInfoGetter::checkPermissions()
+{
+ return false;
+}
+
+TaskStatsInfoGetter::TaskStatsInfoGetter() = default;
+TaskStatsInfoGetter::~TaskStatsInfoGetter() = default;
+
+void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const
+{
+}
+
+}
+
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h
index 00ecf91c47..2141ec63f0 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/TaskStatsInfoGetter.h
@@ -1,31 +1,31 @@
-#pragma once
-
-#include <sys/types.h>
+#pragma once
+
+#include <sys/types.h>
#include <common/types.h>
-#include <boost/noncopyable.hpp>
-
-struct taskstats;
-
-namespace DB
-{
-
-/// Get taskstat info from OS kernel via Netlink protocol.
-class TaskStatsInfoGetter : private boost::noncopyable
-{
-public:
- TaskStatsInfoGetter();
- ~TaskStatsInfoGetter();
-
- void getStat(::taskstats & out_stats, pid_t tid) const;
-
+#include <boost/noncopyable.hpp>
+
+struct taskstats;
+
+namespace DB
+{
+
+/// Get taskstat info from OS kernel via Netlink protocol.
+class TaskStatsInfoGetter : private boost::noncopyable
+{
+public:
+ TaskStatsInfoGetter();
+ ~TaskStatsInfoGetter();
+
+ void getStat(::taskstats & out_stats, pid_t tid) const;
+
/// Whether the current process has permissions (sudo or cap_net_admin capabilities) to get taskstats info
- static bool checkPermissions();
-
-#if defined(OS_LINUX)
-private:
- int netlink_socket_fd = -1;
- UInt16 taskstats_family_id = 0;
-#endif
-};
-
-}
+ static bool checkPermissions();
+
+#if defined(OS_LINUX)
+private:
+ int netlink_socket_fd = -1;
+ UInt16 taskstats_family_id = 0;
+#endif
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp
index 7b69bf766d..dba6d3b057 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.cpp
@@ -1,576 +1,576 @@
-#include "ThreadProfileEvents.h"
-
-#if defined(__linux__)
-
-#include "TaskStatsInfoGetter.h"
-#include "ProcfsMetricsProvider.h"
-#include "hasLinuxCapability.h"
-
-#include <filesystem>
-#include <fstream>
-#include <optional>
-#include <sstream>
-#include <unordered_set>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <linux/perf_event.h>
-#include <syscall.h>
-#include <sys/ioctl.h>
-#include <cerrno>
-#include <sys/types.h>
-#include <dirent.h>
-
+#include "ThreadProfileEvents.h"
+
+#if defined(__linux__)
+
+#include "TaskStatsInfoGetter.h"
+#include "ProcfsMetricsProvider.h"
+#include "hasLinuxCapability.h"
+
+#include <filesystem>
+#include <fstream>
+#include <optional>
+#include <sstream>
+#include <unordered_set>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <cerrno>
+#include <sys/types.h>
+#include <dirent.h>
+
#include <common/errnoToString.h>
-namespace DB
-{
-
-bool TasksStatsCounters::checkIfAvailable()
-{
- return findBestAvailableProvider() != MetricsProvider::None;
-}
-
-std::unique_ptr<TasksStatsCounters> TasksStatsCounters::create(const UInt64 tid)
-{
- std::unique_ptr<TasksStatsCounters> instance;
- if (checkIfAvailable())
- instance.reset(new TasksStatsCounters(tid, findBestAvailableProvider()));
- return instance;
-}
-
-TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvider()
-{
- /// This initialization is thread-safe and executed once since C++11
- static std::optional<MetricsProvider> provider =
- []() -> MetricsProvider
- {
- if (TaskStatsInfoGetter::checkPermissions())
- {
- return MetricsProvider::Netlink;
- }
- else if (ProcfsMetricsProvider::isAvailable())
- {
- return MetricsProvider::Procfs;
- }
- return MetricsProvider::None;
- }();
-
- return *provider;
-}
-
-
-TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider provider)
-{
- switch (provider)
- {
- case MetricsProvider::Netlink:
- stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]()
- {
+namespace DB
+{
+
+bool TasksStatsCounters::checkIfAvailable()
+{
+ return findBestAvailableProvider() != MetricsProvider::None;
+}
+
+std::unique_ptr<TasksStatsCounters> TasksStatsCounters::create(const UInt64 tid)
+{
+ std::unique_ptr<TasksStatsCounters> instance;
+ if (checkIfAvailable())
+ instance.reset(new TasksStatsCounters(tid, findBestAvailableProvider()));
+ return instance;
+}
+
+TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvider()
+{
+ /// This initialization is thread-safe and executed once since C++11
+ static std::optional<MetricsProvider> provider =
+ []() -> MetricsProvider
+ {
+ if (TaskStatsInfoGetter::checkPermissions())
+ {
+ return MetricsProvider::Netlink;
+ }
+ else if (ProcfsMetricsProvider::isAvailable())
+ {
+ return MetricsProvider::Procfs;
+ }
+ return MetricsProvider::None;
+ }();
+
+ return *provider;
+}
+
+
+TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider provider)
+{
+ switch (provider)
+ {
+ case MetricsProvider::Netlink:
+ stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]()
+ {
::taskstats result{};
- metrics_provider->getStat(result, tid);
- return result;
- };
- break;
- case MetricsProvider::Procfs:
- stats_getter = [metrics_provider = std::make_shared<ProcfsMetricsProvider>(tid)]()
- {
+ metrics_provider->getStat(result, tid);
+ return result;
+ };
+ break;
+ case MetricsProvider::Procfs:
+ stats_getter = [metrics_provider = std::make_shared<ProcfsMetricsProvider>(tid)]()
+ {
::taskstats result{};
- metrics_provider->getTaskStats(result);
- return result;
- };
- break;
- case MetricsProvider::None:
- ;
- }
-}
-
-void TasksStatsCounters::reset()
-{
- if (stats_getter)
- stats = stats_getter();
-}
-
-void TasksStatsCounters::updateCounters(ProfileEvents::Counters & profile_events)
-{
- if (!stats_getter)
- return;
-
- const auto new_stats = stats_getter();
- incrementProfileEvents(stats, new_stats, profile_events);
- stats = new_stats;
-}
-
-void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events)
-{
- profile_events.increment(ProfileEvents::OSCPUWaitMicroseconds,
- safeDiff(prev.cpu_delay_total, curr.cpu_delay_total) / 1000U);
- profile_events.increment(ProfileEvents::OSIOWaitMicroseconds,
- safeDiff(prev.blkio_delay_total, curr.blkio_delay_total) / 1000U);
- profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds,
- safeDiff(prev.cpu_run_virtual_total, curr.cpu_run_virtual_total) / 1000U);
-
- /// Since TASKSTATS_VERSION = 3 extended accounting and IO accounting is available.
- if (curr.version < 3)
- return;
-
- profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.read_char, curr.read_char));
- profile_events.increment(ProfileEvents::OSWriteChars, safeDiff(prev.write_char, curr.write_char));
- profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.read_bytes, curr.read_bytes));
- profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.write_bytes, curr.write_bytes));
-}
-
-}
-
-#endif
-
+ metrics_provider->getTaskStats(result);
+ return result;
+ };
+ break;
+ case MetricsProvider::None:
+ ;
+ }
+}
+
+void TasksStatsCounters::reset()
+{
+ if (stats_getter)
+ stats = stats_getter();
+}
+
+void TasksStatsCounters::updateCounters(ProfileEvents::Counters & profile_events)
+{
+ if (!stats_getter)
+ return;
+
+ const auto new_stats = stats_getter();
+ incrementProfileEvents(stats, new_stats, profile_events);
+ stats = new_stats;
+}
+
+void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events)
+{
+ profile_events.increment(ProfileEvents::OSCPUWaitMicroseconds,
+ safeDiff(prev.cpu_delay_total, curr.cpu_delay_total) / 1000U);
+ profile_events.increment(ProfileEvents::OSIOWaitMicroseconds,
+ safeDiff(prev.blkio_delay_total, curr.blkio_delay_total) / 1000U);
+ profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds,
+ safeDiff(prev.cpu_run_virtual_total, curr.cpu_run_virtual_total) / 1000U);
+
+ /// Since TASKSTATS_VERSION = 3 extended accounting and IO accounting is available.
+ if (curr.version < 3)
+ return;
+
+ profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.read_char, curr.read_char));
+ profile_events.increment(ProfileEvents::OSWriteChars, safeDiff(prev.write_char, curr.write_char));
+ profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.read_bytes, curr.read_bytes));
+ profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.write_bytes, curr.write_bytes));
+}
+
+}
+
+#endif
+
#if defined(__linux__)
-
-namespace DB
-{
-
-thread_local PerfEventsCounters current_thread_counters;
-
-#define SOFTWARE_EVENT(PERF_NAME, LOCAL_NAME) \
- PerfEventInfo \
- { \
- .event_type = perf_type_id::PERF_TYPE_SOFTWARE, \
- .event_config = (PERF_NAME), \
- .profile_event = ProfileEvents::LOCAL_NAME, \
- .settings_name = #LOCAL_NAME \
- }
-
-#define HARDWARE_EVENT(PERF_NAME, LOCAL_NAME) \
- PerfEventInfo \
- { \
- .event_type = perf_type_id::PERF_TYPE_HARDWARE, \
- .event_config = (PERF_NAME), \
- .profile_event = ProfileEvents::LOCAL_NAME, \
- .settings_name = #LOCAL_NAME \
- }
-
-// One event for cache accesses and one for cache misses.
-// Type is ACCESS or MISS
-#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \
- PerfEventInfo \
- { \
- .event_type = perf_type_id::PERF_TYPE_HW_CACHE, \
- .event_config = (PERF_NAME) \
- | (PERF_COUNT_HW_CACHE_OP_READ << 8) \
- | (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \
- .profile_event = ProfileEvents::LOCAL_NAME, \
- .settings_name = #LOCAL_NAME \
- }
-
-// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
-static const PerfEventInfo raw_events_info[] = {
- HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles),
- HARDWARE_EVENT(PERF_COUNT_HW_INSTRUCTIONS, PerfInstructions),
- HARDWARE_EVENT(PERF_COUNT_HW_CACHE_REFERENCES, PerfCacheReferences),
- HARDWARE_EVENT(PERF_COUNT_HW_CACHE_MISSES, PerfCacheMisses),
- HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PerfBranchInstructions),
- HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_MISSES, PerfBranchMisses),
- HARDWARE_EVENT(PERF_COUNT_HW_BUS_CYCLES, PerfBusCycles),
- HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, PerfStalledCyclesFrontend),
- HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, PerfStalledCyclesBackend),
- HARDWARE_EVENT(PERF_COUNT_HW_REF_CPU_CYCLES, PerfRefCpuCycles),
-
- // `cpu-clock` is a bit broken according to this: https://stackoverflow.com/a/56967896
- SOFTWARE_EVENT(PERF_COUNT_SW_CPU_CLOCK, PerfCpuClock),
- SOFTWARE_EVENT(PERF_COUNT_SW_TASK_CLOCK, PerfTaskClock),
- SOFTWARE_EVENT(PERF_COUNT_SW_CONTEXT_SWITCHES, PerfContextSwitches),
- SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations),
- SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults),
- SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults),
-
- // Don't add them -- they are the same as SoftPageFaults and HardPageFaults,
- // match well numerically.
- // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor),
- // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor),
-
- CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS),
- CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS),
-
- // Apparently it doesn't make sense to treat these values as relative:
- // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses
- CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS),
- CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS),
+
+namespace DB
+{
+
+thread_local PerfEventsCounters current_thread_counters;
+
+#define SOFTWARE_EVENT(PERF_NAME, LOCAL_NAME) \
+ PerfEventInfo \
+ { \
+ .event_type = perf_type_id::PERF_TYPE_SOFTWARE, \
+ .event_config = (PERF_NAME), \
+ .profile_event = ProfileEvents::LOCAL_NAME, \
+ .settings_name = #LOCAL_NAME \
+ }
+
+#define HARDWARE_EVENT(PERF_NAME, LOCAL_NAME) \
+ PerfEventInfo \
+ { \
+ .event_type = perf_type_id::PERF_TYPE_HARDWARE, \
+ .event_config = (PERF_NAME), \
+ .profile_event = ProfileEvents::LOCAL_NAME, \
+ .settings_name = #LOCAL_NAME \
+ }
+
+// One event for cache accesses and one for cache misses.
+// Type is ACCESS or MISS
+#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \
+ PerfEventInfo \
+ { \
+ .event_type = perf_type_id::PERF_TYPE_HW_CACHE, \
+ .event_config = (PERF_NAME) \
+ | (PERF_COUNT_HW_CACHE_OP_READ << 8) \
+ | (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \
+ .profile_event = ProfileEvents::LOCAL_NAME, \
+ .settings_name = #LOCAL_NAME \
+ }
+
+// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
+static const PerfEventInfo raw_events_info[] = {
+ HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles),
+ HARDWARE_EVENT(PERF_COUNT_HW_INSTRUCTIONS, PerfInstructions),
+ HARDWARE_EVENT(PERF_COUNT_HW_CACHE_REFERENCES, PerfCacheReferences),
+ HARDWARE_EVENT(PERF_COUNT_HW_CACHE_MISSES, PerfCacheMisses),
+ HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PerfBranchInstructions),
+ HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_MISSES, PerfBranchMisses),
+ HARDWARE_EVENT(PERF_COUNT_HW_BUS_CYCLES, PerfBusCycles),
+ HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, PerfStalledCyclesFrontend),
+ HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, PerfStalledCyclesBackend),
+ HARDWARE_EVENT(PERF_COUNT_HW_REF_CPU_CYCLES, PerfRefCpuCycles),
+
+ // `cpu-clock` is a bit broken according to this: https://stackoverflow.com/a/56967896
+ SOFTWARE_EVENT(PERF_COUNT_SW_CPU_CLOCK, PerfCpuClock),
+ SOFTWARE_EVENT(PERF_COUNT_SW_TASK_CLOCK, PerfTaskClock),
+ SOFTWARE_EVENT(PERF_COUNT_SW_CONTEXT_SWITCHES, PerfContextSwitches),
+ SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations),
+ SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults),
+ SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults),
+
+ // Don't add them -- they are the same as SoftPageFaults and HardPageFaults,
+ // match well numerically.
+ // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor),
+ // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor),
+
+ CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS),
+ CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS),
+
+ // Apparently it doesn't make sense to treat these values as relative:
+ // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses
+ CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS),
+ CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS),
CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, PerfLocalMemoryReferences, ACCESS),
CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, PerfLocalMemoryMisses, MISS),
-};
-
-static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS);
-
-#undef HARDWARE_EVENT
-#undef SOFTWARE_EVENT
+};
+
+static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS);
+
+#undef HARDWARE_EVENT
+#undef SOFTWARE_EVENT
#undef CACHE_EVENT
-
-// A map of event name -> event index, to parse event list in settings.
-static std::unordered_map<std::string, size_t> populateEventMap()
-{
- std::unordered_map<std::string, size_t> name_to_index;
- name_to_index.reserve(NUMBER_OF_RAW_EVENTS);
-
- for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
- {
- name_to_index.emplace(raw_events_info[i].settings_name, i);
- }
-
- return name_to_index;
-}
-
-static const auto event_name_to_index = populateEventMap();
-
-static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, UInt64 flags)
-{
- return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
-}
-
-static int openPerfEventDisabled(Int32 perf_event_paranoid, bool has_cap_sys_admin, UInt32 perf_event_type, UInt64 perf_event_config)
-{
- perf_event_attr pe{};
- pe.type = perf_event_type;
- pe.size = sizeof(struct perf_event_attr);
- pe.config = perf_event_config;
- // disable by default to add as little extra time as possible
- pe.disabled = 1;
- // can record kernel only when `perf_event_paranoid` <= 1 or have CAP_SYS_ADMIN
- pe.exclude_kernel = perf_event_paranoid >= 2 && !has_cap_sys_admin;
- pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
-
- return openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0);
-}
-
-static void enablePerfEvent(int event_fd)
-{
- if (ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0))
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Can't enable perf event with file descriptor {}: '{}' ({})",
+
+// A map of event name -> event index, to parse event list in settings.
+static std::unordered_map<std::string, size_t> populateEventMap()
+{
+ std::unordered_map<std::string, size_t> name_to_index;
+ name_to_index.reserve(NUMBER_OF_RAW_EVENTS);
+
+ for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
+ {
+ name_to_index.emplace(raw_events_info[i].settings_name, i);
+ }
+
+ return name_to_index;
+}
+
+static const auto event_name_to_index = populateEventMap();
+
+static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, UInt64 flags)
+{
+ return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
+}
+
+static int openPerfEventDisabled(Int32 perf_event_paranoid, bool has_cap_sys_admin, UInt32 perf_event_type, UInt64 perf_event_config)
+{
+ perf_event_attr pe{};
+ pe.type = perf_event_type;
+ pe.size = sizeof(struct perf_event_attr);
+ pe.config = perf_event_config;
+ // disable by default to add as little extra time as possible
+ pe.disabled = 1;
+ // can record kernel only when `perf_event_paranoid` <= 1 or have CAP_SYS_ADMIN
+ pe.exclude_kernel = perf_event_paranoid >= 2 && !has_cap_sys_admin;
+ pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+ return openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0);
+}
+
+static void enablePerfEvent(int event_fd)
+{
+ if (ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0))
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Can't enable perf event with file descriptor {}: '{}' ({})",
event_fd, errnoToString(errno), errno);
- }
-}
-
-static void disablePerfEvent(int event_fd)
-{
- if (ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0))
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Can't disable perf event with file descriptor {}: '{}' ({})",
+ }
+}
+
+static void disablePerfEvent(int event_fd)
+{
+ if (ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0))
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Can't disable perf event with file descriptor {}: '{}' ({})",
event_fd, errnoToString(errno), errno);
- }
-}
-
-static void releasePerfEvent(int event_fd)
-{
- if (close(event_fd))
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Can't close perf event file descriptor {}: {} ({})",
+ }
+}
+
+static void releasePerfEvent(int event_fd)
+{
+ if (close(event_fd))
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Can't close perf event file descriptor {}: {} ({})",
event_fd, errnoToString(errno), errno);
- }
-}
-
-static bool validatePerfEventDescriptor(int & fd)
-{
- if (fcntl(fd, F_GETFL) != -1)
- return true;
-
- if (errno == EBADF)
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Event descriptor {} was closed from the outside; reopening", fd);
- }
- else
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Error while checking availability of event descriptor {}: {} ({})",
+ }
+}
+
+static bool validatePerfEventDescriptor(int & fd)
+{
+ if (fcntl(fd, F_GETFL) != -1)
+ return true;
+
+ if (errno == EBADF)
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Event descriptor {} was closed from the outside; reopening", fd);
+ }
+ else
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Error while checking availability of event descriptor {}: {} ({})",
fd, errnoToString(errno), errno);
-
- disablePerfEvent(fd);
- releasePerfEvent(fd);
- }
-
- fd = -1;
- return false;
-}
-
-bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_events_list)
-{
- const auto valid_event_indices = eventIndicesFromString(needed_events_list);
-
- // find state changes (if there are any)
- bool old_state[NUMBER_OF_RAW_EVENTS];
- for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
- old_state[i] = thread_events_descriptors_holder.descriptors[i] != -1;
-
- bool new_state[NUMBER_OF_RAW_EVENTS];
- std::fill_n(new_state, NUMBER_OF_RAW_EVENTS, false);
- for (size_t opened_index : valid_event_indices)
- new_state[opened_index] = true;
-
- std::vector<size_t> events_to_open;
- std::vector<size_t> events_to_release;
- for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
- {
- bool old_one = old_state[i];
- bool new_one = new_state[i];
-
- if (old_one == new_one)
- {
- if (old_one
- && !validatePerfEventDescriptor(
- thread_events_descriptors_holder.descriptors[i]))
- {
- events_to_open.push_back(i);
- }
- continue;
- }
-
- if (new_one)
- events_to_open.push_back(i);
- else
- events_to_release.push_back(i);
- }
-
- // release unused descriptors
- for (size_t i : events_to_release)
- {
- int & fd = thread_events_descriptors_holder.descriptors[i];
- disablePerfEvent(fd);
- releasePerfEvent(fd);
- fd = -1;
- }
-
- if (events_to_open.empty())
- {
- return true;
- }
-
- // check permissions
- // cat /proc/sys/kernel/perf_event_paranoid
- // -1: Allow use of (almost) all events by all users
- // >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
- // >=1: Disallow CPU event access by users without CAP_SYS_ADMIN
- // >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
- // >=3: Disallow all event access by users without CAP_SYS_ADMIN
- Int32 perf_event_paranoid = 0;
- std::ifstream paranoid_file("/proc/sys/kernel/perf_event_paranoid");
- paranoid_file >> perf_event_paranoid;
-
- bool has_cap_sys_admin = hasLinuxCapability(CAP_SYS_ADMIN);
- if (perf_event_paranoid >= 3 && !has_cap_sys_admin)
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Not enough permissions to record perf events: "
- "perf_event_paranoid = {} and CAP_SYS_ADMIN = 0",
- perf_event_paranoid);
- return false;
- }
-
- // Open descriptors for new events.
- // Theoretically, we can run out of file descriptors. Threads go up to 10k,
- // and there might be a dozen perf events per thread, so we're looking at
- // 100k open files. In practice, this is not likely -- perf events are
- // mostly used in performance tests or other kinds of testing, and the
- // number of threads stays below hundred.
- // We used to check the number of open files by enumerating /proc/self/fd,
- // but listing all open files before opening more files is obviously
- // quadratic, and quadraticity never ends well.
- for (size_t i : events_to_open)
- {
- const PerfEventInfo & event_info = raw_events_info[i];
- int & fd = thread_events_descriptors_holder.descriptors[i];
- // disable by default to add as little extra time as possible
- fd = openPerfEventDisabled(perf_event_paranoid, has_cap_sys_admin, event_info.event_type, event_info.event_config);
-
- if (fd == -1 && errno != ENOENT)
- {
- // ENOENT means that the event is not supported. Don't log it, because
- // this is called for each thread and would be too verbose. Log other
- // error codes because they might signify an error.
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Failed to open perf event {} (event_type={}, event_config={}): "
- "'{}' ({})", event_info.settings_name, event_info.event_type,
+
+ disablePerfEvent(fd);
+ releasePerfEvent(fd);
+ }
+
+ fd = -1;
+ return false;
+}
+
+bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_events_list)
+{
+ const auto valid_event_indices = eventIndicesFromString(needed_events_list);
+
+ // find state changes (if there are any)
+ bool old_state[NUMBER_OF_RAW_EVENTS];
+ for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
+ old_state[i] = thread_events_descriptors_holder.descriptors[i] != -1;
+
+ bool new_state[NUMBER_OF_RAW_EVENTS];
+ std::fill_n(new_state, NUMBER_OF_RAW_EVENTS, false);
+ for (size_t opened_index : valid_event_indices)
+ new_state[opened_index] = true;
+
+ std::vector<size_t> events_to_open;
+ std::vector<size_t> events_to_release;
+ for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
+ {
+ bool old_one = old_state[i];
+ bool new_one = new_state[i];
+
+ if (old_one == new_one)
+ {
+ if (old_one
+ && !validatePerfEventDescriptor(
+ thread_events_descriptors_holder.descriptors[i]))
+ {
+ events_to_open.push_back(i);
+ }
+ continue;
+ }
+
+ if (new_one)
+ events_to_open.push_back(i);
+ else
+ events_to_release.push_back(i);
+ }
+
+ // release unused descriptors
+ for (size_t i : events_to_release)
+ {
+ int & fd = thread_events_descriptors_holder.descriptors[i];
+ disablePerfEvent(fd);
+ releasePerfEvent(fd);
+ fd = -1;
+ }
+
+ if (events_to_open.empty())
+ {
+ return true;
+ }
+
+ // check permissions
+ // cat /proc/sys/kernel/perf_event_paranoid
+ // -1: Allow use of (almost) all events by all users
+ // >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
+ // >=1: Disallow CPU event access by users without CAP_SYS_ADMIN
+ // >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+ // >=3: Disallow all event access by users without CAP_SYS_ADMIN
+ Int32 perf_event_paranoid = 0;
+ std::ifstream paranoid_file("/proc/sys/kernel/perf_event_paranoid");
+ paranoid_file >> perf_event_paranoid;
+
+ bool has_cap_sys_admin = hasLinuxCapability(CAP_SYS_ADMIN);
+ if (perf_event_paranoid >= 3 && !has_cap_sys_admin)
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Not enough permissions to record perf events: "
+ "perf_event_paranoid = {} and CAP_SYS_ADMIN = 0",
+ perf_event_paranoid);
+ return false;
+ }
+
+ // Open descriptors for new events.
+ // Theoretically, we can run out of file descriptors. Threads go up to 10k,
+ // and there might be a dozen perf events per thread, so we're looking at
+ // 100k open files. In practice, this is not likely -- perf events are
+ // mostly used in performance tests or other kinds of testing, and the
+ // number of threads stays below hundred.
+ // We used to check the number of open files by enumerating /proc/self/fd,
+ // but listing all open files before opening more files is obviously
+ // quadratic, and quadraticity never ends well.
+ for (size_t i : events_to_open)
+ {
+ const PerfEventInfo & event_info = raw_events_info[i];
+ int & fd = thread_events_descriptors_holder.descriptors[i];
+ // disable by default to add as little extra time as possible
+ fd = openPerfEventDisabled(perf_event_paranoid, has_cap_sys_admin, event_info.event_type, event_info.event_config);
+
+ if (fd == -1 && errno != ENOENT)
+ {
+ // ENOENT means that the event is not supported. Don't log it, because
+ // this is called for each thread and would be too verbose. Log other
+ // error codes because they might signify an error.
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Failed to open perf event {} (event_type={}, event_config={}): "
+ "'{}' ({})", event_info.settings_name, event_info.event_type,
event_info.event_config, errnoToString(errno), errno);
- }
- }
-
- return true;
-}
-
+ }
+ }
+
+ return true;
+}
+
// Parse comma-separated list of event names. Empty means all available events.
-std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string & events_list)
-{
- std::vector<size_t> result;
- result.reserve(NUMBER_OF_RAW_EVENTS);
-
- if (events_list.empty())
- {
- for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
- {
- result.push_back(i);
- }
- return result;
- }
-
+std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string & events_list)
+{
+ std::vector<size_t> result;
+ result.reserve(NUMBER_OF_RAW_EVENTS);
+
+ if (events_list.empty())
+ {
+ for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
+ {
+ result.push_back(i);
+ }
+ return result;
+ }
+
std::istringstream iss(events_list); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
- std::string event_name;
- while (std::getline(iss, event_name, ','))
- {
+ std::string event_name;
+ while (std::getline(iss, event_name, ','))
+ {
// Allow spaces at the beginning of the token, so that you can write 'a, b'.
- event_name.erase(0, event_name.find_first_not_of(' '));
-
- auto entry = event_name_to_index.find(event_name);
- if (entry != event_name_to_index.end())
- {
- result.push_back(entry->second);
- }
- else
- {
- LOG_ERROR(&Poco::Logger::get("PerfEvents"),
- "Unknown perf event name '{}' specified in settings", event_name);
- }
- }
-
- return result;
-}
-
-void PerfEventsCounters::initializeProfileEvents(const std::string & events_list)
-{
- if (!processThreadLocalChanges(events_list))
- return;
-
- for (int fd : thread_events_descriptors_holder.descriptors)
- {
- if (fd == -1)
- continue;
-
- // We don't reset the event, because the time_running and time_enabled
- // can't be reset anyway and we have to calculate deltas.
- enablePerfEvent(fd);
- }
-}
-
-void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile_events)
-{
- // Disable all perf events.
- for (auto fd : thread_events_descriptors_holder.descriptors)
- {
- if (fd == -1)
- continue;
- disablePerfEvent(fd);
- }
-
- // Read the counter values.
- PerfEventValue current_values[NUMBER_OF_RAW_EVENTS];
- for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
- {
- int fd = thread_events_descriptors_holder.descriptors[i];
- if (fd == -1)
- continue;
-
- constexpr ssize_t bytes_to_read = sizeof(current_values[0]);
- const int bytes_read = read(fd, &current_values[i], bytes_to_read);
-
- if (bytes_read != bytes_to_read)
- {
- LOG_WARNING(&Poco::Logger::get("PerfEvents"),
- "Can't read event value from file descriptor {}: '{}' ({})",
+ event_name.erase(0, event_name.find_first_not_of(' '));
+
+ auto entry = event_name_to_index.find(event_name);
+ if (entry != event_name_to_index.end())
+ {
+ result.push_back(entry->second);
+ }
+ else
+ {
+ LOG_ERROR(&Poco::Logger::get("PerfEvents"),
+ "Unknown perf event name '{}' specified in settings", event_name);
+ }
+ }
+
+ return result;
+}
+
+void PerfEventsCounters::initializeProfileEvents(const std::string & events_list)
+{
+ if (!processThreadLocalChanges(events_list))
+ return;
+
+ for (int fd : thread_events_descriptors_holder.descriptors)
+ {
+ if (fd == -1)
+ continue;
+
+ // We don't reset the event, because the time_running and time_enabled
+ // can't be reset anyway and we have to calculate deltas.
+ enablePerfEvent(fd);
+ }
+}
+
+void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile_events)
+{
+ // Disable all perf events.
+ for (auto fd : thread_events_descriptors_holder.descriptors)
+ {
+ if (fd == -1)
+ continue;
+ disablePerfEvent(fd);
+ }
+
+ // Read the counter values.
+ PerfEventValue current_values[NUMBER_OF_RAW_EVENTS];
+ for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
+ {
+ int fd = thread_events_descriptors_holder.descriptors[i];
+ if (fd == -1)
+ continue;
+
+ constexpr ssize_t bytes_to_read = sizeof(current_values[0]);
+ const int bytes_read = read(fd, &current_values[i], bytes_to_read);
+
+ if (bytes_read != bytes_to_read)
+ {
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"),
+ "Can't read event value from file descriptor {}: '{}' ({})",
fd, errnoToString(errno), errno);
- current_values[i] = {};
- }
- }
-
- // Actually process counters' values. Track the minimal time that a performance
- // counter was enabled, and the corresponding running time, to give some idea
- // about the amount of counter multiplexing.
- UInt64 min_enabled_time = -1;
- UInt64 running_time_for_min_enabled_time = 0;
-
- for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
- {
- int fd = thread_events_descriptors_holder.descriptors[i];
- if (fd == -1)
- continue;
-
- const PerfEventInfo & info = raw_events_info[i];
- const PerfEventValue & previous_value = previous_values[i];
- const PerfEventValue & current_value = current_values[i];
-
- // Account for counter multiplexing. time_running and time_enabled are
- // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate
- // deltas from old values.
- const auto enabled = current_value.time_enabled - previous_value.time_enabled;
- const auto running = current_value.time_running - previous_value.time_running;
- const UInt64 delta = (current_value.value - previous_value.value)
- * enabled / std::max(1.f, float(running));
-
- if (min_enabled_time > enabled)
- {
- min_enabled_time = enabled;
- running_time_for_min_enabled_time = running;
- }
-
- profile_events.increment(info.profile_event, delta);
- }
-
- // If we had at least one enabled event, also show multiplexing-related
- // statistics.
- if (min_enabled_time != UInt64(-1))
- {
- profile_events.increment(ProfileEvents::PerfMinEnabledTime,
- min_enabled_time);
- profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime,
- running_time_for_min_enabled_time);
- }
-
- // Store current counter values for the next profiling period.
- memcpy(previous_values, current_values, sizeof(current_values));
-}
-
-void PerfEventsCounters::closeEventDescriptors()
-{
- thread_events_descriptors_holder.releaseResources();
-}
-
-PerfDescriptorsHolder::PerfDescriptorsHolder()
-{
- for (int & descriptor : descriptors)
- descriptor = -1;
-}
-
-PerfDescriptorsHolder::~PerfDescriptorsHolder()
-{
- releaseResources();
-}
-
-void PerfDescriptorsHolder::releaseResources()
-{
- for (int & descriptor : descriptors)
- {
- if (descriptor == -1)
- continue;
-
- disablePerfEvent(descriptor);
- releasePerfEvent(descriptor);
- descriptor = -1;
- }
-}
-
-}
-
-#else
-
-namespace DB
-{
-
+ current_values[i] = {};
+ }
+ }
+
+ // Actually process counters' values. Track the minimal time that a performance
+ // counter was enabled, and the corresponding running time, to give some idea
+ // about the amount of counter multiplexing.
+ UInt64 min_enabled_time = -1;
+ UInt64 running_time_for_min_enabled_time = 0;
+
+ for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
+ {
+ int fd = thread_events_descriptors_holder.descriptors[i];
+ if (fd == -1)
+ continue;
+
+ const PerfEventInfo & info = raw_events_info[i];
+ const PerfEventValue & previous_value = previous_values[i];
+ const PerfEventValue & current_value = current_values[i];
+
+ // Account for counter multiplexing. time_running and time_enabled are
+ // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate
+ // deltas from old values.
+ const auto enabled = current_value.time_enabled - previous_value.time_enabled;
+ const auto running = current_value.time_running - previous_value.time_running;
+ const UInt64 delta = (current_value.value - previous_value.value)
+ * enabled / std::max(1.f, float(running));
+
+ if (min_enabled_time > enabled)
+ {
+ min_enabled_time = enabled;
+ running_time_for_min_enabled_time = running;
+ }
+
+ profile_events.increment(info.profile_event, delta);
+ }
+
+ // If we had at least one enabled event, also show multiplexing-related
+ // statistics.
+ if (min_enabled_time != UInt64(-1))
+ {
+ profile_events.increment(ProfileEvents::PerfMinEnabledTime,
+ min_enabled_time);
+ profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime,
+ running_time_for_min_enabled_time);
+ }
+
+ // Store current counter values for the next profiling period.
+ memcpy(previous_values, current_values, sizeof(current_values));
+}
+
+void PerfEventsCounters::closeEventDescriptors()
+{
+ thread_events_descriptors_holder.releaseResources();
+}
+
+PerfDescriptorsHolder::PerfDescriptorsHolder()
+{
+ for (int & descriptor : descriptors)
+ descriptor = -1;
+}
+
+PerfDescriptorsHolder::~PerfDescriptorsHolder()
+{
+ releaseResources();
+}
+
+void PerfDescriptorsHolder::releaseResources()
+{
+ for (int & descriptor : descriptors)
+ {
+ if (descriptor == -1)
+ continue;
+
+ disablePerfEvent(descriptor);
+ releasePerfEvent(descriptor);
+ descriptor = -1;
+ }
+}
+
+}
+
+#else
+
+namespace DB
+{
+
// the functionality is disabled when we are not running on Linux.
-PerfEventsCounters current_thread_counters;
-
-}
-
-#endif
+PerfEventsCounters current_thread_counters;
+
+}
+
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h
index cf5196f363..416f512687 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ThreadProfileEvents.h
@@ -1,135 +1,135 @@
-#pragma once
-
+#pragma once
+
#include <common/types.h>
-#include <Common/ProfileEvents.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <pthread.h>
-#include <common/logger_useful.h>
-
-
-#if defined(__linux__)
-#include <linux/taskstats.h>
-#else
-struct taskstats {};
-#endif
-
-/** Implement ProfileEvents with statistics about resource consumption of the current thread.
- */
-
-namespace ProfileEvents
-{
- extern const Event RealTimeMicroseconds;
- extern const Event UserTimeMicroseconds;
- extern const Event SystemTimeMicroseconds;
- extern const Event SoftPageFaults;
- extern const Event HardPageFaults;
- extern const Event VoluntaryContextSwitches;
- extern const Event InvoluntaryContextSwitches;
-
-#if defined(__linux__)
- extern const Event OSIOWaitMicroseconds;
- extern const Event OSCPUWaitMicroseconds;
- extern const Event OSCPUVirtualTimeMicroseconds;
- extern const Event OSReadChars;
- extern const Event OSWriteChars;
- extern const Event OSReadBytes;
- extern const Event OSWriteBytes;
-
- extern const Event PerfCpuCycles;
- extern const Event PerfInstructions;
- extern const Event PerfCacheReferences;
- extern const Event PerfCacheMisses;
- extern const Event PerfBranchInstructions;
- extern const Event PerfBranchMisses;
- extern const Event PerfBusCycles;
- extern const Event PerfStalledCyclesFrontend;
- extern const Event PerfStalledCyclesBackend;
- extern const Event PerfRefCpuCycles;
-
- extern const Event PerfCpuClock;
- extern const Event PerfTaskClock;
- extern const Event PerfContextSwitches;
- extern const Event PerfCpuMigrations;
- extern const Event PerfAlignmentFaults;
- extern const Event PerfEmulationFaults;
- extern const Event PerfMinEnabledTime;
- extern const Event PerfMinEnabledRunningTime;
- extern const Event PerfDataTLBReferences;
- extern const Event PerfDataTLBMisses;
- extern const Event PerfInstructionTLBReferences;
- extern const Event PerfInstructionTLBMisses;
+#include <Common/ProfileEvents.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <pthread.h>
+#include <common/logger_useful.h>
+
+
+#if defined(__linux__)
+#include <linux/taskstats.h>
+#else
+struct taskstats {};
+#endif
+
+/** Implement ProfileEvents with statistics about resource consumption of the current thread.
+ */
+
+namespace ProfileEvents
+{
+ extern const Event RealTimeMicroseconds;
+ extern const Event UserTimeMicroseconds;
+ extern const Event SystemTimeMicroseconds;
+ extern const Event SoftPageFaults;
+ extern const Event HardPageFaults;
+ extern const Event VoluntaryContextSwitches;
+ extern const Event InvoluntaryContextSwitches;
+
+#if defined(__linux__)
+ extern const Event OSIOWaitMicroseconds;
+ extern const Event OSCPUWaitMicroseconds;
+ extern const Event OSCPUVirtualTimeMicroseconds;
+ extern const Event OSReadChars;
+ extern const Event OSWriteChars;
+ extern const Event OSReadBytes;
+ extern const Event OSWriteBytes;
+
+ extern const Event PerfCpuCycles;
+ extern const Event PerfInstructions;
+ extern const Event PerfCacheReferences;
+ extern const Event PerfCacheMisses;
+ extern const Event PerfBranchInstructions;
+ extern const Event PerfBranchMisses;
+ extern const Event PerfBusCycles;
+ extern const Event PerfStalledCyclesFrontend;
+ extern const Event PerfStalledCyclesBackend;
+ extern const Event PerfRefCpuCycles;
+
+ extern const Event PerfCpuClock;
+ extern const Event PerfTaskClock;
+ extern const Event PerfContextSwitches;
+ extern const Event PerfCpuMigrations;
+ extern const Event PerfAlignmentFaults;
+ extern const Event PerfEmulationFaults;
+ extern const Event PerfMinEnabledTime;
+ extern const Event PerfMinEnabledRunningTime;
+ extern const Event PerfDataTLBReferences;
+ extern const Event PerfDataTLBMisses;
+ extern const Event PerfInstructionTLBReferences;
+ extern const Event PerfInstructionTLBMisses;
extern const Event PerfLocalMemoryReferences;
extern const Event PerfLocalMemoryMisses;
-#endif
-}
-
-namespace DB
-{
-
-/// Handles overflow
-template <typename TUInt>
-inline TUInt safeDiff(TUInt prev, TUInt curr)
-{
- return curr >= prev ? curr - prev : 0;
-}
-
-
-struct RUsageCounters
-{
- /// In nanoseconds
- UInt64 real_time = 0;
- UInt64 user_time = 0;
- UInt64 sys_time = 0;
-
- UInt64 soft_page_faults = 0;
- UInt64 hard_page_faults = 0;
-
- RUsageCounters() = default;
- RUsageCounters(const ::rusage & rusage_, UInt64 real_time_)
- {
- set(rusage_, real_time_);
- }
-
- void set(const ::rusage & rusage, UInt64 real_time_)
- {
- real_time = real_time_;
- user_time = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL;
- sys_time = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL;
-
- soft_page_faults = static_cast<UInt64>(rusage.ru_minflt);
- hard_page_faults = static_cast<UInt64>(rusage.ru_majflt);
- }
-
+#endif
+}
+
+namespace DB
+{
+
+/// Handles overflow
+template <typename TUInt>
+inline TUInt safeDiff(TUInt prev, TUInt curr)
+{
+ return curr >= prev ? curr - prev : 0;
+}
+
+
+struct RUsageCounters
+{
+ /// In nanoseconds
+ UInt64 real_time = 0;
+ UInt64 user_time = 0;
+ UInt64 sys_time = 0;
+
+ UInt64 soft_page_faults = 0;
+ UInt64 hard_page_faults = 0;
+
+ RUsageCounters() = default;
+ RUsageCounters(const ::rusage & rusage_, UInt64 real_time_)
+ {
+ set(rusage_, real_time_);
+ }
+
+ void set(const ::rusage & rusage, UInt64 real_time_)
+ {
+ real_time = real_time_;
+ user_time = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL;
+ sys_time = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL;
+
+ soft_page_faults = static_cast<UInt64>(rusage.ru_minflt);
+ hard_page_faults = static_cast<UInt64>(rusage.ru_majflt);
+ }
+
static RUsageCounters current()
- {
- ::rusage rusage {};
-#if !defined(__APPLE__)
+ {
+ ::rusage rusage {};
+#if !defined(__APPLE__)
#if defined(OS_SUNOS)
::getrusage(RUSAGE_LWP, &rusage);
#else
- ::getrusage(RUSAGE_THREAD, &rusage);
+ ::getrusage(RUSAGE_THREAD, &rusage);
#endif // OS_SUNOS
#endif // __APPLE
return RUsageCounters(rusage, getClockMonotonic());
- }
-
- static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events)
- {
- profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U);
- profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U);
- profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U);
-
- profile_events.increment(ProfileEvents::SoftPageFaults, curr.soft_page_faults - prev.soft_page_faults);
- profile_events.increment(ProfileEvents::HardPageFaults, curr.hard_page_faults - prev.hard_page_faults);
- }
-
- static void updateProfileEvents(RUsageCounters & last_counters, ProfileEvents::Counters & profile_events)
- {
- auto current_counters = current();
- incrementProfileEvents(last_counters, current_counters, profile_events);
- last_counters = current_counters;
- }
+ }
+
+ static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events)
+ {
+ profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U);
+ profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U);
+ profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U);
+
+ profile_events.increment(ProfileEvents::SoftPageFaults, curr.soft_page_faults - prev.soft_page_faults);
+ profile_events.increment(ProfileEvents::HardPageFaults, curr.hard_page_faults - prev.hard_page_faults);
+ }
+
+ static void updateProfileEvents(RUsageCounters & last_counters, ProfileEvents::Counters & profile_events)
+ {
+ auto current_counters = current();
+ incrementProfileEvents(last_counters, current_counters, profile_events);
+ last_counters = current_counters;
+ }
private:
static inline UInt64 getClockMonotonic()
@@ -138,121 +138,121 @@ private:
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
-};
-
+};
+
#if defined(__linux__)
-
-struct PerfEventInfo
-{
- // see perf_event.h/perf_type_id enum
- int event_type;
- // see configs in perf_event.h
- int event_config;
- ProfileEvents::Event profile_event;
- std::string settings_name;
-};
-
-struct PerfEventValue
-{
- UInt64 value = 0;
- UInt64 time_enabled = 0;
- UInt64 time_running = 0;
-};
-
+
+struct PerfEventInfo
+{
+ // see perf_event.h/perf_type_id enum
+ int event_type;
+ // see configs in perf_event.h
+ int event_config;
+ ProfileEvents::Event profile_event;
+ std::string settings_name;
+};
+
+struct PerfEventValue
+{
+ UInt64 value = 0;
+ UInt64 time_enabled = 0;
+ UInt64 time_running = 0;
+};
+
static constexpr size_t NUMBER_OF_RAW_EVENTS = 22;
-
-struct PerfDescriptorsHolder : boost::noncopyable
-{
- int descriptors[NUMBER_OF_RAW_EVENTS]{};
-
- PerfDescriptorsHolder();
-
- ~PerfDescriptorsHolder();
-
- void releaseResources();
-};
-
-struct PerfEventsCounters
-{
- PerfDescriptorsHolder thread_events_descriptors_holder;
-
- // time_enabled and time_running can't be reset, so we have to store the
- // data from the previous profiling period and calculate deltas to them,
- // to be able to properly account for counter multiplexing.
- PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{};
-
-
- void initializeProfileEvents(const std::string & events_list);
- void finalizeProfileEvents(ProfileEvents::Counters & profile_events);
- void closeEventDescriptors();
- bool processThreadLocalChanges(const std::string & needed_events_list);
-
-
- static std::vector<size_t> eventIndicesFromString(const std::string & events_list);
-};
-
-// Perf event creation is moderately heavy, so we create them once per thread and
-// then reuse.
-extern thread_local PerfEventsCounters current_thread_counters;
-
-#else
-
+
+struct PerfDescriptorsHolder : boost::noncopyable
+{
+ int descriptors[NUMBER_OF_RAW_EVENTS]{};
+
+ PerfDescriptorsHolder();
+
+ ~PerfDescriptorsHolder();
+
+ void releaseResources();
+};
+
+struct PerfEventsCounters
+{
+ PerfDescriptorsHolder thread_events_descriptors_holder;
+
+ // time_enabled and time_running can't be reset, so we have to store the
+ // data from the previous profiling period and calculate deltas to them,
+ // to be able to properly account for counter multiplexing.
+ PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{};
+
+
+ void initializeProfileEvents(const std::string & events_list);
+ void finalizeProfileEvents(ProfileEvents::Counters & profile_events);
+ void closeEventDescriptors();
+ bool processThreadLocalChanges(const std::string & needed_events_list);
+
+
+ static std::vector<size_t> eventIndicesFromString(const std::string & events_list);
+};
+
+// Perf event creation is moderately heavy, so we create them once per thread and
+// then reuse.
+extern thread_local PerfEventsCounters current_thread_counters;
+
+#else
+
// the functionality is disabled when we are not running on Linux.
-struct PerfEventsCounters
-{
- void initializeProfileEvents(const std::string & /* events_list */) {}
- void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {}
- void closeEventDescriptors() {}
-};
-
-extern PerfEventsCounters current_thread_counters;
-
-#endif
-
-#if defined(__linux__)
-
-class TasksStatsCounters
-{
-public:
- static bool checkIfAvailable();
- static std::unique_ptr<TasksStatsCounters> create(const UInt64 tid);
-
- void reset();
- void updateCounters(ProfileEvents::Counters & profile_events);
-
-private:
- ::taskstats stats; //-V730_NOINIT
- std::function<::taskstats()> stats_getter;
-
- enum class MetricsProvider
- {
- None,
- Procfs,
- Netlink
- };
-
-private:
- explicit TasksStatsCounters(const UInt64 tid, const MetricsProvider provider);
-
- static MetricsProvider findBestAvailableProvider();
- static void incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events);
-};
-
-#else
-
-class TasksStatsCounters
-{
-public:
- static bool checkIfAvailable() { return false; }
- static std::unique_ptr<TasksStatsCounters> create(const UInt64 /*tid*/) { return {}; }
-
- void reset() {}
- void updateCounters(ProfileEvents::Counters &) {}
-
-private:
- TasksStatsCounters(const UInt64 /*tid*/) {}
-};
-
-#endif
-
-}
+struct PerfEventsCounters
+{
+ void initializeProfileEvents(const std::string & /* events_list */) {}
+ void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {}
+ void closeEventDescriptors() {}
+};
+
+extern PerfEventsCounters current_thread_counters;
+
+#endif
+
+#if defined(__linux__)
+
+class TasksStatsCounters
+{
+public:
+ static bool checkIfAvailable();
+ static std::unique_ptr<TasksStatsCounters> create(const UInt64 tid);
+
+ void reset();
+ void updateCounters(ProfileEvents::Counters & profile_events);
+
+private:
+ ::taskstats stats; //-V730_NOINIT
+ std::function<::taskstats()> stats_getter;
+
+ enum class MetricsProvider
+ {
+ None,
+ Procfs,
+ Netlink
+ };
+
+private:
+ explicit TasksStatsCounters(const UInt64 tid, const MetricsProvider provider);
+
+ static MetricsProvider findBestAvailableProvider();
+ static void incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events);
+};
+
+#else
+
+class TasksStatsCounters
+{
+public:
+ static bool checkIfAvailable() { return false; }
+ static std::unique_ptr<TasksStatsCounters> create(const UInt64 /*tid*/) { return {}; }
+
+ void reset() {}
+ void updateCounters(ProfileEvents::Counters &) {}
+
+private:
+ TasksStatsCounters(const UInt64 /*tid*/) {}
+};
+
+#endif
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h
index 33be285306..cae14b7e96 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h
@@ -1,37 +1,37 @@
-#pragma once
-
-#include <common/types.h>
-#include <future>
-#include <memory>
-#include <vector>
-#include <Common/ZooKeeper/IKeeper.h>
-#include <Poco/Event.h>
-
-
-namespace zkutil
-{
-
-using Strings = std::vector<std::string>;
-
-
-namespace CreateMode
-{
- extern const int Persistent;
- extern const int Ephemeral;
- extern const int EphemeralSequential;
- extern const int PersistentSequential;
-}
-
-using EventPtr = std::shared_ptr<Poco::Event>;
-
-/// Gets multiple asynchronous results
-/// Each pair, the first is path, the second is response eg. CreateResponse, RemoveResponse
-template <typename R>
-using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
-
-Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode);
-Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
-Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
-Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);
-
-}
+#pragma once
+
+#include <common/types.h>
+#include <future>
+#include <memory>
+#include <vector>
+#include <Common/ZooKeeper/IKeeper.h>
+#include <Poco/Event.h>
+
+
+namespace zkutil
+{
+
+using Strings = std::vector<std::string>;
+
+
+namespace CreateMode
+{
+ extern const int Persistent;
+ extern const int Ephemeral;
+ extern const int EphemeralSequential;
+ extern const int PersistentSequential;
+}
+
+using EventPtr = std::shared_ptr<Poco::Event>;
+
+/// Gets multiple asynchronous results
+/// Each pair, the first is path, the second is response eg. CreateResponse, RemoveResponse
+template <typename R>
+using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
+
+Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode);
+Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
+Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
+Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp
index 4f0c5efe68..08305276a8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.cpp
@@ -1,164 +1,164 @@
-#include <Common/ProfileEvents.h>
-#include <Common/ZooKeeper/IKeeper.h>
-
-
-namespace DB
-{
- namespace ErrorCodes
- {
- extern const int KEEPER_EXCEPTION;
- }
-}
-
-namespace ProfileEvents
-{
- extern const Event ZooKeeperUserExceptions;
- extern const Event ZooKeeperHardwareExceptions;
- extern const Event ZooKeeperOtherExceptions;
-}
-
-
-namespace Coordination
-{
-
-Exception::Exception(const std::string & msg, const Error code_, int)
- : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION), code(code_)
-{
- if (Coordination::isUserError(code))
- ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions);
- else if (Coordination::isHardwareError(code))
- ProfileEvents::increment(ProfileEvents::ZooKeeperHardwareExceptions);
- else
- ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions);
-}
-
-Exception::Exception(const std::string & msg, const Error code_)
- : Exception(msg + " (" + errorMessage(code_) + ")", code_, 0)
-{
-}
-
-Exception::Exception(const Error code_)
- : Exception(errorMessage(code_), code_, 0)
-{
-}
-
-Exception::Exception(const Error code_, const std::string & path)
- : Exception(std::string{errorMessage(code_)} + ", path: " + path, code_, 0)
-{
-}
-
-Exception::Exception(const Exception & exc) = default;
-
-
-using namespace DB;
-
-
-static void addRootPath(String & path, const String & root_path)
-{
- if (path.empty())
- throw Exception("Path cannot be empty", Error::ZBADARGUMENTS);
-
- if (path[0] != '/')
+#include <Common/ProfileEvents.h>
+#include <Common/ZooKeeper/IKeeper.h>
+
+
+namespace DB
+{
+ namespace ErrorCodes
+ {
+ extern const int KEEPER_EXCEPTION;
+ }
+}
+
+namespace ProfileEvents
+{
+ extern const Event ZooKeeperUserExceptions;
+ extern const Event ZooKeeperHardwareExceptions;
+ extern const Event ZooKeeperOtherExceptions;
+}
+
+
+namespace Coordination
+{
+
+Exception::Exception(const std::string & msg, const Error code_, int)
+ : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION), code(code_)
+{
+ if (Coordination::isUserError(code))
+ ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions);
+ else if (Coordination::isHardwareError(code))
+ ProfileEvents::increment(ProfileEvents::ZooKeeperHardwareExceptions);
+ else
+ ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions);
+}
+
+Exception::Exception(const std::string & msg, const Error code_)
+ : Exception(msg + " (" + errorMessage(code_) + ")", code_, 0)
+{
+}
+
+Exception::Exception(const Error code_)
+ : Exception(errorMessage(code_), code_, 0)
+{
+}
+
+Exception::Exception(const Error code_, const std::string & path)
+ : Exception(std::string{errorMessage(code_)} + ", path: " + path, code_, 0)
+{
+}
+
+Exception::Exception(const Exception & exc) = default;
+
+
+using namespace DB;
+
+
+static void addRootPath(String & path, const String & root_path)
+{
+ if (path.empty())
+ throw Exception("Path cannot be empty", Error::ZBADARGUMENTS);
+
+ if (path[0] != '/')
throw Exception("Path must begin with /, got " + path, Error::ZBADARGUMENTS);
-
- if (root_path.empty())
- return;
-
- if (path.size() == 1) /// "/"
- path = root_path;
- else
- path = root_path + path;
-}
-
-static void removeRootPath(String & path, const String & root_path)
-{
- if (root_path.empty())
- return;
-
- if (path.size() <= root_path.size())
- throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY);
-
- path = path.substr(root_path.size());
-}
-
-
-const char * errorMessage(Error code)
-{
- switch (code)
- {
- case Error::ZOK: return "Ok";
- case Error::ZSYSTEMERROR: return "System error";
- case Error::ZRUNTIMEINCONSISTENCY: return "Run time inconsistency";
- case Error::ZDATAINCONSISTENCY: return "Data inconsistency";
- case Error::ZCONNECTIONLOSS: return "Connection loss";
- case Error::ZMARSHALLINGERROR: return "Marshalling error";
- case Error::ZUNIMPLEMENTED: return "Unimplemented";
- case Error::ZOPERATIONTIMEOUT: return "Operation timeout";
- case Error::ZBADARGUMENTS: return "Bad arguments";
- case Error::ZINVALIDSTATE: return "Invalid zhandle state";
- case Error::ZAPIERROR: return "API error";
- case Error::ZNONODE: return "No node";
- case Error::ZNOAUTH: return "Not authenticated";
- case Error::ZBADVERSION: return "Bad version";
- case Error::ZNOCHILDRENFOREPHEMERALS: return "No children for ephemerals";
- case Error::ZNODEEXISTS: return "Node exists";
- case Error::ZNOTEMPTY: return "Not empty";
- case Error::ZSESSIONEXPIRED: return "Session expired";
- case Error::ZINVALIDCALLBACK: return "Invalid callback";
- case Error::ZINVALIDACL: return "Invalid ACL";
- case Error::ZAUTHFAILED: return "Authentication failed";
- case Error::ZCLOSING: return "ZooKeeper is closing";
- case Error::ZNOTHING: return "(not error) no server responses to process";
- case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored";
- }
-
- __builtin_unreachable();
-}
-
-bool isHardwareError(Error zk_return_code)
-{
- return zk_return_code == Error::ZINVALIDSTATE
- || zk_return_code == Error::ZSESSIONEXPIRED
- || zk_return_code == Error::ZSESSIONMOVED
- || zk_return_code == Error::ZCONNECTIONLOSS
- || zk_return_code == Error::ZMARSHALLINGERROR
- || zk_return_code == Error::ZOPERATIONTIMEOUT;
-}
-
-bool isUserError(Error zk_return_code)
-{
- return zk_return_code == Error::ZNONODE
- || zk_return_code == Error::ZBADVERSION
- || zk_return_code == Error::ZNOCHILDRENFOREPHEMERALS
- || zk_return_code == Error::ZNODEEXISTS
- || zk_return_code == Error::ZNOTEMPTY;
-}
-
-
-void CreateRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-void RemoveRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-void ExistsRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-void GetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-void SetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-void ListRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-void CheckRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+
+ if (root_path.empty())
+ return;
+
+ if (path.size() == 1) /// "/"
+ path = root_path;
+ else
+ path = root_path + path;
+}
+
+static void removeRootPath(String & path, const String & root_path)
+{
+ if (root_path.empty())
+ return;
+
+ if (path.size() <= root_path.size())
+ throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY);
+
+ path = path.substr(root_path.size());
+}
+
+
+const char * errorMessage(Error code)
+{
+ switch (code)
+ {
+ case Error::ZOK: return "Ok";
+ case Error::ZSYSTEMERROR: return "System error";
+ case Error::ZRUNTIMEINCONSISTENCY: return "Run time inconsistency";
+ case Error::ZDATAINCONSISTENCY: return "Data inconsistency";
+ case Error::ZCONNECTIONLOSS: return "Connection loss";
+ case Error::ZMARSHALLINGERROR: return "Marshalling error";
+ case Error::ZUNIMPLEMENTED: return "Unimplemented";
+ case Error::ZOPERATIONTIMEOUT: return "Operation timeout";
+ case Error::ZBADARGUMENTS: return "Bad arguments";
+ case Error::ZINVALIDSTATE: return "Invalid zhandle state";
+ case Error::ZAPIERROR: return "API error";
+ case Error::ZNONODE: return "No node";
+ case Error::ZNOAUTH: return "Not authenticated";
+ case Error::ZBADVERSION: return "Bad version";
+ case Error::ZNOCHILDRENFOREPHEMERALS: return "No children for ephemerals";
+ case Error::ZNODEEXISTS: return "Node exists";
+ case Error::ZNOTEMPTY: return "Not empty";
+ case Error::ZSESSIONEXPIRED: return "Session expired";
+ case Error::ZINVALIDCALLBACK: return "Invalid callback";
+ case Error::ZINVALIDACL: return "Invalid ACL";
+ case Error::ZAUTHFAILED: return "Authentication failed";
+ case Error::ZCLOSING: return "ZooKeeper is closing";
+ case Error::ZNOTHING: return "(not error) no server responses to process";
+ case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored";
+ }
+
+ __builtin_unreachable();
+}
+
+bool isHardwareError(Error zk_return_code)
+{
+ return zk_return_code == Error::ZINVALIDSTATE
+ || zk_return_code == Error::ZSESSIONEXPIRED
+ || zk_return_code == Error::ZSESSIONMOVED
+ || zk_return_code == Error::ZCONNECTIONLOSS
+ || zk_return_code == Error::ZMARSHALLINGERROR
+ || zk_return_code == Error::ZOPERATIONTIMEOUT;
+}
+
+bool isUserError(Error zk_return_code)
+{
+ return zk_return_code == Error::ZNONODE
+ || zk_return_code == Error::ZBADVERSION
+ || zk_return_code == Error::ZNOCHILDRENFOREPHEMERALS
+ || zk_return_code == Error::ZNODEEXISTS
+ || zk_return_code == Error::ZNOTEMPTY;
+}
+
+
+void CreateRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+void RemoveRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+void ExistsRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+void GetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+void SetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+void ListRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
+void CheckRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void SetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void GetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
-
-void MultiRequest::addRootPath(const String & root_path)
-{
- for (auto & request : requests)
- request->addRootPath(root_path);
-}
-
-void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); }
-void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); }
-
-void MultiResponse::removeRootPath(const String & root_path)
-{
- for (auto & response : responses)
- response->removeRootPath(root_path);
-}
-
-}
-
+
+void MultiRequest::addRootPath(const String & root_path)
+{
+ for (auto & request : requests)
+ request->addRootPath(root_path);
+}
+
+void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); }
+void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); }
+
+void MultiResponse::removeRootPath(const String & root_path)
+{
+ for (auto & response : responses)
+ response->removeRootPath(root_path);
+}
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h
index 30d816aad1..1a49a78caf 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/IKeeper.h
@@ -1,153 +1,153 @@
-#pragma once
-
+#pragma once
+
#include <common/types.h>
-#include <Common/Exception.h>
-
-#include <vector>
-#include <memory>
-#include <cstdint>
-#include <functional>
-
-/** Generic interface for ZooKeeper-like services.
- * Possible examples are:
- * - ZooKeeper client itself;
- * - fake ZooKeeper client for testing;
- * - ZooKeeper emulation layer on top of Etcd, FoundationDB, whatever.
- */
-
-
-namespace Coordination
-{
-
-using namespace DB;
-
-
-struct ACL
-{
- static constexpr int32_t Read = 1;
- static constexpr int32_t Write = 2;
- static constexpr int32_t Create = 4;
- static constexpr int32_t Delete = 8;
- static constexpr int32_t Admin = 16;
- static constexpr int32_t All = 0x1F;
-
- int32_t permissions;
- String scheme;
- String id;
-};
-
-using ACLs = std::vector<ACL>;
-
-struct Stat
-{
- int64_t czxid;
- int64_t mzxid;
- int64_t ctime;
- int64_t mtime;
- int32_t version;
- int32_t cversion;
- int32_t aversion;
- int64_t ephemeralOwner;
- int32_t dataLength;
- int32_t numChildren;
- int64_t pzxid;
-};
-
-enum class Error : int32_t
-{
- ZOK = 0,
-
- /** System and server-side errors.
- * This is never thrown by the server, it shouldn't be used other than
- * to indicate a range. Specifically error codes greater than this
- * value, but lesser than ZAPIERROR, are system errors.
- */
- ZSYSTEMERROR = -1,
-
- ZRUNTIMEINCONSISTENCY = -2, /// A runtime inconsistency was found
- ZDATAINCONSISTENCY = -3, /// A data inconsistency was found
- ZCONNECTIONLOSS = -4, /// Connection to the server has been lost
- ZMARSHALLINGERROR = -5, /// Error while marshalling or unmarshalling data
- ZUNIMPLEMENTED = -6, /// Operation is unimplemented
- ZOPERATIONTIMEOUT = -7, /// Operation timeout
- ZBADARGUMENTS = -8, /// Invalid arguments
- ZINVALIDSTATE = -9, /// Invliad zhandle state
-
- /** API errors.
- * This is never thrown by the server, it shouldn't be used other than
- * to indicate a range. Specifically error codes greater than this
- * value are API errors.
- */
- ZAPIERROR = -100,
-
- ZNONODE = -101, /// Node does not exist
- ZNOAUTH = -102, /// Not authenticated
- ZBADVERSION = -103, /// Version conflict
- ZNOCHILDRENFOREPHEMERALS = -108, /// Ephemeral nodes may not have children
- ZNODEEXISTS = -110, /// The node already exists
- ZNOTEMPTY = -111, /// The node has children
- ZSESSIONEXPIRED = -112, /// The session has been expired by the server
- ZINVALIDCALLBACK = -113, /// Invalid callback specified
- ZINVALIDACL = -114, /// Invalid ACL specified
- ZAUTHFAILED = -115, /// Client authentication failed
- ZCLOSING = -116, /// ZooKeeper is closing
- ZNOTHING = -117, /// (not error) no server responses to process
- ZSESSIONMOVED = -118 /// Session moved to another server, so operation is ignored
-};
-
-/// Network errors and similar. You should reinitialize ZooKeeper session in case of these errors
-bool isHardwareError(Error code);
-
-/// Valid errors sent from the server about database state (like "no node"). Logical and authentication errors (like "bad arguments") are not here.
-bool isUserError(Error code);
-
-const char * errorMessage(Error code);
-
-
-struct Request;
-using RequestPtr = std::shared_ptr<Request>;
-using Requests = std::vector<RequestPtr>;
-
-struct Request
-{
- Request() = default;
- Request(const Request &) = default;
- Request & operator=(const Request &) = default;
- virtual ~Request() = default;
- virtual String getPath() const = 0;
- virtual void addRootPath(const String & /* root_path */) {}
+#include <Common/Exception.h>
+
+#include <vector>
+#include <memory>
+#include <cstdint>
+#include <functional>
+
+/** Generic interface for ZooKeeper-like services.
+ * Possible examples are:
+ * - ZooKeeper client itself;
+ * - fake ZooKeeper client for testing;
+ * - ZooKeeper emulation layer on top of Etcd, FoundationDB, whatever.
+ */
+
+
+namespace Coordination
+{
+
+using namespace DB;
+
+
+struct ACL
+{
+ static constexpr int32_t Read = 1;
+ static constexpr int32_t Write = 2;
+ static constexpr int32_t Create = 4;
+ static constexpr int32_t Delete = 8;
+ static constexpr int32_t Admin = 16;
+ static constexpr int32_t All = 0x1F;
+
+ int32_t permissions;
+ String scheme;
+ String id;
+};
+
+using ACLs = std::vector<ACL>;
+
+struct Stat
+{
+ int64_t czxid;
+ int64_t mzxid;
+ int64_t ctime;
+ int64_t mtime;
+ int32_t version;
+ int32_t cversion;
+ int32_t aversion;
+ int64_t ephemeralOwner;
+ int32_t dataLength;
+ int32_t numChildren;
+ int64_t pzxid;
+};
+
+enum class Error : int32_t
+{
+ ZOK = 0,
+
+ /** System and server-side errors.
+ * This is never thrown by the server, it shouldn't be used other than
+ * to indicate a range. Specifically error codes greater than this
+ * value, but lesser than ZAPIERROR, are system errors.
+ */
+ ZSYSTEMERROR = -1,
+
+ ZRUNTIMEINCONSISTENCY = -2, /// A runtime inconsistency was found
+ ZDATAINCONSISTENCY = -3, /// A data inconsistency was found
+ ZCONNECTIONLOSS = -4, /// Connection to the server has been lost
+ ZMARSHALLINGERROR = -5, /// Error while marshalling or unmarshalling data
+ ZUNIMPLEMENTED = -6, /// Operation is unimplemented
+ ZOPERATIONTIMEOUT = -7, /// Operation timeout
+ ZBADARGUMENTS = -8, /// Invalid arguments
+ ZINVALIDSTATE = -9, /// Invalid zhandle state
+
+ /** API errors.
+ * This is never thrown by the server, it shouldn't be used other than
+ * to indicate a range. Specifically error codes greater than this
+ * value are API errors.
+ */
+ ZAPIERROR = -100,
+
+ ZNONODE = -101, /// Node does not exist
+ ZNOAUTH = -102, /// Not authenticated
+ ZBADVERSION = -103, /// Version conflict
+ ZNOCHILDRENFOREPHEMERALS = -108, /// Ephemeral nodes may not have children
+ ZNODEEXISTS = -110, /// The node already exists
+ ZNOTEMPTY = -111, /// The node has children
+ ZSESSIONEXPIRED = -112, /// The session has been expired by the server
+ ZINVALIDCALLBACK = -113, /// Invalid callback specified
+ ZINVALIDACL = -114, /// Invalid ACL specified
+ ZAUTHFAILED = -115, /// Client authentication failed
+ ZCLOSING = -116, /// ZooKeeper is closing
+ ZNOTHING = -117, /// (not error) no server responses to process
+ ZSESSIONMOVED = -118 /// Session moved to another server, so operation is ignored
+};
+
+/// Network errors and similar. You should reinitialize ZooKeeper session in case of these errors
+bool isHardwareError(Error code);
+
+/// Valid errors sent from the server about database state (like "no node"). Logical and authentication errors (like "bad arguments") are not here.
+bool isUserError(Error code);
+
+const char * errorMessage(Error code);
+
+
+struct Request;
+using RequestPtr = std::shared_ptr<Request>;
+using Requests = std::vector<RequestPtr>;
+
+struct Request
+{
+ Request() = default;
+ Request(const Request &) = default;
+ Request & operator=(const Request &) = default;
+ virtual ~Request() = default;
+ virtual String getPath() const = 0;
+ virtual void addRootPath(const String & /* root_path */) {}
virtual size_t bytesSize() const { return 0; }
-};
-
-struct Response;
-using ResponsePtr = std::shared_ptr<Response>;
-using Responses = std::vector<ResponsePtr>;
-using ResponseCallback = std::function<void(const Response &)>;
-
-struct Response
-{
- Error error = Error::ZOK;
- Response() = default;
- Response(const Response &) = default;
- Response & operator=(const Response &) = default;
- virtual ~Response() = default;
- virtual void removeRootPath(const String & /* root_path */) {}
+};
+
+struct Response;
+using ResponsePtr = std::shared_ptr<Response>;
+using Responses = std::vector<ResponsePtr>;
+using ResponseCallback = std::function<void(const Response &)>;
+
+struct Response
+{
+ Error error = Error::ZOK;
+ Response() = default;
+ Response(const Response &) = default;
+ Response & operator=(const Response &) = default;
+ virtual ~Response() = default;
+ virtual void removeRootPath(const String & /* root_path */) {}
virtual size_t bytesSize() const { return 0; }
-};
-
-struct WatchResponse : virtual Response
-{
- int32_t type = 0;
- int32_t state = 0;
- String path;
-
- void removeRootPath(const String & root_path) override;
+};
+
+struct WatchResponse : virtual Response
+{
+ int32_t type = 0;
+ int32_t state = 0;
+ String path;
+
+ void removeRootPath(const String & root_path) override;
size_t bytesSize() const override { return path.size() + sizeof(type) + sizeof(state); }
-};
-
-using WatchCallback = std::function<void(const WatchResponse &)>;
-
+};
+
+using WatchCallback = std::function<void(const WatchResponse &)>;
+
struct SetACLRequest : virtual Request
{
String path;
@@ -182,113 +182,113 @@ struct GetACLResponse : virtual Response
size_t bytesSize() const override { return sizeof(Stat) + acl.size() * sizeof(ACL); }
};
-struct CreateRequest : virtual Request
-{
- String path;
- String data;
- bool is_ephemeral = false;
- bool is_sequential = false;
- ACLs acls;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+struct CreateRequest : virtual Request
+{
+ String path;
+ String data;
+ bool is_ephemeral = false;
+ bool is_sequential = false;
+ ACLs acls;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return path.size() + data.size()
+ sizeof(is_ephemeral) + sizeof(is_sequential) + acls.size() * sizeof(ACL); }
-};
-
-struct CreateResponse : virtual Response
-{
- String path_created;
-
- void removeRootPath(const String & root_path) override;
+};
+
+struct CreateResponse : virtual Response
+{
+ String path_created;
+
+ void removeRootPath(const String & root_path) override;
size_t bytesSize() const override { return path_created.size(); }
-};
-
-struct RemoveRequest : virtual Request
-{
- String path;
- int32_t version = -1;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+};
+
+struct RemoveRequest : virtual Request
+{
+ String path;
+ int32_t version = -1;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return path.size() + sizeof(version); }
-};
-
-struct RemoveResponse : virtual Response
-{
-};
-
-struct ExistsRequest : virtual Request
-{
- String path;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+};
+
+struct RemoveResponse : virtual Response
+{
+};
+
+struct ExistsRequest : virtual Request
+{
+ String path;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return path.size(); }
-};
-
-struct ExistsResponse : virtual Response
-{
- Stat stat;
+};
+
+struct ExistsResponse : virtual Response
+{
+ Stat stat;
size_t bytesSize() const override { return sizeof(Stat); }
-};
-
-struct GetRequest : virtual Request
-{
- String path;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+};
+
+struct GetRequest : virtual Request
+{
+ String path;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return path.size(); }
-};
-
-struct GetResponse : virtual Response
-{
- String data;
- Stat stat;
+};
+
+struct GetResponse : virtual Response
+{
+ String data;
+ Stat stat;
size_t bytesSize() const override { return data.size() + sizeof(stat); }
-};
-
-struct SetRequest : virtual Request
-{
- String path;
- String data;
- int32_t version = -1;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+};
+
+struct SetRequest : virtual Request
+{
+ String path;
+ String data;
+ int32_t version = -1;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return data.size() + data.size() + sizeof(version); }
-};
-
-struct SetResponse : virtual Response
-{
- Stat stat;
+};
+
+struct SetResponse : virtual Response
+{
+ Stat stat;
size_t bytesSize() const override { return sizeof(stat); }
-};
-
-struct ListRequest : virtual Request
-{
- String path;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+};
+
+struct ListRequest : virtual Request
+{
+ String path;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return path.size(); }
-};
-
-struct ListResponse : virtual Response
-{
- std::vector<String> names;
- Stat stat;
+};
+
+struct ListResponse : virtual Response
+{
+ std::vector<String> names;
+ Stat stat;
size_t bytesSize() const override
{
@@ -297,29 +297,29 @@ struct ListResponse : virtual Response
size += name.size();
return size;
}
-};
-
-struct CheckRequest : virtual Request
-{
- String path;
- int32_t version = -1;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return path; }
+};
+
+struct CheckRequest : virtual Request
+{
+ String path;
+ int32_t version = -1;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return path; }
size_t bytesSize() const override { return path.size() + sizeof(version); }
-};
-
-struct CheckResponse : virtual Response
-{
-};
-
-struct MultiRequest : virtual Request
-{
- Requests requests;
-
- void addRootPath(const String & root_path) override;
- String getPath() const override { return {}; }
+};
+
+struct CheckResponse : virtual Response
+{
+};
+
+struct MultiRequest : virtual Request
+{
+ Requests requests;
+
+ void addRootPath(const String & root_path) override;
+ String getPath() const override { return {}; }
size_t bytesSize() const override
{
@@ -328,13 +328,13 @@ struct MultiRequest : virtual Request
size += request->bytesSize();
return size;
}
-};
-
-struct MultiResponse : virtual Response
-{
- Responses responses;
-
- void removeRootPath(const String & root_path) override;
+};
+
+struct MultiResponse : virtual Response
+{
+ Responses responses;
+
+ void removeRootPath(const String & root_path) override;
size_t bytesSize() const override
{
@@ -343,145 +343,145 @@ struct MultiResponse : virtual Response
size += response->bytesSize();
return size;
}
-};
-
-/// This response may be received only as an element of responses in MultiResponse.
-struct ErrorResponse : virtual Response
-{
-};
-
-
-using CreateCallback = std::function<void(const CreateResponse &)>;
-using RemoveCallback = std::function<void(const RemoveResponse &)>;
-using ExistsCallback = std::function<void(const ExistsResponse &)>;
-using GetCallback = std::function<void(const GetResponse &)>;
-using SetCallback = std::function<void(const SetResponse &)>;
-using ListCallback = std::function<void(const ListResponse &)>;
-using CheckCallback = std::function<void(const CheckResponse &)>;
-using MultiCallback = std::function<void(const MultiResponse &)>;
-
-
-/// For watches.
-enum State
-{
- EXPIRED_SESSION = -112,
- AUTH_FAILED = -113,
- CONNECTING = 1,
- ASSOCIATING = 2,
- CONNECTED = 3,
- NOTCONNECTED = 999
-};
-
-enum Event
-{
- CREATED = 1,
- DELETED = 2,
- CHANGED = 3,
- CHILD = 4,
- SESSION = -1,
- NOTWATCHING = -2
-};
-
-
-class Exception : public DB::Exception
-{
-private:
- /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution.
- Exception(const std::string & msg, const Error code_, int);
-
-public:
- explicit Exception(const Error code_);
- Exception(const std::string & msg, const Error code_);
- Exception(const Error code_, const std::string & path);
- Exception(const Exception & exc);
-
- const char * name() const throw() override { return "Coordination::Exception"; }
- const char * className() const throw() override { return "Coordination::Exception"; }
- Exception * clone() const override { return new Exception(*this); }
-
- const Error code;
-};
-
-
-/** Usage scenario:
- * - create an object and issue commands;
- * - you provide callbacks for your commands; callbacks are invoked in internal thread and must be cheap:
- * for example, just signal a condvar / fulfull a promise.
- * - you also may provide callbacks for watches; they are also invoked in internal thread and must be cheap.
- * - whenever you receive exception with ZSESSIONEXPIRED code or method isExpired returns true,
- * the ZooKeeper instance is no longer usable - you may only destroy it and probably create another.
- * - whenever session is expired or ZooKeeper instance is destroying, all callbacks are notified with special event.
+};
+
+/// This response may be received only as an element of responses in MultiResponse.
+struct ErrorResponse : virtual Response
+{
+};
+
+
+using CreateCallback = std::function<void(const CreateResponse &)>;
+using RemoveCallback = std::function<void(const RemoveResponse &)>;
+using ExistsCallback = std::function<void(const ExistsResponse &)>;
+using GetCallback = std::function<void(const GetResponse &)>;
+using SetCallback = std::function<void(const SetResponse &)>;
+using ListCallback = std::function<void(const ListResponse &)>;
+using CheckCallback = std::function<void(const CheckResponse &)>;
+using MultiCallback = std::function<void(const MultiResponse &)>;
+
+
+/// For watches.
+enum State
+{
+ EXPIRED_SESSION = -112,
+ AUTH_FAILED = -113,
+ CONNECTING = 1,
+ ASSOCIATING = 2,
+ CONNECTED = 3,
+ NOTCONNECTED = 999
+};
+
+enum Event
+{
+ CREATED = 1,
+ DELETED = 2,
+ CHANGED = 3,
+ CHILD = 4,
+ SESSION = -1,
+ NOTWATCHING = -2
+};
+
+
+class Exception : public DB::Exception
+{
+private:
+ /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution.
+ Exception(const std::string & msg, const Error code_, int);
+
+public:
+ explicit Exception(const Error code_);
+ Exception(const std::string & msg, const Error code_);
+ Exception(const Error code_, const std::string & path);
+ Exception(const Exception & exc);
+
+ const char * name() const throw() override { return "Coordination::Exception"; }
+ const char * className() const throw() override { return "Coordination::Exception"; }
+ Exception * clone() const override { return new Exception(*this); }
+
+ const Error code;
+};
+
+
+/** Usage scenario:
+ * - create an object and issue commands;
+ * - you provide callbacks for your commands; callbacks are invoked in internal thread and must be cheap:
+ * for example, just signal a condvar / fulfill a promise.
+ * - you also may provide callbacks for watches; they are also invoked in internal thread and must be cheap.
+ * - whenever you receive exception with ZSESSIONEXPIRED code or method isExpired returns true,
+ * the ZooKeeper instance is no longer usable - you may only destroy it and probably create another.
+ * - whenever the session is expired or the ZooKeeper instance is being destroyed, all callbacks are notified with a special event.
* - data for callbacks must be alive when ZooKeeper instance is alive, so try to avoid capturing references in callbacks, it's error-prone.
- */
-class IKeeper
-{
-public:
+ */
+class IKeeper
+{
+public:
virtual ~IKeeper() = default;
-
- /// If expired, you can only destroy the object. All other methods will throw exception.
- virtual bool isExpired() const = 0;
-
- /// Useful to check owner of ephemeral node.
- virtual int64_t getSessionID() const = 0;
-
- /// If the method will throw an exception, callbacks won't be called.
- ///
- /// After the method is executed successfully, you must wait for callbacks
- /// (don't destroy callback data before it will be called).
+
+ /// If expired, you can only destroy the object. All other methods will throw exception.
+ virtual bool isExpired() const = 0;
+
+ /// Useful to check owner of ephemeral node.
+ virtual int64_t getSessionID() const = 0;
+
+ /// If the method throws an exception, callbacks won't be called.
+ ///
+ /// After the method is executed successfully, you must wait for callbacks
+ /// (don't destroy callback data before the callback has been called).
/// TODO: The above line is the description of an error-prone interface. It's better
/// to replace callbacks with std::future results, so the caller shouldn't think about
/// lifetime of the callback data.
- ///
- /// All callbacks are executed sequentially (the execution of callbacks is serialized).
- ///
- /// If an exception is thrown inside the callback, the session will expire,
- /// and all other callbacks will be called with "Session expired" error.
-
- virtual void create(
- const String & path,
- const String & data,
- bool is_ephemeral,
- bool is_sequential,
- const ACLs & acls,
- CreateCallback callback) = 0;
-
- virtual void remove(
- const String & path,
- int32_t version,
- RemoveCallback callback) = 0;
-
- virtual void exists(
- const String & path,
- ExistsCallback callback,
- WatchCallback watch) = 0;
-
- virtual void get(
- const String & path,
- GetCallback callback,
- WatchCallback watch) = 0;
-
- virtual void set(
- const String & path,
- const String & data,
- int32_t version,
- SetCallback callback) = 0;
-
- virtual void list(
- const String & path,
- ListCallback callback,
- WatchCallback watch) = 0;
-
- virtual void check(
- const String & path,
- int32_t version,
- CheckCallback callback) = 0;
-
- virtual void multi(
- const Requests & requests,
- MultiCallback callback) = 0;
+ ///
+ /// All callbacks are executed sequentially (the execution of callbacks is serialized).
+ ///
+ /// If an exception is thrown inside the callback, the session will expire,
+ /// and all other callbacks will be called with "Session expired" error.
+
+ virtual void create(
+ const String & path,
+ const String & data,
+ bool is_ephemeral,
+ bool is_sequential,
+ const ACLs & acls,
+ CreateCallback callback) = 0;
+
+ virtual void remove(
+ const String & path,
+ int32_t version,
+ RemoveCallback callback) = 0;
+
+ virtual void exists(
+ const String & path,
+ ExistsCallback callback,
+ WatchCallback watch) = 0;
+
+ virtual void get(
+ const String & path,
+ GetCallback callback,
+ WatchCallback watch) = 0;
+
+ virtual void set(
+ const String & path,
+ const String & data,
+ int32_t version,
+ SetCallback callback) = 0;
+
+ virtual void list(
+ const String & path,
+ ListCallback callback,
+ WatchCallback watch) = 0;
+
+ virtual void check(
+ const String & path,
+ int32_t version,
+ CheckCallback callback) = 0;
+
+ virtual void multi(
+ const Requests & requests,
+ MultiCallback callback) = 0;
/// Expire session and finish all pending requests
virtual void finalize() = 0;
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h
index 33be285306..cae14b7e96 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h
@@ -1,37 +1,37 @@
-#pragma once
-
-#include <common/types.h>
-#include <future>
-#include <memory>
-#include <vector>
-#include <Common/ZooKeeper/IKeeper.h>
-#include <Poco/Event.h>
-
-
-namespace zkutil
-{
-
-using Strings = std::vector<std::string>;
-
-
-namespace CreateMode
-{
- extern const int Persistent;
- extern const int Ephemeral;
- extern const int EphemeralSequential;
- extern const int PersistentSequential;
-}
-
-using EventPtr = std::shared_ptr<Poco::Event>;
-
-/// Gets multiple asynchronous results
-/// Each pair, the first is path, the second is response eg. CreateResponse, RemoveResponse
-template <typename R>
-using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
-
-Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode);
-Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
-Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
-Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);
-
-}
+#pragma once
+
+#include <common/types.h>
+#include <future>
+#include <memory>
+#include <vector>
+#include <Common/ZooKeeper/IKeeper.h>
+#include <Poco/Event.h>
+
+
+namespace zkutil
+{
+
+using Strings = std::vector<std::string>;
+
+
+namespace CreateMode
+{
+ extern const int Persistent;
+ extern const int Ephemeral;
+ extern const int EphemeralSequential;
+ extern const int PersistentSequential;
+}
+
+using EventPtr = std::shared_ptr<Poco::Event>;
+
+/// Gets multiple asynchronous results
+/// In each pair, the first element is the path and the second is the response, e.g. CreateResponse, RemoveResponse
+template <typename R>
+using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
+
+Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode);
+Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
+Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
+Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp
index 5ed82231b2..b1042ef23e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.cpp
@@ -1,45 +1,45 @@
-#include <Common/createHardLink.h>
-#include <Common/Exception.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/stat.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int CANNOT_STAT;
- extern const int CANNOT_LINK;
-}
-
-void createHardLink(const String & source_path, const String & destination_path)
-{
- if (0 != link(source_path.c_str(), destination_path.c_str()))
- {
- if (errno == EEXIST)
- {
- auto link_errno = errno;
-
- struct stat source_descr;
- struct stat destination_descr;
-
- if (0 != lstat(source_path.c_str(), &source_descr))
- throwFromErrnoWithPath("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT);
-
- if (0 != lstat(destination_path.c_str(), &destination_descr))
- throwFromErrnoWithPath("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT);
-
- if (source_descr.st_ino != destination_descr.st_ino)
- throwFromErrnoWithPath(
- "Destination file " + destination_path + " is already exist and have different inode.",
- destination_path, ErrorCodes::CANNOT_LINK, link_errno);
- }
- else
- throwFromErrnoWithPath("Cannot link " + source_path + " to " + destination_path, destination_path,
- ErrorCodes::CANNOT_LINK);
- }
-}
-
-}
+#include <Common/createHardLink.h>
+#include <Common/Exception.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_STAT;
+ extern const int CANNOT_LINK;
+}
+
+void createHardLink(const String & source_path, const String & destination_path)
+{
+ if (0 != link(source_path.c_str(), destination_path.c_str()))
+ {
+ if (errno == EEXIST)
+ {
+ auto link_errno = errno;
+
+ struct stat source_descr;
+ struct stat destination_descr;
+
+ if (0 != lstat(source_path.c_str(), &source_descr))
+ throwFromErrnoWithPath("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT);
+
+ if (0 != lstat(destination_path.c_str(), &destination_descr))
+ throwFromErrnoWithPath("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT);
+
+ if (source_descr.st_ino != destination_descr.st_ino)
+ throwFromErrnoWithPath(
+ "Destination file " + destination_path + " is already exist and have different inode.",
+ destination_path, ErrorCodes::CANNOT_LINK, link_errno);
+ }
+ else
+ throwFromErrnoWithPath("Cannot link " + source_path + " to " + destination_path, destination_path,
+ ErrorCodes::CANNOT_LINK);
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h
index c2b01cf817..f4b4d6204c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/createHardLink.h
@@ -1,12 +1,12 @@
-#pragma once
-
+#pragma once
+
#include <common/types.h>
-
-namespace DB
-{
-
-/// Create a hard link `destination_path` pointing to `source_path`.
-/// If the destination already exists, check that it has the same inode (and throw if they are different).
-void createHardLink(const String & source_path, const String & destination_path);
-
-}
+
+namespace DB
+{
+
+/// Create a hard link `destination_path` pointing to `source_path`.
+/// If the destination already exists, check that it has the same inode (and throw if they are different).
+void createHardLink(const String & source_path, const String & destination_path);
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp
index 0fbebe21de..9d897d043d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.cpp
@@ -1,161 +1,161 @@
-#include <Common/formatIPv6.h>
-#include <Common/hex.h>
-
+#include <Common/formatIPv6.h>
+#include <Common/hex.h>
+
#include <common/range.h>
-#include <array>
-#include <algorithm>
-
-namespace DB
-{
-
-// To be used in formatIPv4, maps a byte to it's string form prefixed with length (so save strlen call).
-extern const char one_byte_to_string_lookup_table[256][4] =
-{
- {1, '0'}, {1, '1'}, {1, '2'}, {1, '3'}, {1, '4'}, {1, '5'}, {1, '6'}, {1, '7'}, {1, '8'}, {1, '9'},
- {2, '1', '0'}, {2, '1', '1'}, {2, '1', '2'}, {2, '1', '3'}, {2, '1', '4'}, {2, '1', '5'}, {2, '1', '6'}, {2, '1', '7'}, {2, '1', '8'}, {2, '1', '9'},
- {2, '2', '0'}, {2, '2', '1'}, {2, '2', '2'}, {2, '2', '3'}, {2, '2', '4'}, {2, '2', '5'}, {2, '2', '6'}, {2, '2', '7'}, {2, '2', '8'}, {2, '2', '9'},
- {2, '3', '0'}, {2, '3', '1'}, {2, '3', '2'}, {2, '3', '3'}, {2, '3', '4'}, {2, '3', '5'}, {2, '3', '6'}, {2, '3', '7'}, {2, '3', '8'}, {2, '3', '9'},
- {2, '4', '0'}, {2, '4', '1'}, {2, '4', '2'}, {2, '4', '3'}, {2, '4', '4'}, {2, '4', '5'}, {2, '4', '6'}, {2, '4', '7'}, {2, '4', '8'}, {2, '4', '9'},
- {2, '5', '0'}, {2, '5', '1'}, {2, '5', '2'}, {2, '5', '3'}, {2, '5', '4'}, {2, '5', '5'}, {2, '5', '6'}, {2, '5', '7'}, {2, '5', '8'}, {2, '5', '9'},
- {2, '6', '0'}, {2, '6', '1'}, {2, '6', '2'}, {2, '6', '3'}, {2, '6', '4'}, {2, '6', '5'}, {2, '6', '6'}, {2, '6', '7'}, {2, '6', '8'}, {2, '6', '9'},
- {2, '7', '0'}, {2, '7', '1'}, {2, '7', '2'}, {2, '7', '3'}, {2, '7', '4'}, {2, '7', '5'}, {2, '7', '6'}, {2, '7', '7'}, {2, '7', '8'}, {2, '7', '9'},
- {2, '8', '0'}, {2, '8', '1'}, {2, '8', '2'}, {2, '8', '3'}, {2, '8', '4'}, {2, '8', '5'}, {2, '8', '6'}, {2, '8', '7'}, {2, '8', '8'}, {2, '8', '9'},
- {2, '9', '0'}, {2, '9', '1'}, {2, '9', '2'}, {2, '9', '3'}, {2, '9', '4'}, {2, '9', '5'}, {2, '9', '6'}, {2, '9', '7'}, {2, '9', '8'}, {2, '9', '9'},
- {3, '1', '0', '0'}, {3, '1', '0', '1'}, {3, '1', '0', '2'}, {3, '1', '0', '3'}, {3, '1', '0', '4'}, {3, '1', '0', '5'}, {3, '1', '0', '6'}, {3, '1', '0', '7'}, {3, '1', '0', '8'}, {3, '1', '0', '9'},
- {3, '1', '1', '0'}, {3, '1', '1', '1'}, {3, '1', '1', '2'}, {3, '1', '1', '3'}, {3, '1', '1', '4'}, {3, '1', '1', '5'}, {3, '1', '1', '6'}, {3, '1', '1', '7'}, {3, '1', '1', '8'}, {3, '1', '1', '9'},
- {3, '1', '2', '0'}, {3, '1', '2', '1'}, {3, '1', '2', '2'}, {3, '1', '2', '3'}, {3, '1', '2', '4'}, {3, '1', '2', '5'}, {3, '1', '2', '6'}, {3, '1', '2', '7'}, {3, '1', '2', '8'}, {3, '1', '2', '9'},
- {3, '1', '3', '0'}, {3, '1', '3', '1'}, {3, '1', '3', '2'}, {3, '1', '3', '3'}, {3, '1', '3', '4'}, {3, '1', '3', '5'}, {3, '1', '3', '6'}, {3, '1', '3', '7'}, {3, '1', '3', '8'}, {3, '1', '3', '9'},
- {3, '1', '4', '0'}, {3, '1', '4', '1'}, {3, '1', '4', '2'}, {3, '1', '4', '3'}, {3, '1', '4', '4'}, {3, '1', '4', '5'}, {3, '1', '4', '6'}, {3, '1', '4', '7'}, {3, '1', '4', '8'}, {3, '1', '4', '9'},
- {3, '1', '5', '0'}, {3, '1', '5', '1'}, {3, '1', '5', '2'}, {3, '1', '5', '3'}, {3, '1', '5', '4'}, {3, '1', '5', '5'}, {3, '1', '5', '6'}, {3, '1', '5', '7'}, {3, '1', '5', '8'}, {3, '1', '5', '9'},
- {3, '1', '6', '0'}, {3, '1', '6', '1'}, {3, '1', '6', '2'}, {3, '1', '6', '3'}, {3, '1', '6', '4'}, {3, '1', '6', '5'}, {3, '1', '6', '6'}, {3, '1', '6', '7'}, {3, '1', '6', '8'}, {3, '1', '6', '9'},
- {3, '1', '7', '0'}, {3, '1', '7', '1'}, {3, '1', '7', '2'}, {3, '1', '7', '3'}, {3, '1', '7', '4'}, {3, '1', '7', '5'}, {3, '1', '7', '6'}, {3, '1', '7', '7'}, {3, '1', '7', '8'}, {3, '1', '7', '9'},
- {3, '1', '8', '0'}, {3, '1', '8', '1'}, {3, '1', '8', '2'}, {3, '1', '8', '3'}, {3, '1', '8', '4'}, {3, '1', '8', '5'}, {3, '1', '8', '6'}, {3, '1', '8', '7'}, {3, '1', '8', '8'}, {3, '1', '8', '9'},
- {3, '1', '9', '0'}, {3, '1', '9', '1'}, {3, '1', '9', '2'}, {3, '1', '9', '3'}, {3, '1', '9', '4'}, {3, '1', '9', '5'}, {3, '1', '9', '6'}, {3, '1', '9', '7'}, {3, '1', '9', '8'}, {3, '1', '9', '9'},
- {3, '2', '0', '0'}, {3, '2', '0', '1'}, {3, '2', '0', '2'}, {3, '2', '0', '3'}, {3, '2', '0', '4'}, {3, '2', '0', '5'}, {3, '2', '0', '6'}, {3, '2', '0', '7'}, {3, '2', '0', '8'}, {3, '2', '0', '9'},
- {3, '2', '1', '0'}, {3, '2', '1', '1'}, {3, '2', '1', '2'}, {3, '2', '1', '3'}, {3, '2', '1', '4'}, {3, '2', '1', '5'}, {3, '2', '1', '6'}, {3, '2', '1', '7'}, {3, '2', '1', '8'}, {3, '2', '1', '9'},
- {3, '2', '2', '0'}, {3, '2', '2', '1'}, {3, '2', '2', '2'}, {3, '2', '2', '3'}, {3, '2', '2', '4'}, {3, '2', '2', '5'}, {3, '2', '2', '6'}, {3, '2', '2', '7'}, {3, '2', '2', '8'}, {3, '2', '2', '9'},
- {3, '2', '3', '0'}, {3, '2', '3', '1'}, {3, '2', '3', '2'}, {3, '2', '3', '3'}, {3, '2', '3', '4'}, {3, '2', '3', '5'}, {3, '2', '3', '6'}, {3, '2', '3', '7'}, {3, '2', '3', '8'}, {3, '2', '3', '9'},
- {3, '2', '4', '0'}, {3, '2', '4', '1'}, {3, '2', '4', '2'}, {3, '2', '4', '3'}, {3, '2', '4', '4'}, {3, '2', '4', '5'}, {3, '2', '4', '6'}, {3, '2', '4', '7'}, {3, '2', '4', '8'}, {3, '2', '4', '9'},
- {3, '2', '5', '0'}, {3, '2', '5', '1'}, {3, '2', '5', '2'}, {3, '2', '5', '3'}, {3, '2', '5', '4'}, {3, '2', '5', '5'},
-};
-
-/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
-static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry)
-{
- return value >= base ? 1 + intLog(value / base, base, value % base || carry) : value % base > 1 || carry;
-}
-
-/// Print integer in desired base, faster than sprintf.
-/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
-/// But it doesn't matter here.
-template <UInt32 base, typename T>
-static void printInteger(char *& out, T value)
-{
- if (value == 0)
- *out++ = '0';
- else
- {
- constexpr size_t buffer_size = sizeof(T) * intLog(256, base, false);
-
- char buf[buffer_size];
- auto ptr = buf;
-
- while (value > 0)
- {
- *ptr = hexDigitLowercase(value % base);
- ++ptr;
- value /= base;
- }
-
- /// Copy to out reversed.
- while (ptr != buf)
- {
- --ptr;
- *out = *ptr;
- ++out;
- }
- }
-}
-
-void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count)
-{
- struct { int base, len; } best{-1, 0}, cur{-1, 0};
- std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words{};
-
- /** Preprocess:
- * Copy the input (bytewise) array into a wordwise array.
- * Find the longest run of 0x00's in src[] for :: shorthanding. */
+#include <array>
+#include <algorithm>
+
+namespace DB
+{
+
+// To be used in formatIPv4, maps a byte to it's string form prefixed with length (so save strlen call).
+extern const char one_byte_to_string_lookup_table[256][4] =
+{
+ {1, '0'}, {1, '1'}, {1, '2'}, {1, '3'}, {1, '4'}, {1, '5'}, {1, '6'}, {1, '7'}, {1, '8'}, {1, '9'},
+ {2, '1', '0'}, {2, '1', '1'}, {2, '1', '2'}, {2, '1', '3'}, {2, '1', '4'}, {2, '1', '5'}, {2, '1', '6'}, {2, '1', '7'}, {2, '1', '8'}, {2, '1', '9'},
+ {2, '2', '0'}, {2, '2', '1'}, {2, '2', '2'}, {2, '2', '3'}, {2, '2', '4'}, {2, '2', '5'}, {2, '2', '6'}, {2, '2', '7'}, {2, '2', '8'}, {2, '2', '9'},
+ {2, '3', '0'}, {2, '3', '1'}, {2, '3', '2'}, {2, '3', '3'}, {2, '3', '4'}, {2, '3', '5'}, {2, '3', '6'}, {2, '3', '7'}, {2, '3', '8'}, {2, '3', '9'},
+ {2, '4', '0'}, {2, '4', '1'}, {2, '4', '2'}, {2, '4', '3'}, {2, '4', '4'}, {2, '4', '5'}, {2, '4', '6'}, {2, '4', '7'}, {2, '4', '8'}, {2, '4', '9'},
+ {2, '5', '0'}, {2, '5', '1'}, {2, '5', '2'}, {2, '5', '3'}, {2, '5', '4'}, {2, '5', '5'}, {2, '5', '6'}, {2, '5', '7'}, {2, '5', '8'}, {2, '5', '9'},
+ {2, '6', '0'}, {2, '6', '1'}, {2, '6', '2'}, {2, '6', '3'}, {2, '6', '4'}, {2, '6', '5'}, {2, '6', '6'}, {2, '6', '7'}, {2, '6', '8'}, {2, '6', '9'},
+ {2, '7', '0'}, {2, '7', '1'}, {2, '7', '2'}, {2, '7', '3'}, {2, '7', '4'}, {2, '7', '5'}, {2, '7', '6'}, {2, '7', '7'}, {2, '7', '8'}, {2, '7', '9'},
+ {2, '8', '0'}, {2, '8', '1'}, {2, '8', '2'}, {2, '8', '3'}, {2, '8', '4'}, {2, '8', '5'}, {2, '8', '6'}, {2, '8', '7'}, {2, '8', '8'}, {2, '8', '9'},
+ {2, '9', '0'}, {2, '9', '1'}, {2, '9', '2'}, {2, '9', '3'}, {2, '9', '4'}, {2, '9', '5'}, {2, '9', '6'}, {2, '9', '7'}, {2, '9', '8'}, {2, '9', '9'},
+ {3, '1', '0', '0'}, {3, '1', '0', '1'}, {3, '1', '0', '2'}, {3, '1', '0', '3'}, {3, '1', '0', '4'}, {3, '1', '0', '5'}, {3, '1', '0', '6'}, {3, '1', '0', '7'}, {3, '1', '0', '8'}, {3, '1', '0', '9'},
+ {3, '1', '1', '0'}, {3, '1', '1', '1'}, {3, '1', '1', '2'}, {3, '1', '1', '3'}, {3, '1', '1', '4'}, {3, '1', '1', '5'}, {3, '1', '1', '6'}, {3, '1', '1', '7'}, {3, '1', '1', '8'}, {3, '1', '1', '9'},
+ {3, '1', '2', '0'}, {3, '1', '2', '1'}, {3, '1', '2', '2'}, {3, '1', '2', '3'}, {3, '1', '2', '4'}, {3, '1', '2', '5'}, {3, '1', '2', '6'}, {3, '1', '2', '7'}, {3, '1', '2', '8'}, {3, '1', '2', '9'},
+ {3, '1', '3', '0'}, {3, '1', '3', '1'}, {3, '1', '3', '2'}, {3, '1', '3', '3'}, {3, '1', '3', '4'}, {3, '1', '3', '5'}, {3, '1', '3', '6'}, {3, '1', '3', '7'}, {3, '1', '3', '8'}, {3, '1', '3', '9'},
+ {3, '1', '4', '0'}, {3, '1', '4', '1'}, {3, '1', '4', '2'}, {3, '1', '4', '3'}, {3, '1', '4', '4'}, {3, '1', '4', '5'}, {3, '1', '4', '6'}, {3, '1', '4', '7'}, {3, '1', '4', '8'}, {3, '1', '4', '9'},
+ {3, '1', '5', '0'}, {3, '1', '5', '1'}, {3, '1', '5', '2'}, {3, '1', '5', '3'}, {3, '1', '5', '4'}, {3, '1', '5', '5'}, {3, '1', '5', '6'}, {3, '1', '5', '7'}, {3, '1', '5', '8'}, {3, '1', '5', '9'},
+ {3, '1', '6', '0'}, {3, '1', '6', '1'}, {3, '1', '6', '2'}, {3, '1', '6', '3'}, {3, '1', '6', '4'}, {3, '1', '6', '5'}, {3, '1', '6', '6'}, {3, '1', '6', '7'}, {3, '1', '6', '8'}, {3, '1', '6', '9'},
+ {3, '1', '7', '0'}, {3, '1', '7', '1'}, {3, '1', '7', '2'}, {3, '1', '7', '3'}, {3, '1', '7', '4'}, {3, '1', '7', '5'}, {3, '1', '7', '6'}, {3, '1', '7', '7'}, {3, '1', '7', '8'}, {3, '1', '7', '9'},
+ {3, '1', '8', '0'}, {3, '1', '8', '1'}, {3, '1', '8', '2'}, {3, '1', '8', '3'}, {3, '1', '8', '4'}, {3, '1', '8', '5'}, {3, '1', '8', '6'}, {3, '1', '8', '7'}, {3, '1', '8', '8'}, {3, '1', '8', '9'},
+ {3, '1', '9', '0'}, {3, '1', '9', '1'}, {3, '1', '9', '2'}, {3, '1', '9', '3'}, {3, '1', '9', '4'}, {3, '1', '9', '5'}, {3, '1', '9', '6'}, {3, '1', '9', '7'}, {3, '1', '9', '8'}, {3, '1', '9', '9'},
+ {3, '2', '0', '0'}, {3, '2', '0', '1'}, {3, '2', '0', '2'}, {3, '2', '0', '3'}, {3, '2', '0', '4'}, {3, '2', '0', '5'}, {3, '2', '0', '6'}, {3, '2', '0', '7'}, {3, '2', '0', '8'}, {3, '2', '0', '9'},
+ {3, '2', '1', '0'}, {3, '2', '1', '1'}, {3, '2', '1', '2'}, {3, '2', '1', '3'}, {3, '2', '1', '4'}, {3, '2', '1', '5'}, {3, '2', '1', '6'}, {3, '2', '1', '7'}, {3, '2', '1', '8'}, {3, '2', '1', '9'},
+ {3, '2', '2', '0'}, {3, '2', '2', '1'}, {3, '2', '2', '2'}, {3, '2', '2', '3'}, {3, '2', '2', '4'}, {3, '2', '2', '5'}, {3, '2', '2', '6'}, {3, '2', '2', '7'}, {3, '2', '2', '8'}, {3, '2', '2', '9'},
+ {3, '2', '3', '0'}, {3, '2', '3', '1'}, {3, '2', '3', '2'}, {3, '2', '3', '3'}, {3, '2', '3', '4'}, {3, '2', '3', '5'}, {3, '2', '3', '6'}, {3, '2', '3', '7'}, {3, '2', '3', '8'}, {3, '2', '3', '9'},
+ {3, '2', '4', '0'}, {3, '2', '4', '1'}, {3, '2', '4', '2'}, {3, '2', '4', '3'}, {3, '2', '4', '4'}, {3, '2', '4', '5'}, {3, '2', '4', '6'}, {3, '2', '4', '7'}, {3, '2', '4', '8'}, {3, '2', '4', '9'},
+ {3, '2', '5', '0'}, {3, '2', '5', '1'}, {3, '2', '5', '2'}, {3, '2', '5', '3'}, {3, '2', '5', '4'}, {3, '2', '5', '5'},
+};
+
+/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
+static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry)
+{
+ return value >= base ? 1 + intLog(value / base, base, value % base || carry) : value % base > 1 || carry;
+}
+
+/// Print integer in desired base, faster than sprintf.
+/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
+/// But it doesn't matter here.
+template <UInt32 base, typename T>
+static void printInteger(char *& out, T value)
+{
+ if (value == 0)
+ *out++ = '0';
+ else
+ {
+ constexpr size_t buffer_size = sizeof(T) * intLog(256, base, false);
+
+ char buf[buffer_size];
+ auto ptr = buf;
+
+ while (value > 0)
+ {
+ *ptr = hexDigitLowercase(value % base);
+ ++ptr;
+ value /= base;
+ }
+
+ /// Copy to out reversed.
+ while (ptr != buf)
+ {
+ --ptr;
+ *out = *ptr;
+ ++out;
+ }
+ }
+}
+
+void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count)
+{
+ struct { int base, len; } best{-1, 0}, cur{-1, 0};
+ std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words{};
+
+ /** Preprocess:
+ * Copy the input (bytewise) array into a wordwise array.
+ * Find the longest run of 0x00's in src[] for :: shorthanding. */
for (const auto i : collections::range(0, IPV6_BINARY_LENGTH - zeroed_tail_bytes_count))
- words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);
-
+ words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);
+
for (const auto i : collections::range(0, words.size()))
- {
- if (words[i] == 0)
- {
- if (cur.base == -1)
- {
- cur.base = i;
- cur.len = 1;
- }
- else
- cur.len++;
- }
- else
- {
- if (cur.base != -1)
- {
- if (best.base == -1 || cur.len > best.len)
- best = cur;
- cur.base = -1;
- }
- }
- }
-
- if (cur.base != -1)
- {
- if (best.base == -1 || cur.len > best.len)
- best = cur;
- }
-
- if (best.base != -1 && best.len < 2)
- best.base = -1;
-
- /// Format the result.
+ {
+ if (words[i] == 0)
+ {
+ if (cur.base == -1)
+ {
+ cur.base = i;
+ cur.len = 1;
+ }
+ else
+ cur.len++;
+ }
+ else
+ {
+ if (cur.base != -1)
+ {
+ if (best.base == -1 || cur.len > best.len)
+ best = cur;
+ cur.base = -1;
+ }
+ }
+ }
+
+ if (cur.base != -1)
+ {
+ if (best.base == -1 || cur.len > best.len)
+ best = cur;
+ }
+
+ if (best.base != -1 && best.len < 2)
+ best.base = -1;
+
+ /// Format the result.
for (const int i : collections::range(0, words.size()))
- {
- /// Are we inside the best run of 0x00's?
- if (best.base != -1 && i >= best.base && i < (best.base + best.len))
- {
- if (i == best.base)
- *dst++ = ':';
- continue;
- }
-
- /// Are we following an initial run of 0x00s or any real hex?
- if (i != 0)
- *dst++ = ':';
-
- /// Is this address an encapsulated IPv4?
- if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
- {
- uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
- memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
- // Due to historical reasons formatIPv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
- std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
-
- formatIPv4(ipv4_buffer, dst, std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), "0");
- // formatIPv4 has already added a null-terminator for us.
- return;
- }
-
- printInteger<16>(dst, words[i]);
- }
-
- /// Was it a trailing run of 0x00's?
- if (best.base != -1 && size_t(best.base) + size_t(best.len) == words.size())
- *dst++ = ':';
-
- *dst++ = '\0';
-}
-
-}
+ {
+ /// Are we inside the best run of 0x00's?
+ if (best.base != -1 && i >= best.base && i < (best.base + best.len))
+ {
+ if (i == best.base)
+ *dst++ = ':';
+ continue;
+ }
+
+ /// Are we following an initial run of 0x00s or any real hex?
+ if (i != 0)
+ *dst++ = ':';
+
+ /// Is this address an encapsulated IPv4?
+ if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
+ {
+ uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
+ memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
+ // Due to historical reasons formatIPv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
+ std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
+
+ formatIPv4(ipv4_buffer, dst, std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), "0");
+ // formatIPv4 has already added a null-terminator for us.
+ return;
+ }
+
+ printInteger<16>(dst, words[i]);
+ }
+
+ /// Was it a trailing run of 0x00's?
+ if (best.base != -1 && size_t(best.base) + size_t(best.len) == words.size())
+ *dst++ = ':';
+
+ *dst++ = '\0';
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h
index bb8acd6d17..8c6d3dc207 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/formatIPv6.h
@@ -1,227 +1,227 @@
-#pragma once
-
-#include <common/types.h>
-#include <string.h>
-#include <algorithm>
-#include <utility>
+#pragma once
+
+#include <common/types.h>
+#include <string.h>
+#include <algorithm>
+#include <utility>
#include <common/range.h>
-#include <Common/hex.h>
-#include <Common/StringUtils/StringUtils.h>
-
-constexpr size_t IPV4_BINARY_LENGTH = 4;
-constexpr size_t IPV6_BINARY_LENGTH = 16;
-constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
-constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
-
-namespace DB
-{
-
-
-/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
- * performs significantly faster than the reference implementation due to the absence of sprintf calls,
- * bounds checking, unnecessary string copying and length calculation.
- */
-void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count = 0);
-
-/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
- *
+#include <Common/hex.h>
+#include <Common/StringUtils/StringUtils.h>
+
+constexpr size_t IPV4_BINARY_LENGTH = 4;
+constexpr size_t IPV6_BINARY_LENGTH = 16;
+constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
+constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
+
+namespace DB
+{
+
+
+/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
+ * performs significantly faster than the reference implementation due to the absence of sprintf calls,
+ * bounds checking, unnecessary string copying and length calculation.
+ */
+void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count = 0);
+
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
+ *
* Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`,
- * which should be long enough.
- * That is "127.0.0.1" becomes 0x7f000001.
- *
- * In case of failure returns false and doesn't modify buffer pointed by `dst`.
- *
- * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value.
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure returns false and doesn't modify buffer pointed by `dst`.
+ *
+ * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value.
* @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long.
- * @return false if parsing failed, true otherwise.
- */
-inline bool parseIPv4(const char * src, unsigned char * dst)
-{
- UInt32 result = 0;
- for (int offset = 24; offset >= 0; offset -= 8)
- {
- UInt32 value = 0;
- size_t len = 0;
- while (isNumericASCII(*src) && len <= 3)
- {
- value = value * 10 + (*src - '0');
- ++len;
- ++src;
- }
- if (len == 0 || value > 255 || (offset > 0 && *src != '.'))
- return false;
- result |= value << offset;
- ++src;
- }
- if (*(src - 1) != '\0')
- return false;
-
- memcpy(dst, &result, sizeof(result));
- return true;
-}
-
-/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
-*
-* Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
+ * @return false if parsing failed, true otherwise.
+ */
+inline bool parseIPv4(const char * src, unsigned char * dst)
+{
+ UInt32 result = 0;
+ for (int offset = 24; offset >= 0; offset -= 8)
+ {
+ UInt32 value = 0;
+ size_t len = 0;
+ while (isNumericASCII(*src) && len <= 3)
+ {
+ value = value * 10 + (*src - '0');
+ ++len;
+ ++src;
+ }
+ if (len == 0 || value > 255 || (offset > 0 && *src != '.'))
+ return false;
+ result |= value << offset;
+ ++src;
+ }
+ if (*(src - 1) != '\0')
+ return false;
+
+ memcpy(dst, &result, sizeof(result));
+ return true;
+}
+
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
+*
+* Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
* Parses the input string `src` and stores binary big-endian value into buffer pointed by `dst`,
-* which should be long enough. In case of failure zeroes
-* IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
-*
-* @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value.
+* which should be long enough. In case of failure zeroes
+* IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
+*
+* @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value.
* @param dst - where to put output bytes, expected to be non-null and at IPV6_BINARY_LENGTH-long.
-* @return false if parsing failed, true otherwise.
-*/
-inline bool parseIPv6(const char * src, unsigned char * dst)
-{
- const auto clear_dst = [dst]()
- {
- memset(dst, '\0', IPV6_BINARY_LENGTH);
- return false;
- };
-
- /// Leading :: requires some special handling.
- if (*src == ':')
- if (*++src != ':')
- return clear_dst();
-
- unsigned char tmp[IPV6_BINARY_LENGTH]{};
+* @return false if parsing failed, true otherwise.
+*/
+inline bool parseIPv6(const char * src, unsigned char * dst)
+{
+ const auto clear_dst = [dst]()
+ {
+ memset(dst, '\0', IPV6_BINARY_LENGTH);
+ return false;
+ };
+
+ /// Leading :: requires some special handling.
+ if (*src == ':')
+ if (*++src != ':')
+ return clear_dst();
+
+ unsigned char tmp[IPV6_BINARY_LENGTH]{};
unsigned char * tp = tmp;
unsigned char * endp = tp + IPV6_BINARY_LENGTH;
const char * curtok = src;
bool saw_xdigit = false;
- UInt32 val{};
- unsigned char * colonp = nullptr;
-
- /// Assuming zero-terminated string.
+ UInt32 val{};
+ unsigned char * colonp = nullptr;
+
+ /// Assuming zero-terminated string.
while (char ch = *src++)
- {
+ {
UInt8 num = unhex(ch);
-
+
if (num != 0xFF)
- {
- val <<= 4;
- val |= num;
- if (val > 0xffffu)
- return clear_dst();
-
+ {
+ val <<= 4;
+ val |= num;
+ if (val > 0xffffu)
+ return clear_dst();
+
saw_xdigit = true;
- continue;
- }
-
- if (ch == ':')
- {
- curtok = src;
- if (!saw_xdigit)
- {
- if (colonp)
- return clear_dst();
-
- colonp = tp;
- continue;
- }
-
- if (tp + sizeof(UInt16) > endp)
- return clear_dst();
-
- *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
- *tp++ = static_cast<unsigned char>(val & 0xffu);
- saw_xdigit = false;
- val = 0;
- continue;
- }
-
- if (ch == '.' && (tp + IPV4_BINARY_LENGTH) <= endp)
- {
- if (!parseIPv4(curtok, tp))
- return clear_dst();
- std::reverse(tp, tp + IPV4_BINARY_LENGTH);
-
- tp += IPV4_BINARY_LENGTH;
- saw_xdigit = false;
- break; /* '\0' was seen by ipv4_scan(). */
- }
-
- return clear_dst();
- }
-
- if (saw_xdigit)
- {
- if (tp + sizeof(UInt16) > endp)
- return clear_dst();
-
- *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
- *tp++ = static_cast<unsigned char>(val & 0xffu);
- }
-
- if (colonp)
- {
- /*
- * Since some memmove()'s erroneously fail to handle
- * overlapping regions, we'll do the shift by hand.
- */
- const auto n = tp - colonp;
-
- for (int i = 1; i <= n; ++i)
- {
- endp[- i] = colonp[n - i];
- colonp[n - i] = 0;
- }
- tp = endp;
- }
-
- if (tp != endp)
- return clear_dst();
-
- memcpy(dst, tmp, sizeof(tmp));
- return true;
-}
-
-/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
+ continue;
+ }
+
+ if (ch == ':')
+ {
+ curtok = src;
+ if (!saw_xdigit)
+ {
+ if (colonp)
+ return clear_dst();
+
+ colonp = tp;
+ continue;
+ }
+
+ if (tp + sizeof(UInt16) > endp)
+ return clear_dst();
+
+ *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
+ *tp++ = static_cast<unsigned char>(val & 0xffu);
+ saw_xdigit = false;
+ val = 0;
+ continue;
+ }
+
+ if (ch == '.' && (tp + IPV4_BINARY_LENGTH) <= endp)
+ {
+ if (!parseIPv4(curtok, tp))
+ return clear_dst();
+ std::reverse(tp, tp + IPV4_BINARY_LENGTH);
+
+ tp += IPV4_BINARY_LENGTH;
+ saw_xdigit = false;
+ break; /* '\0' was seen by ipv4_scan(). */
+ }
+
+ return clear_dst();
+ }
+
+ if (saw_xdigit)
+ {
+ if (tp + sizeof(UInt16) > endp)
+ return clear_dst();
+
+ *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
+ *tp++ = static_cast<unsigned char>(val & 0xffu);
+ }
+
+ if (colonp)
+ {
+ /*
+ * Since some memmove()'s erroneously fail to handle
+ * overlapping regions, we'll do the shift by hand.
+ */
+ const auto n = tp - colonp;
+
+ for (int i = 1; i <= n; ++i)
+ {
+ endp[- i] = colonp[n - i];
+ colonp[n - i] = 0;
+ }
+ tp = endp;
+ }
+
+ if (tp != endp)
+ return clear_dst();
+
+ memcpy(dst, tmp, sizeof(tmp));
+ return true;
+}
+
+/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
* expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1".
- *
- * Any number of the tail bytes can be masked with given mask string.
- *
- * Assumptions:
- * src is IPV4_BINARY_LENGTH long,
- * dst is IPV4_MAX_TEXT_LENGTH long,
- * mask_tail_octets <= IPV4_BINARY_LENGTH
- * mask_string is NON-NULL, if mask_tail_octets > 0.
- *
- * Examples:
- * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
- * > dst == "127.0.0.1"
- * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
- * > dst == "127.0.0.xxx"
- * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
- * > dst == "127.0.0.0"
- */
-inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx")
-{
- extern const char one_byte_to_string_lookup_table[256][4];
-
- const size_t mask_length = mask_string ? strlen(mask_string) : 0;
- const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
- for (size_t octet = 0; octet < limit; ++octet)
- {
- const uint8_t value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
+ *
+ * Any number of the tail bytes can be masked with given mask string.
+ *
+ * Assumptions:
+ * src is IPV4_BINARY_LENGTH long,
+ * dst is IPV4_MAX_TEXT_LENGTH long,
+ * mask_tail_octets <= IPV4_BINARY_LENGTH
+ * mask_string is NON-NULL, if mask_tail_octets > 0.
+ *
+ * Examples:
+ * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
+ * > dst == "127.0.0.1"
+ * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
+ * > dst == "127.0.0.xxx"
+ * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
+ * > dst == "127.0.0.0"
+ */
+inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx")
+{
+ extern const char one_byte_to_string_lookup_table[256][4];
+
+ const size_t mask_length = mask_string ? strlen(mask_string) : 0;
+ const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
+ for (size_t octet = 0; octet < limit; ++octet)
+ {
+ const uint8_t value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
const auto * rep = one_byte_to_string_lookup_table[value];
- const uint8_t len = rep[0];
- const char* str = rep + 1;
-
- memcpy(dst, str, len);
- dst += len;
- *dst++ = '.';
- }
-
- for (size_t mask = 0; mask < mask_tail_octets; ++mask)
- {
- memcpy(dst, mask_string, mask_length);
- dst += mask_length;
-
- *dst++ = '.';
- }
-
- dst[-1] = '\0';
-}
-
-}
+ const uint8_t len = rep[0];
+ const char* str = rep + 1;
+
+ memcpy(dst, str, len);
+ dst += len;
+ *dst++ = '.';
+ }
+
+ for (size_t mask = 0; mask < mask_tail_octets; ++mask)
+ {
+ memcpy(dst, mask_string, mask_length);
+ dst += mask_length;
+
+ *dst++ = '.';
+ }
+
+ dst[-1] = '\0';
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp
index 39798c4882..fea591369f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.cpp
@@ -1,30 +1,30 @@
-#include <Common/getMultipleKeysFromConfig.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-namespace DB
-{
-std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
-{
- std::vector<std::string> values;
- Poco::Util::AbstractConfiguration::Keys config_keys;
- config.keys(root, config_keys);
- for (const auto & key : config_keys)
- {
+#include <Common/getMultipleKeysFromConfig.h>
+
+#include <Poco/Util/AbstractConfiguration.h>
+
+namespace DB
+{
+std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
+{
+ std::vector<std::string> values;
+ Poco::Util::AbstractConfiguration::Keys config_keys;
+ config.keys(root, config_keys);
+ for (const auto & key : config_keys)
+ {
if (key != name && !(key.starts_with(name + "[") && key.ends_with("]")))
- continue;
- values.emplace_back(key);
- }
- return values;
-}
-
-
-std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
-{
- std::vector<std::string> values;
- for (const auto & key : DB::getMultipleKeysFromConfig(config, root, name))
- values.emplace_back(config.getString(root.empty() ? key : root + "." + key));
- return values;
-}
-
-}
+ continue;
+ values.emplace_back(key);
+ }
+ return values;
+}
+
+
+std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name)
+{
+ std::vector<std::string> values;
+ for (const auto & key : DB::getMultipleKeysFromConfig(config, root, name))
+ values.emplace_back(config.getString(root.empty() ? key : root + "." + key));
+ return values;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h
index 1c58af7bb4..c55f8f0e98 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getMultipleKeysFromConfig.h
@@ -1,18 +1,18 @@
-#pragma once
-#include <string>
-#include <vector>
-
-namespace Poco
-{
-namespace Util
-{
- class AbstractConfiguration;
-}
-}
-namespace DB
-{
-/// get all internal key names for given key
-std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
-/// Get all values for given key
-std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
-}
+#pragma once
+#include <string>
+#include <vector>
+
+namespace Poco
+{
+namespace Util
+{
+ class AbstractConfiguration;
+}
+}
+namespace DB
+{
+/// get all internal key names for given key
+std::vector<std::string> getMultipleKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
+/// Get all values for given key
+std::vector<std::string> getMultipleValuesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & root, const std::string & name);
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp
index a9db8223eb..f0de9bb752 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.cpp
@@ -1,9 +1,9 @@
-#include "getNumberOfPhysicalCPUCores.h"
-
-#include <thread>
-
-unsigned getNumberOfPhysicalCPUCores()
-{
+#include "getNumberOfPhysicalCPUCores.h"
+
+#include <thread>
+
+unsigned getNumberOfPhysicalCPUCores()
+{
static const unsigned number = []
{
/// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system.
@@ -11,4 +11,4 @@ unsigned getNumberOfPhysicalCPUCores()
return std::thread::hardware_concurrency();
}();
return number;
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h
index 827e95e1be..bfb3d5ee8d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/getNumberOfPhysicalCPUCores.h
@@ -1,4 +1,4 @@
-#pragma once
-
-/// Get number of CPU cores without hyper-threading.
-unsigned getNumberOfPhysicalCPUCores();
+#pragma once
+
+/// Get number of CPU cores without hyper-threading.
+unsigned getNumberOfPhysicalCPUCores();
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp
index c71a5f6c9d..90d830ac73 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.cpp
@@ -1,43 +1,43 @@
-#if defined(__linux__)
-
-#include "hasLinuxCapability.h"
-
-#include <syscall.h>
-#include <unistd.h>
-#include <linux/capability.h>
-#include <Common/Exception.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int NETLINK_ERROR;
-}
-
-static __user_cap_data_struct getCapabilities()
-{
- /// See man getcap.
- __user_cap_header_struct request{};
- request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just single CAP_NET_ADMIN capability we are interested.
- request.pid = getpid();
-
- __user_cap_data_struct response{};
-
- /// Avoid dependency on 'libcap'.
- if (0 != syscall(SYS_capget, &request, &response))
- throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR);
-
- return response;
-}
-
-bool hasLinuxCapability(int cap)
-{
- static __user_cap_data_struct capabilities = getCapabilities();
- return (1 << cap) & capabilities.effective;
-}
-
-}
-
-#endif
+#if defined(__linux__)
+
+#include "hasLinuxCapability.h"
+
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/capability.h>
+#include <Common/Exception.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NETLINK_ERROR;
+}
+
+static __user_cap_data_struct getCapabilities()
+{
+ /// See man getcap.
+ __user_cap_header_struct request{};
+ request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just single CAP_NET_ADMIN capability we are interested.
+ request.pid = getpid();
+
+ __user_cap_data_struct response{};
+
+ /// Avoid dependency on 'libcap'.
+ if (0 != syscall(SYS_capget, &request, &response))
+ throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR);
+
+ return response;
+}
+
+bool hasLinuxCapability(int cap)
+{
+ static __user_cap_data_struct capabilities = getCapabilities();
+ return (1 << cap) & capabilities.effective;
+}
+
+}
+
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h
index 4a9d2214a7..55181dbe56 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/hasLinuxCapability.h
@@ -1,14 +1,14 @@
#pragma once
-#if defined(__linux__)
-
-#include <linux/capability.h>
-
-namespace DB
-{
-
-/// Check that the current process has Linux capability. Examples: CAP_IPC_LOCK, CAP_NET_ADMIN.
-bool hasLinuxCapability(int cap);
-
-}
-
-#endif
+#if defined(__linux__)
+
+#include <linux/capability.h>
+
+namespace DB
+{
+
+/// Check that the current process has Linux capability. Examples: CAP_IPC_LOCK, CAP_NET_ADMIN.
+bool hasLinuxCapability(int cap);
+
+}
+
+#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp
index 9e22c58f94..2e9dc759d9 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.cpp
@@ -1,22 +1,22 @@
-#include <Common/isLocalAddress.h>
-
+#include <Common/isLocalAddress.h>
+
#include <ifaddrs.h>
-#include <cstring>
+#include <cstring>
#include <optional>
#include <common/types.h>
#include <Common/Exception.h>
#include <Poco/Net/IPAddress.h>
-#include <Poco/Net/SocketAddress.h>
-
-
-namespace DB
-{
-
+#include <Poco/Net/SocketAddress.h>
+
+
+namespace DB
+{
+
namespace ErrorCodes
-{
+{
extern const int SYSTEM_ERROR;
}
-
+
namespace
{
@@ -46,7 +46,7 @@ struct NetworkInterfaces
{
/// We interested only in IP-adresses
case AF_INET:
- {
+ {
interface_address.emplace(*(iface->ifa_addr));
break;
}
@@ -76,8 +76,8 @@ struct NetworkInterfaces
}
};
-}
-
+}
+
bool isLocalAddress(const Poco::Net::IPAddress & address)
{
@@ -116,19 +116,19 @@ bool isLocalAddress(const Poco::Net::IPAddress & address)
}
-bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port)
-{
- return clickhouse_port == address.port() && isLocalAddress(address.host());
-}
-
-
-size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
-{
- size_t hostname_difference = 0;
- for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
- if (local_hostname[i] != host[i])
- ++hostname_difference;
- return hostname_difference;
-}
-
-}
+bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port)
+{
+ return clickhouse_port == address.port() && isLocalAddress(address.host());
+}
+
+
+size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
+{
+ size_t hostname_difference = 0;
+ for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
+ if (local_hostname[i] != host[i])
+ ++hostname_difference;
+ return hostname_difference;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h
index 3d0db2d955..520038ace9 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/isLocalAddress.h
@@ -1,31 +1,31 @@
-#pragma once
-
-#include <common/types.h>
-#include <Poco/Net/IPAddress.h>
-
-
-namespace Poco
-{
- namespace Net
- {
- class SocketAddress;
- }
-}
-
-namespace DB
-{
- /** Lets you check if the address is similar to `localhost`.
- * The purpose of this check is usually to make an assumption,
- * that when we go to this address via the Internet, we'll get to ourselves.
- * Please note that this check is not accurate:
- * - the address is simply compared to the addresses of the network interfaces;
- * - only the first address is taken for each network interface;
- * - the routing rules that affect which network interface we go to the specified address are not checked.
- */
- bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port);
- bool isLocalAddress(const Poco::Net::SocketAddress & address);
- bool isLocalAddress(const Poco::Net::IPAddress & address);
-
- /// Returns number of different bytes in hostnames, used for load balancing
- size_t getHostNameDifference(const std::string & local_hostname, const std::string & host);
-}
+#pragma once
+
+#include <common/types.h>
+#include <Poco/Net/IPAddress.h>
+
+
+namespace Poco
+{
+ namespace Net
+ {
+ class SocketAddress;
+ }
+}
+
+namespace DB
+{
+ /** Lets you check if the address is similar to `localhost`.
+ * The purpose of this check is usually to make an assumption,
+ * that when we go to this address via the Internet, we'll get to ourselves.
+ * Please note that this check is not accurate:
+ * - the address is simply compared to the addresses of the network interfaces;
+ * - only the first address is taken for each network interface;
+ * - the routing rules that affect which network interface we go to the specified address are not checked.
+ */
+ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port);
+ bool isLocalAddress(const Poco::Net::SocketAddress & address);
+ bool isLocalAddress(const Poco::Net::IPAddress & address);
+
+ /// Returns number of different bytes in hostnames, used for load balancing
+ size_t getHostNameDifference(const std::string & local_hostname, const std::string & host);
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp
index c99c08896a..f5450d05d6 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.cpp
@@ -1,40 +1,40 @@
-#include <Common/parseAddress.h>
-#include <Common/Exception.h>
-#include <IO/ReadHelpers.h>
-#include <common/find_symbols.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int BAD_ARGUMENTS;
-}
-
-std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port)
-{
- if (str.empty())
- throw Exception("Empty address passed to function parseAddress", ErrorCodes::BAD_ARGUMENTS);
-
- const char * begin = str.data();
- const char * end = begin + str.size();
- const char * port = end; // NOLINT
-
- if (begin[0] == '[')
- {
- const char * closing_square_bracket = find_first_symbols<']'>(begin + 1, end);
- if (closing_square_bracket >= end)
- throw Exception("Illegal address passed to function parseAddress: "
- "the address begins with opening square bracket, but no closing square bracket found", ErrorCodes::BAD_ARGUMENTS);
-
+#include <Common/parseAddress.h>
+#include <Common/Exception.h>
+#include <IO/ReadHelpers.h>
+#include <common/find_symbols.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+}
+
+std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port)
+{
+ if (str.empty())
+ throw Exception("Empty address passed to function parseAddress", ErrorCodes::BAD_ARGUMENTS);
+
+ const char * begin = str.data();
+ const char * end = begin + str.size();
+ const char * port = end; // NOLINT
+
+ if (begin[0] == '[')
+ {
+ const char * closing_square_bracket = find_first_symbols<']'>(begin + 1, end);
+ if (closing_square_bracket >= end)
+ throw Exception("Illegal address passed to function parseAddress: "
+ "the address begins with opening square bracket, but no closing square bracket found", ErrorCodes::BAD_ARGUMENTS);
+
port = closing_square_bracket + 1;
- }
- else
- port = find_first_symbols<':'>(begin, end);
-
- if (port != end)
- {
+ }
+ else
+ port = find_first_symbols<':'>(begin, end);
+
+ if (port != end)
+ {
if (*port != ':')
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Illegal port prefix passed to function parseAddress: {}", port);
@@ -49,14 +49,14 @@ std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 defa
"Illegal port passed to function parseAddress: {}", port);
}
return { std::string(begin, port - 1), port_number };
- }
- else if (default_port)
- {
- return { str, default_port };
- }
- else
- throw Exception("The address passed to function parseAddress doesn't contain port number "
- "and no 'default_port' was passed", ErrorCodes::BAD_ARGUMENTS);
-}
-
-}
+ }
+ else if (default_port)
+ {
+ return { str, default_port };
+ }
+ else
+ throw Exception("The address passed to function parseAddress doesn't contain port number "
+ "and no 'default_port' was passed", ErrorCodes::BAD_ARGUMENTS);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h
index 602a9adc0b..078aea0a73 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/parseAddress.h
@@ -1,22 +1,22 @@
-#pragma once
-
-#include <string>
-#include <map>
-#include <common/types.h>
-
-
-namespace DB
-{
-
-/** Parse address from string, that can contain host with or without port.
- * If port was not specified and default_port is not zero, default_port is used.
- * Otherwise, an exception is thrown.
- *
- * Examples:
- * yandex.ru - returns "yandex.ru" and default_port
- * yandex.ru:80 - returns "yandex.ru" and 80
- * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host.
- */
-std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port);
-
-}
+#pragma once
+
+#include <string>
+#include <map>
+#include <common/types.h>
+
+
+namespace DB
+{
+
+/** Parse address from string, that can contain host with or without port.
+ * If port was not specified and default_port is not zero, default_port is used.
+ * Otherwise, an exception is thrown.
+ *
+ * Examples:
+ * yandex.ru - returns "yandex.ru" and default_port
+ * yandex.ru:80 - returns "yandex.ru" and 80
+ * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host.
+ */
+std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port);
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp
index ded224e56c..a8b0ebb7bd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.cpp
@@ -1,34 +1,34 @@
-#include <time.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <Common/Exception.h>
-#include <Common/randomSeed.h>
-#include <Common/SipHash.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <Common/Exception.h>
+#include <Common/randomSeed.h>
+#include <Common/SipHash.h>
#include <common/getThreadId.h>
#include <common/types.h>
-
-
-namespace DB
-{
- namespace ErrorCodes
- {
- extern const int CANNOT_CLOCK_GETTIME;
- }
-}
-
-
-DB::UInt64 randomSeed()
-{
- struct timespec times;
+
+
+namespace DB
+{
+ namespace ErrorCodes
+ {
+ extern const int CANNOT_CLOCK_GETTIME;
+ }
+}
+
+
+DB::UInt64 randomSeed()
+{
+ struct timespec times;
if (clock_gettime(CLOCK_MONOTONIC, &times))
- DB::throwFromErrno("Cannot clock_gettime.", DB::ErrorCodes::CANNOT_CLOCK_GETTIME);
-
- /// Not cryptographically secure as time, pid and stack address can be predictable.
-
- SipHash hash;
- hash.update(times.tv_nsec);
- hash.update(times.tv_sec);
+ DB::throwFromErrno("Cannot clock_gettime.", DB::ErrorCodes::CANNOT_CLOCK_GETTIME);
+
+ /// Not cryptographically secure as time, pid and stack address can be predictable.
+
+ SipHash hash;
+ hash.update(times.tv_nsec);
+ hash.update(times.tv_sec);
hash.update(getThreadId());
- hash.update(&times);
- return hash.get64();
-}
+ hash.update(&times);
+ return hash.get64();
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h
index 4f04e4b974..9305715cf6 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/randomSeed.h
@@ -1,7 +1,7 @@
-#pragma once
-
-#include <cstdint>
+#pragma once
+
+#include <cstdint>
#include <common/types.h>
-
-/** Returns a number suitable as seed for PRNG. Use clock_gettime, pid and so on. */
-DB::UInt64 randomSeed();
+
+/** Returns a number suitable as seed for PRNG. Use clock_gettime, pid and so on. */
+DB::UInt64 randomSeed();
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp
index 78241ec1b6..f82f47e029 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.cpp
@@ -1,76 +1,76 @@
-#include "CompressedReadBuffer.h"
-#include <Compression/LZ4_decompress_faster.h>
-
-
-namespace DB
-{
-
-bool CompressedReadBuffer::nextImpl()
-{
- size_t size_decompressed;
- size_t size_compressed_without_checksum;
+#include "CompressedReadBuffer.h"
+#include <Compression/LZ4_decompress_faster.h>
+
+
+namespace DB
+{
+
+bool CompressedReadBuffer::nextImpl()
+{
+ size_t size_decompressed;
+ size_t size_compressed_without_checksum;
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
- if (!size_compressed)
- return false;
-
- auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
-
- /// This is for clang static analyzer.
- assert(size_decompressed + additional_size_at_the_end_of_buffer > 0);
-
- memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
- working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-
+ if (!size_compressed)
+ return false;
+
+ auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
+
+ /// This is for clang static analyzer.
+ assert(size_decompressed + additional_size_at_the_end_of_buffer > 0);
+
+ memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
+ working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
+
decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
-
- return true;
-}
-
-size_t CompressedReadBuffer::readBig(char * to, size_t n)
-{
- size_t bytes_read = 0;
-
- /// If there are unread bytes in the buffer, then we copy necessary to `to`.
- if (pos < working_buffer.end())
- bytes_read += read(to, std::min(static_cast<size_t>(working_buffer.end() - pos), n));
-
- /// If you need to read more - we will, if possible, uncompress at once to `to`.
- while (bytes_read < n)
- {
- size_t size_decompressed;
- size_t size_compressed_without_checksum;
-
+
+ return true;
+}
+
+size_t CompressedReadBuffer::readBig(char * to, size_t n)
+{
+ size_t bytes_read = 0;
+
+ /// If there are unread bytes in the buffer, then we copy necessary to `to`.
+ if (pos < working_buffer.end())
+ bytes_read += read(to, std::min(static_cast<size_t>(working_buffer.end() - pos), n));
+
+ /// If you need to read more - we will, if possible, uncompress at once to `to`.
+ while (bytes_read < n)
+ {
+ size_t size_decompressed;
+ size_t size_compressed_without_checksum;
+
if (!readCompressedData(size_decompressed, size_compressed_without_checksum, false))
- return bytes_read;
-
- auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
-
- /// If the decompressed block fits entirely where it needs to be copied.
- if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
- {
+ return bytes_read;
+
+ auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
+
+ /// If the decompressed block fits entirely where it needs to be copied.
+ if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
+ {
decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
- bytes_read += size_decompressed;
- bytes += size_decompressed;
- }
- else
- {
- bytes += offset();
-
- /// This is for clang static analyzer.
- assert(size_decompressed + additional_size_at_the_end_of_buffer > 0);
-
- memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
- working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
+ bytes_read += size_decompressed;
+ bytes += size_decompressed;
+ }
+ else
+ {
+ bytes += offset();
+
+ /// This is for clang static analyzer.
+ assert(size_decompressed + additional_size_at_the_end_of_buffer > 0);
+
+ memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
+ working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
- pos = working_buffer.begin();
-
- bytes_read += read(to + bytes_read, n - bytes_read);
- break;
- }
- }
-
- return bytes_read;
-}
-
-}
+ pos = working_buffer.begin();
+
+ bytes_read += read(to + bytes_read, n - bytes_read);
+ break;
+ }
+ }
+
+ return bytes_read;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h
index 3fa7347507..40e5d87111 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressedReadBuffer.h
@@ -1,33 +1,33 @@
-#pragma once
-
-#include "CompressedReadBufferBase.h"
-#include <IO/BufferWithOwnMemory.h>
-#include <IO/ReadBuffer.h>
-
-
-namespace DB
-{
-
-class CompressedReadBuffer : public CompressedReadBufferBase, public BufferWithOwnMemory<ReadBuffer>
-{
-private:
- size_t size_compressed = 0;
-
- bool nextImpl() override;
-
-public:
+#pragma once
+
+#include "CompressedReadBufferBase.h"
+#include <IO/BufferWithOwnMemory.h>
+#include <IO/ReadBuffer.h>
+
+
+namespace DB
+{
+
+class CompressedReadBuffer : public CompressedReadBufferBase, public BufferWithOwnMemory<ReadBuffer>
+{
+private:
+ size_t size_compressed = 0;
+
+ bool nextImpl() override;
+
+public:
CompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false)
: CompressedReadBufferBase(&in_, allow_different_codecs_), BufferWithOwnMemory<ReadBuffer>(0)
- {
- }
-
- size_t readBig(char * to, size_t n) override;
-
- /// The compressed size of the current block.
- size_t getSizeCompressed() const
- {
- return size_compressed;
- }
-};
-
-}
+ {
+ }
+
+ size_t readBig(char * to, size_t n) override;
+
+ /// The compressed size of the current block.
+ size_t getSizeCompressed() const
+ {
+ return size_compressed;
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp
index 12f138dc95..f584a9f0a3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecLZ4.cpp
@@ -1,24 +1,24 @@
-#include <lz4.h>
-#include <lz4hc.h>
+#include <lz4.h>
+#include <lz4hc.h>
#include <Compression/ICompressionCodec.h>
-#include <Compression/CompressionInfo.h>
-#include <Compression/CompressionFactory.h>
-#include <Compression/LZ4_decompress_faster.h>
-#include <Parsers/IAST.h>
-#include <Parsers/ASTLiteral.h>
+#include <Compression/CompressionInfo.h>
+#include <Compression/CompressionFactory.h>
+#include <Compression/LZ4_decompress_faster.h>
+#include <Parsers/IAST.h>
+#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <IO/WriteBuffer.h>
-#include <IO/WriteHelpers.h>
+#include <IO/WriteHelpers.h>
#include <IO/BufferWithOwnMemory.h>
-
-#pragma GCC diagnostic ignored "-Wold-style-cast"
-
-
-namespace DB
-{
-
+
+#pragma GCC diagnostic ignored "-Wold-style-cast"
+
+
+namespace DB
+{
+
class CompressionCodecLZ4 : public ICompressionCodec
{
public:
@@ -59,98 +59,98 @@ private:
};
-namespace ErrorCodes
-{
+namespace ErrorCodes
+{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int ILLEGAL_CODEC_PARAMETER;
-}
-
+}
+
CompressionCodecLZ4::CompressionCodecLZ4()
{
setCodecDescription("LZ4");
}
-
-uint8_t CompressionCodecLZ4::getMethodByte() const
-{
- return static_cast<uint8_t>(CompressionMethodByte::LZ4);
-}
-
+
+uint8_t CompressionCodecLZ4::getMethodByte() const
+{
+ return static_cast<uint8_t>(CompressionMethodByte::LZ4);
+}
+
void CompressionCodecLZ4::updateHash(SipHash & hash) const
{
getCodecDesc()->updateTreeHash(hash);
}
-UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const
-{
- return LZ4_COMPRESSBOUND(uncompressed_size);
-}
-
-UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_size, char * dest) const
-{
- return LZ4_compress_default(source, dest, source_size, LZ4_COMPRESSBOUND(source_size));
-}
-
-void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
-{
+UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const
+{
+ return LZ4_COMPRESSBOUND(uncompressed_size);
+}
+
+UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_size, char * dest) const
+{
+ return LZ4_compress_default(source, dest, source_size, LZ4_COMPRESSBOUND(source_size));
+}
+
+void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
+{
bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
if (!success)
throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS);
-}
-
-void registerCodecLZ4(CompressionCodecFactory & factory)
-{
- factory.registerSimpleCompressionCodec("LZ4", static_cast<UInt8>(CompressionMethodByte::LZ4), [&] ()
- {
- return std::make_shared<CompressionCodecLZ4>();
- });
-}
-
-UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const
-{
- auto success = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level);
-
- if (!success)
- throw Exception("Cannot LZ4_compress_HC", ErrorCodes::CANNOT_COMPRESS);
-
- return success;
-}
-
-void registerCodecLZ4HC(CompressionCodecFactory & factory)
-{
- factory.registerCompressionCodec("LZ4HC", {}, [&](const ASTPtr & arguments) -> CompressionCodecPtr
- {
- int level = 0;
-
- if (arguments && !arguments->children.empty())
- {
- if (arguments->children.size() > 1)
- throw Exception("LZ4HC codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
-
- const auto children = arguments->children;
- const auto * literal = children[0]->as<ASTLiteral>();
- if (!literal)
- throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
-
- level = literal->value.safeGet<UInt64>();
- }
-
- return std::make_shared<CompressionCodecLZ4HC>(level);
- });
-}
-
-CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_)
- : level(level_)
-{
+}
+
+void registerCodecLZ4(CompressionCodecFactory & factory)
+{
+ factory.registerSimpleCompressionCodec("LZ4", static_cast<UInt8>(CompressionMethodByte::LZ4), [&] ()
+ {
+ return std::make_shared<CompressionCodecLZ4>();
+ });
+}
+
+UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const
+{
+ auto success = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level);
+
+ if (!success)
+ throw Exception("Cannot LZ4_compress_HC", ErrorCodes::CANNOT_COMPRESS);
+
+ return success;
+}
+
+void registerCodecLZ4HC(CompressionCodecFactory & factory)
+{
+ factory.registerCompressionCodec("LZ4HC", {}, [&](const ASTPtr & arguments) -> CompressionCodecPtr
+ {
+ int level = 0;
+
+ if (arguments && !arguments->children.empty())
+ {
+ if (arguments->children.size() > 1)
+ throw Exception("LZ4HC codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
+
+ const auto children = arguments->children;
+ const auto * literal = children[0]->as<ASTLiteral>();
+ if (!literal)
+ throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
+
+ level = literal->value.safeGet<UInt64>();
+ }
+
+ return std::make_shared<CompressionCodecLZ4HC>(level);
+ });
+}
+
+CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_)
+ : level(level_)
+{
setCodecDescription("LZ4HC", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
-}
-
+}
+
CompressionCodecPtr getCompressionCodecLZ4(int level)
{
return std::make_shared<CompressionCodecLZ4HC>(level);
-}
+}
}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp
index 84bcb5bd84..05b584f052 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.cpp
@@ -1,44 +1,44 @@
-#include <Compression/CompressionCodecNone.h>
-#include <Compression/CompressionInfo.h>
-#include <Compression/CompressionFactory.h>
+#include <Compression/CompressionCodecNone.h>
+#include <Compression/CompressionInfo.h>
+#include <Compression/CompressionFactory.h>
#include <Parsers/ASTIdentifier.h>
-
-
-namespace DB
-{
-
+
+
+namespace DB
+{
+
CompressionCodecNone::CompressionCodecNone()
-{
+{
setCodecDescription("NONE");
-}
-
+}
+
uint8_t CompressionCodecNone::getMethodByte() const
-{
+{
return static_cast<uint8_t>(CompressionMethodByte::NONE);
-}
-
+}
+
void CompressionCodecNone::updateHash(SipHash & hash) const
{
getCodecDesc()->updateTreeHash(hash);
}
-UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_size, char * dest) const
-{
- memcpy(dest, source, source_size);
- return source_size;
-}
-
-void CompressionCodecNone::doDecompressData(const char * source, UInt32 /*source_size*/, char * dest, UInt32 uncompressed_size) const
-{
- memcpy(dest, source, uncompressed_size);
-}
-
-void registerCodecNone(CompressionCodecFactory & factory)
-{
- factory.registerSimpleCompressionCodec("NONE", static_cast<char>(CompressionMethodByte::NONE), [&] ()
- {
- return std::make_shared<CompressionCodecNone>();
- });
-}
-
-}
+UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_size, char * dest) const
+{
+ memcpy(dest, source, source_size);
+ return source_size;
+}
+
+void CompressionCodecNone::doDecompressData(const char * source, UInt32 /*source_size*/, char * dest, UInt32 uncompressed_size) const
+{
+ memcpy(dest, source, uncompressed_size);
+}
+
+void registerCodecNone(CompressionCodecFactory & factory)
+{
+ factory.registerSimpleCompressionCodec("NONE", static_cast<char>(CompressionMethodByte::NONE), [&] ()
+ {
+ return std::make_shared<CompressionCodecNone>();
+ });
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h
index bf6bb6de4e..8a0b0add67 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionCodecNone.h
@@ -1,31 +1,31 @@
-#pragma once
-
-#include <IO/WriteBuffer.h>
-#include <Compression/ICompressionCodec.h>
-#include <IO/BufferWithOwnMemory.h>
-#include <Parsers/StringRange.h>
-
-namespace DB
-{
-
-class CompressionCodecNone : public ICompressionCodec
-{
-public:
+#pragma once
+
+#include <IO/WriteBuffer.h>
+#include <Compression/ICompressionCodec.h>
+#include <IO/BufferWithOwnMemory.h>
+#include <Parsers/StringRange.h>
+
+namespace DB
+{
+
+class CompressionCodecNone : public ICompressionCodec
+{
+public:
CompressionCodecNone();
- uint8_t getMethodByte() const override;
-
+ uint8_t getMethodByte() const override;
+
void updateHash(SipHash & hash) const override;
-protected:
-
- UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
- void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
- bool isCompression() const override { return false; }
- bool isGenericCompression() const override { return false; }
- bool isNone() const override { return true; }
-};
-
-}
+protected:
+
+ UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+ void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+ bool isCompression() const override { return false; }
+ bool isGenericCompression() const override { return false; }
+ bool isNone() const override { return true; }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp
index 1796303988..9244906e48 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Compression/CompressionFactory.cpp
@@ -1,48 +1,48 @@
-#include <Compression/CompressionFactory.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTLiteral.h>
-#include <Poco/String.h>
-#include <IO/ReadBuffer.h>
-#include <Parsers/queryToString.h>
-#include <Compression/CompressionCodecMultiple.h>
+#include <Compression/CompressionFactory.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Poco/String.h>
+#include <IO/ReadBuffer.h>
+#include <Parsers/queryToString.h>
+#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecNone.h>
-#include <IO/WriteHelpers.h>
-
+#include <IO/WriteHelpers.h>
+
#include <boost/algorithm/string/join.hpp>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int UNKNOWN_CODEC;
- extern const int UNEXPECTED_AST_STRUCTURE;
- extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
-}
-
-CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const
-{
- return default_codec;
-}
-
-
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int UNKNOWN_CODEC;
+ extern const int UNEXPECTED_AST_STRUCTURE;
+ extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
+}
+
+CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const
+{
+ return default_codec;
+}
+
+
CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std::optional<int> level) const
-{
- if (level)
- {
+{
+ if (level)
+ {
auto level_literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
return get(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), level_literal)), {});
- }
- else
- {
- auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
+ }
+ else
+ {
+ auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
return get(makeASTFunction("CODEC", identifier), {});
- }
-}
-
+ }
+}
+
CompressionCodecPtr CompressionCodecFactory::get(
const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
@@ -52,24 +52,24 @@ CompressionCodecPtr CompressionCodecFactory::get(
if (const auto * func = ast->as<ASTFunction>())
{
- Codecs codecs;
- codecs.reserve(func->arguments->children.size());
- for (const auto & inner_codec_ast : func->arguments->children)
- {
+ Codecs codecs;
+ codecs.reserve(func->arguments->children.size());
+ for (const auto & inner_codec_ast : func->arguments->children)
+ {
String codec_family_name;
ASTPtr codec_arguments;
- if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
+ if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
{
codec_family_name = family_name->name();
codec_arguments = {};
}
- else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
+ else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
{
codec_family_name = ast_func->name;
codec_arguments = ast_func->arguments;
}
- else
- throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
+ else
+ throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
CompressionCodecPtr codec;
if (codec_family_name == DEFAULT_CODEC_NAME)
@@ -81,119 +81,119 @@ CompressionCodecPtr CompressionCodecFactory::get(
continue;
codecs.emplace_back(codec);
- }
-
- CompressionCodecPtr res;
-
- if (codecs.size() == 1)
+ }
+
+ CompressionCodecPtr res;
+
+ if (codecs.size() == 1)
return codecs.back();
- else if (codecs.size() > 1)
+ else if (codecs.size() > 1)
return std::make_shared<CompressionCodecMultiple>(codecs);
else
return std::make_shared<CompressionCodecNone>();
- }
-
+ }
+
throw Exception("Unexpected AST structure for compression codec: " + queryToString(ast), ErrorCodes::UNEXPECTED_AST_STRUCTURE);
-}
-
-
-CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const
-{
- const auto family_code_and_creator = family_code_with_codec.find(byte_code);
-
- if (family_code_and_creator == family_code_with_codec.end())
- throw Exception("Unknown codec family code: " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC);
-
- return family_code_and_creator->second({}, nullptr);
-}
-
-
+}
+
+
+CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const
+{
+ const auto family_code_and_creator = family_code_with_codec.find(byte_code);
+
+ if (family_code_and_creator == family_code_with_codec.end())
+ throw Exception("Unknown codec family code: " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC);
+
+ return family_code_and_creator->second({}, nullptr);
+}
+
+
CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const
-{
- if (family_name == "Multiple")
- throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC);
-
- const auto family_and_creator = family_name_with_codec.find(family_name);
-
- if (family_and_creator == family_name_with_codec.end())
- throw Exception("Unknown codec family: " + family_name, ErrorCodes::UNKNOWN_CODEC);
-
- return family_and_creator->second(arguments, column_type);
-}
-
-void CompressionCodecFactory::registerCompressionCodecWithType(
- const String & family_name,
- std::optional<uint8_t> byte_code,
- CreatorWithType creator)
-{
- if (creator == nullptr)
- throw Exception("CompressionCodecFactory: the codec family " + family_name + " has been provided a null constructor",
- ErrorCodes::LOGICAL_ERROR);
-
- if (!family_name_with_codec.emplace(family_name, creator).second)
- throw Exception("CompressionCodecFactory: the codec family name '" + family_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
-
- if (byte_code)
- if (!family_code_with_codec.emplace(*byte_code, creator).second)
- throw Exception("CompressionCodecFactory: the codec family code '" + std::to_string(*byte_code) + "' is not unique", ErrorCodes::LOGICAL_ERROR);
-}
-
-void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator)
-{
+{
+ if (family_name == "Multiple")
+ throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC);
+
+ const auto family_and_creator = family_name_with_codec.find(family_name);
+
+ if (family_and_creator == family_name_with_codec.end())
+ throw Exception("Unknown codec family: " + family_name, ErrorCodes::UNKNOWN_CODEC);
+
+ return family_and_creator->second(arguments, column_type);
+}
+
+void CompressionCodecFactory::registerCompressionCodecWithType(
+ const String & family_name,
+ std::optional<uint8_t> byte_code,
+ CreatorWithType creator)
+{
+ if (creator == nullptr)
+ throw Exception("CompressionCodecFactory: the codec family " + family_name + " has been provided a null constructor",
+ ErrorCodes::LOGICAL_ERROR);
+
+ if (!family_name_with_codec.emplace(family_name, creator).second)
+ throw Exception("CompressionCodecFactory: the codec family name '" + family_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
+
+ if (byte_code)
+ if (!family_code_with_codec.emplace(*byte_code, creator).second)
+ throw Exception("CompressionCodecFactory: the codec family code '" + std::to_string(*byte_code) + "' is not unique", ErrorCodes::LOGICAL_ERROR);
+}
+
+void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator)
+{
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, const IDataType * /* data_type */)
- {
- return creator(ast);
- });
-}
-
-void CompressionCodecFactory::registerSimpleCompressionCodec(
- const String & family_name,
- std::optional<uint8_t> byte_code,
- SimpleCreator creator)
-{
- registerCompressionCodec(family_name, byte_code, [family_name, creator](const ASTPtr & ast)
- {
- if (ast)
+ {
+ return creator(ast);
+ });
+}
+
+void CompressionCodecFactory::registerSimpleCompressionCodec(
+ const String & family_name,
+ std::optional<uint8_t> byte_code,
+ SimpleCreator creator)
+{
+ registerCompressionCodec(family_name, byte_code, [family_name, creator](const ASTPtr & ast)
+ {
+ if (ast)
throw Exception(ErrorCodes::DATA_TYPE_CANNOT_HAVE_ARGUMENTS, "Compression codec {} cannot have arguments", family_name);
- return creator();
- });
-}
-
-
-void registerCodecNone(CompressionCodecFactory & factory);
+ return creator();
+ });
+}
+
+
+void registerCodecNone(CompressionCodecFactory & factory);
void registerCodecLZ4(CompressionCodecFactory & factory);
/*
void registerCodecLZ4HC(CompressionCodecFactory & factory);
-void registerCodecZSTD(CompressionCodecFactory & factory);
-void registerCodecDelta(CompressionCodecFactory & factory);
-void registerCodecT64(CompressionCodecFactory & factory);
-void registerCodecDoubleDelta(CompressionCodecFactory & factory);
-void registerCodecGorilla(CompressionCodecFactory & factory);
+void registerCodecZSTD(CompressionCodecFactory & factory);
+void registerCodecDelta(CompressionCodecFactory & factory);
+void registerCodecT64(CompressionCodecFactory & factory);
+void registerCodecDoubleDelta(CompressionCodecFactory & factory);
+void registerCodecGorilla(CompressionCodecFactory & factory);
void registerCodecEncrypted(CompressionCodecFactory & factory);
void registerCodecMultiple(CompressionCodecFactory & factory);
*/
-
-CompressionCodecFactory::CompressionCodecFactory()
-{
- registerCodecLZ4(*this);
- registerCodecNone(*this);
+
+CompressionCodecFactory::CompressionCodecFactory()
+{
+ registerCodecLZ4(*this);
+ registerCodecNone(*this);
/*
- registerCodecZSTD(*this);
- registerCodecLZ4HC(*this);
- registerCodecDelta(*this);
- registerCodecT64(*this);
- registerCodecDoubleDelta(*this);
- registerCodecGorilla(*this);
+ registerCodecZSTD(*this);
+ registerCodecLZ4HC(*this);
+ registerCodecDelta(*this);
+ registerCodecT64(*this);
+ registerCodecDoubleDelta(*this);
+ registerCodecGorilla(*this);
registerCodecEncrypted(*this);
registerCodecMultiple(*this);
*/
default_codec = get("LZ4", {});
-}
-
-CompressionCodecFactory & CompressionCodecFactory::instance()
-{
- static CompressionCodecFactory ret;
- return ret;
-}
-
-}
+}
+
+CompressionCodecFactory & CompressionCodecFactory::instance()
+{
+ static CompressionCodecFactory ret;
+ return ret;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp
index f241eb29db..3b0cb92de5 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp
@@ -1,255 +1,255 @@
-#include "BackgroundSchedulePool.h"
-#include <Common/Exception.h>
-#include <Common/setThreadName.h>
-#include <Common/Stopwatch.h>
-#include <Common/CurrentThread.h>
-#include <common/logger_useful.h>
-#include <chrono>
+#include "BackgroundSchedulePool.h"
+#include <Common/Exception.h>
+#include <Common/setThreadName.h>
+#include <Common/Stopwatch.h>
+#include <Common/CurrentThread.h>
+#include <common/logger_useful.h>
+#include <chrono>
#include <common/scope_guard.h>
-
-
-namespace DB
-{
-
-
-class TaskNotification final : public Poco::Notification
-{
-public:
- explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task_) : task(task_) {}
- void execute() { task->execute(); }
-
-private:
- BackgroundSchedulePoolTaskInfoPtr task;
-};
-
-
-BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo(
- BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_)
- : pool(pool_), log_name(log_name_), function(function_)
-{
-}
-
-bool BackgroundSchedulePoolTaskInfo::schedule()
-{
- std::lock_guard lock(schedule_mutex);
-
- if (deactivated || scheduled)
- return false;
-
- scheduleImpl(lock);
- return true;
-}
-
-bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms, bool overwrite)
-{
- std::lock_guard lock(schedule_mutex);
-
- if (deactivated || scheduled)
- return false;
- if (delayed && !overwrite)
- return false;
-
- pool.scheduleDelayedTask(shared_from_this(), ms, lock);
- return true;
-}
-
-void BackgroundSchedulePoolTaskInfo::deactivate()
-{
- std::lock_guard lock_exec(exec_mutex);
- std::lock_guard lock_schedule(schedule_mutex);
-
- if (deactivated)
- return;
-
- deactivated = true;
- scheduled = false;
-
- if (delayed)
- pool.cancelDelayedTask(shared_from_this(), lock_schedule);
-}
-
-void BackgroundSchedulePoolTaskInfo::activate()
-{
- std::lock_guard lock(schedule_mutex);
- deactivated = false;
-}
-
-bool BackgroundSchedulePoolTaskInfo::activateAndSchedule()
-{
- std::lock_guard lock(schedule_mutex);
-
- deactivated = false;
- if (scheduled)
- return false;
-
- scheduleImpl(lock);
- return true;
-}
-
-void BackgroundSchedulePoolTaskInfo::execute()
-{
- Stopwatch watch;
- CurrentMetrics::Increment metric_increment{pool.tasks_metric};
-
- std::lock_guard lock_exec(exec_mutex);
-
- {
- std::lock_guard lock_schedule(schedule_mutex);
-
- if (deactivated)
- return;
-
- scheduled = false;
- executing = true;
- }
-
- function();
- UInt64 milliseconds = watch.elapsedMilliseconds();
-
- /// If the task is executed longer than specified time, it will be logged.
- static const int32_t slow_execution_threshold_ms = 200;
-
- if (milliseconds >= slow_execution_threshold_ms)
- LOG_TRACE(&Poco::Logger::get(log_name), "Execution took {} ms.", milliseconds);
-
- {
- std::lock_guard lock_schedule(schedule_mutex);
-
- executing = false;
-
- /// In case was scheduled while executing (including a scheduleAfter which expired) we schedule the task
- /// on the queue. We don't call the function again here because this way all tasks
- /// will have their chance to execute
-
- if (scheduled)
- pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
- }
-}
-
-void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
-{
- scheduled = true;
-
- if (delayed)
- pool.cancelDelayedTask(shared_from_this(), schedule_mutex_lock);
-
- /// If the task is not executing at the moment, enqueue it for immediate execution.
- /// But if it is currently executing, do nothing because it will be enqueued
- /// at the end of the execute() method.
- if (!executing)
- pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
-}
-
-Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
-{
- return [t = shared_from_this()](const Coordination::WatchResponse &)
- {
- t->schedule();
- };
-}
-
-
+
+
+namespace DB
+{
+
+
+class TaskNotification final : public Poco::Notification
+{
+public:
+ explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task_) : task(task_) {}
+ void execute() { task->execute(); }
+
+private:
+ BackgroundSchedulePoolTaskInfoPtr task;
+};
+
+
+BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo(
+ BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_)
+ : pool(pool_), log_name(log_name_), function(function_)
+{
+}
+
+bool BackgroundSchedulePoolTaskInfo::schedule()
+{
+ std::lock_guard lock(schedule_mutex);
+
+ if (deactivated || scheduled)
+ return false;
+
+ scheduleImpl(lock);
+ return true;
+}
+
+bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms, bool overwrite)
+{
+ std::lock_guard lock(schedule_mutex);
+
+ if (deactivated || scheduled)
+ return false;
+ if (delayed && !overwrite)
+ return false;
+
+ pool.scheduleDelayedTask(shared_from_this(), ms, lock);
+ return true;
+}
+
+void BackgroundSchedulePoolTaskInfo::deactivate()
+{
+ std::lock_guard lock_exec(exec_mutex);
+ std::lock_guard lock_schedule(schedule_mutex);
+
+ if (deactivated)
+ return;
+
+ deactivated = true;
+ scheduled = false;
+
+ if (delayed)
+ pool.cancelDelayedTask(shared_from_this(), lock_schedule);
+}
+
+void BackgroundSchedulePoolTaskInfo::activate()
+{
+ std::lock_guard lock(schedule_mutex);
+ deactivated = false;
+}
+
+bool BackgroundSchedulePoolTaskInfo::activateAndSchedule()
+{
+ std::lock_guard lock(schedule_mutex);
+
+ deactivated = false;
+ if (scheduled)
+ return false;
+
+ scheduleImpl(lock);
+ return true;
+}
+
+void BackgroundSchedulePoolTaskInfo::execute()
+{
+ Stopwatch watch;
+ CurrentMetrics::Increment metric_increment{pool.tasks_metric};
+
+ std::lock_guard lock_exec(exec_mutex);
+
+ {
+ std::lock_guard lock_schedule(schedule_mutex);
+
+ if (deactivated)
+ return;
+
+ scheduled = false;
+ executing = true;
+ }
+
+ function();
+ UInt64 milliseconds = watch.elapsedMilliseconds();
+
+ /// If the task is executed longer than specified time, it will be logged.
+ static const int32_t slow_execution_threshold_ms = 200;
+
+ if (milliseconds >= slow_execution_threshold_ms)
+ LOG_TRACE(&Poco::Logger::get(log_name), "Execution took {} ms.", milliseconds);
+
+ {
+ std::lock_guard lock_schedule(schedule_mutex);
+
+ executing = false;
+
+ /// In case the task was scheduled while executing (including a scheduleAfter which expired), we schedule the task
+ /// on the queue. We don't call the function again here because this way all tasks
+ /// will have their chance to execute
+
+ if (scheduled)
+ pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
+ }
+}
+
+void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
+{
+ scheduled = true;
+
+ if (delayed)
+ pool.cancelDelayedTask(shared_from_this(), schedule_mutex_lock);
+
+ /// If the task is not executing at the moment, enqueue it for immediate execution.
+ /// But if it is currently executing, do nothing because it will be enqueued
+ /// at the end of the execute() method.
+ if (!executing)
+ pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
+}
+
+Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
+{
+ return [t = shared_from_this()](const Coordination::WatchResponse &)
+ {
+ t->schedule();
+ };
+}
+
+
BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_)
- : size(size_)
- , tasks_metric(tasks_metric_)
- , thread_name(thread_name_)
-{
- LOG_INFO(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Create BackgroundSchedulePool with {} threads", size);
-
- threads.resize(size);
- for (auto & thread : threads)
- thread = ThreadFromGlobalPool([this] { threadFunction(); });
-
- delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); });
-}
-
-
-BackgroundSchedulePool::~BackgroundSchedulePool()
-{
- try
- {
- {
- std::unique_lock lock(delayed_tasks_mutex);
- shutdown = true;
- wakeup_cond.notify_all();
- }
-
- queue.wakeUpAll();
- delayed_thread.join();
-
- LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish.");
- for (auto & thread : threads)
- thread.join();
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-
-BackgroundSchedulePool::TaskHolder BackgroundSchedulePool::createTask(const std::string & name, const TaskFunc & function)
-{
- return TaskHolder(std::make_shared<TaskInfo>(*this, name, function));
-}
-
-
-void BackgroundSchedulePool::scheduleDelayedTask(const TaskInfoPtr & task, size_t ms, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */)
-{
- Poco::Timestamp current_time;
-
- {
- std::lock_guard lock(delayed_tasks_mutex);
-
- if (task->delayed)
- delayed_tasks.erase(task->iterator);
-
- task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task);
- task->delayed = true;
- }
-
- wakeup_cond.notify_all();
-}
-
-
-void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */)
-{
- {
- std::lock_guard lock(delayed_tasks_mutex);
- delayed_tasks.erase(task->iterator);
- task->delayed = false;
- }
-
- wakeup_cond.notify_all();
-}
-
-
-void BackgroundSchedulePool::attachToThreadGroup()
-{
- std::lock_guard lock(delayed_tasks_mutex);
-
- if (thread_group)
- {
- /// Put all threads to one thread pool
- CurrentThread::attachTo(thread_group);
- }
- else
- {
- CurrentThread::initializeQuery();
- thread_group = CurrentThread::getGroup();
- }
-}
-
-
-void BackgroundSchedulePool::threadFunction()
-{
- setThreadName(thread_name.c_str());
-
- attachToThreadGroup();
- SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
-
- while (!shutdown)
- {
+ : size(size_)
+ , tasks_metric(tasks_metric_)
+ , thread_name(thread_name_)
+{
+ LOG_INFO(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Create BackgroundSchedulePool with {} threads", size);
+
+ threads.resize(size);
+ for (auto & thread : threads)
+ thread = ThreadFromGlobalPool([this] { threadFunction(); });
+
+ delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); });
+}
+
+
+BackgroundSchedulePool::~BackgroundSchedulePool()
+{
+ try
+ {
+ {
+ std::unique_lock lock(delayed_tasks_mutex);
+ shutdown = true;
+ wakeup_cond.notify_all();
+ }
+
+ queue.wakeUpAll();
+ delayed_thread.join();
+
+ LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish.");
+ for (auto & thread : threads)
+ thread.join();
+ }
+ catch (...)
+ {
+ tryLogCurrentException(__PRETTY_FUNCTION__);
+ }
+}
+
+
+BackgroundSchedulePool::TaskHolder BackgroundSchedulePool::createTask(const std::string & name, const TaskFunc & function)
+{
+ return TaskHolder(std::make_shared<TaskInfo>(*this, name, function));
+}
+
+
+void BackgroundSchedulePool::scheduleDelayedTask(const TaskInfoPtr & task, size_t ms, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */)
+{
+ Poco::Timestamp current_time;
+
+ {
+ std::lock_guard lock(delayed_tasks_mutex);
+
+ if (task->delayed)
+ delayed_tasks.erase(task->iterator);
+
+ task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task);
+ task->delayed = true;
+ }
+
+ wakeup_cond.notify_all();
+}
+
+
+void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */)
+{
+ {
+ std::lock_guard lock(delayed_tasks_mutex);
+ delayed_tasks.erase(task->iterator);
+ task->delayed = false;
+ }
+
+ wakeup_cond.notify_all();
+}
+
+
+void BackgroundSchedulePool::attachToThreadGroup()
+{
+ std::lock_guard lock(delayed_tasks_mutex);
+
+ if (thread_group)
+ {
+ /// Put all threads into one thread group
+ CurrentThread::attachTo(thread_group);
+ }
+ else
+ {
+ CurrentThread::initializeQuery();
+ thread_group = CurrentThread::getGroup();
+ }
+}
+
+
+void BackgroundSchedulePool::threadFunction()
+{
+ setThreadName(thread_name.c_str());
+
+ attachToThreadGroup();
+ SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
+
+ while (!shutdown)
+ {
/// We have to wait with timeout to prevent very rare deadlock, caused by the following race condition:
/// 1. Background thread N: threadFunction(): checks for shutdown (it's false)
/// 2. Main thread: ~BackgroundSchedulePool(): sets shutdown to true, calls queue.wakeUpAll(), it triggers
@@ -260,65 +260,65 @@ void BackgroundSchedulePool::threadFunction()
/// TODO Do we really need Poco::NotificationQueue? Why not to use std::queue + mutex + condvar or maybe even DB::ThreadPool?
constexpr size_t wait_timeout_ms = 500;
if (Poco::AutoPtr<Poco::Notification> notification = queue.waitDequeueNotification(wait_timeout_ms))
- {
- TaskNotification & task_notification = static_cast<TaskNotification &>(*notification);
- task_notification.execute();
- }
- }
-}
-
-
-void BackgroundSchedulePool::delayExecutionThreadFunction()
-{
- setThreadName((thread_name + "/D").c_str());
-
- attachToThreadGroup();
- SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
-
- while (!shutdown)
- {
- TaskInfoPtr task;
- bool found = false;
-
- {
- std::unique_lock lock(delayed_tasks_mutex);
-
- while (!shutdown)
- {
- Poco::Timestamp min_time;
-
- if (!delayed_tasks.empty())
- {
- auto t = delayed_tasks.begin();
- min_time = t->first;
- task = t->second;
- }
-
- if (!task)
- {
- wakeup_cond.wait(lock);
- continue;
- }
-
- Poco::Timestamp current_time;
-
- if (min_time > current_time)
- {
- wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time));
- continue;
- }
- else
- {
- /// We have a task ready for execution
- found = true;
- break;
- }
- }
- }
-
- if (found)
- task->schedule();
- }
-}
-
-}
+ {
+ TaskNotification & task_notification = static_cast<TaskNotification &>(*notification);
+ task_notification.execute();
+ }
+ }
+}
+
+
+void BackgroundSchedulePool::delayExecutionThreadFunction()
+{
+ setThreadName((thread_name + "/D").c_str());
+
+ attachToThreadGroup();
+ SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
+
+ while (!shutdown)
+ {
+ TaskInfoPtr task;
+ bool found = false;
+
+ {
+ std::unique_lock lock(delayed_tasks_mutex);
+
+ while (!shutdown)
+ {
+ Poco::Timestamp min_time;
+
+ if (!delayed_tasks.empty())
+ {
+ auto t = delayed_tasks.begin();
+ min_time = t->first;
+ task = t->second;
+ }
+
+ if (!task)
+ {
+ wakeup_cond.wait(lock);
+ continue;
+ }
+
+ Poco::Timestamp current_time;
+
+ if (min_time > current_time)
+ {
+ wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time));
+ continue;
+ }
+ else
+ {
+ /// We have a task ready for execution
+ found = true;
+ break;
+ }
+ }
+ }
+
+ if (found)
+ task->schedule();
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h
index 092824c069..0e7e891118 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h
@@ -1,173 +1,173 @@
-#pragma once
-
-#include <Poco/Notification.h>
-#include <Poco/NotificationQueue.h>
-#include <Poco/Timestamp.h>
-#include <thread>
-#include <atomic>
-#include <mutex>
-#include <condition_variable>
-#include <vector>
-#include <map>
-#include <functional>
-#include <boost/noncopyable.hpp>
-#include <Common/ZooKeeper/Types.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/CurrentThread.h>
-#include <Common/ThreadPool.h>
-
-
-namespace DB
-{
-
-class TaskNotification;
-class BackgroundSchedulePoolTaskInfo;
-class BackgroundSchedulePoolTaskHolder;
-
-
-/** Executes functions scheduled at a specific point in time.
- * Basically all tasks are added in a queue and precessed by worker threads.
- *
- * The most important difference between this and BackgroundProcessingPool
- * is that we have the guarantee that the same function is not executed from many workers in the same time.
- *
- * The usage scenario: instead starting a separate thread for each task,
- * register a task in BackgroundSchedulePool and when you need to run the task,
- * call schedule or scheduleAfter(duration) method.
- */
-class BackgroundSchedulePool
-{
-public:
- friend class BackgroundSchedulePoolTaskInfo;
-
- using TaskInfo = BackgroundSchedulePoolTaskInfo;
- using TaskInfoPtr = std::shared_ptr<TaskInfo>;
- using TaskFunc = std::function<void()>;
- using TaskHolder = BackgroundSchedulePoolTaskHolder;
- using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>;
-
- TaskHolder createTask(const std::string & log_name, const TaskFunc & function);
-
- size_t getNumberOfThreads() const { return size; }
-
- /// thread_name_ cannot be longer then 13 bytes (2 bytes is reserved for "/D" suffix for delayExecutionThreadFunction())
+#pragma once
+
+#include <Poco/Notification.h>
+#include <Poco/NotificationQueue.h>
+#include <Poco/Timestamp.h>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <condition_variable>
+#include <vector>
+#include <map>
+#include <functional>
+#include <boost/noncopyable.hpp>
+#include <Common/ZooKeeper/Types.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/CurrentThread.h>
+#include <Common/ThreadPool.h>
+
+
+namespace DB
+{
+
+class TaskNotification;
+class BackgroundSchedulePoolTaskInfo;
+class BackgroundSchedulePoolTaskHolder;
+
+
+/** Executes functions scheduled at a specific point in time.
+ * Basically all tasks are added in a queue and processed by worker threads.
+ *
+ * The most important difference between this and BackgroundProcessingPool
+ * is that we have the guarantee that the same function is not executed by many workers at the same time.
+ *
+ * The usage scenario: instead of starting a separate thread for each task,
+ * register a task in BackgroundSchedulePool and when you need to run the task,
+ * call schedule or scheduleAfter(duration) method.
+ */
+class BackgroundSchedulePool
+{
+public:
+ friend class BackgroundSchedulePoolTaskInfo;
+
+ using TaskInfo = BackgroundSchedulePoolTaskInfo;
+ using TaskInfoPtr = std::shared_ptr<TaskInfo>;
+ using TaskFunc = std::function<void()>;
+ using TaskHolder = BackgroundSchedulePoolTaskHolder;
+ using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>;
+
+ TaskHolder createTask(const std::string & log_name, const TaskFunc & function);
+
+ size_t getNumberOfThreads() const { return size; }
+
+ /// thread_name_ cannot be longer than 13 bytes (2 bytes are reserved for "/D" suffix for delayExecutionThreadFunction())
BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_);
- ~BackgroundSchedulePool();
-
-private:
- using Threads = std::vector<ThreadFromGlobalPool>;
-
- void threadFunction();
- void delayExecutionThreadFunction();
-
- /// Schedule task for execution after specified delay from now.
- void scheduleDelayedTask(const TaskInfoPtr & task_info, size_t ms, std::lock_guard<std::mutex> & task_schedule_mutex_lock);
-
- /// Remove task, that was scheduled with delay, from schedule.
- void cancelDelayedTask(const TaskInfoPtr & task_info, std::lock_guard<std::mutex> & task_schedule_mutex_lock);
-
- /// Number for worker threads.
- const size_t size;
- std::atomic<bool> shutdown {false};
- Threads threads;
- Poco::NotificationQueue queue;
-
- /// Delayed notifications.
-
- std::condition_variable wakeup_cond;
- std::mutex delayed_tasks_mutex;
- /// Thread waiting for next delayed task.
- ThreadFromGlobalPool delayed_thread;
- /// Tasks ordered by scheduled time.
- DelayedTasks delayed_tasks;
-
- /// Thread group used for profiling purposes
- ThreadGroupStatusPtr thread_group;
-
- CurrentMetrics::Metric tasks_metric;
- std::string thread_name;
-
- void attachToThreadGroup();
-};
-
-
-class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable
-{
-public:
- BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_);
-
- /// Schedule for execution as soon as possible (if not already scheduled).
- /// If the task was already scheduled with delay, the delay will be ignored.
- bool schedule();
-
- /// Schedule for execution after specified delay.
- /// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true).
- bool scheduleAfter(size_t ms, bool overwrite = true);
-
- /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
- void deactivate();
-
- void activate();
-
- /// Atomically activate task and schedule it for execution.
- bool activateAndSchedule();
-
- /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
- Coordination::WatchCallback getWatchCallback();
-
-private:
- friend class TaskNotification;
- friend class BackgroundSchedulePool;
-
- void execute();
-
- void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
-
- BackgroundSchedulePool & pool;
- std::string log_name;
- BackgroundSchedulePool::TaskFunc function;
-
- std::mutex exec_mutex;
- std::mutex schedule_mutex;
-
- /// Invariants:
- /// * If deactivated is true then scheduled, delayed and executing are all false.
- /// * scheduled and delayed cannot be true at the same time.
- bool deactivated = false;
- bool scheduled = false;
- bool delayed = false;
- bool executing = false;
-
- /// If the task is scheduled with delay, points to element of delayed_tasks.
- BackgroundSchedulePool::DelayedTasks::iterator iterator;
-};
-
-using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>;
-
-
-class BackgroundSchedulePoolTaskHolder
-{
-public:
- BackgroundSchedulePoolTaskHolder() = default;
- explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {}
- BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete;
- BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
- BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete;
- BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
-
- ~BackgroundSchedulePoolTaskHolder()
- {
- if (task_info)
- task_info->deactivate();
- }
-
- operator bool() const { return task_info != nullptr; }
-
- BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
- const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }
-
-private:
- BackgroundSchedulePoolTaskInfoPtr task_info;
-};
-
-}
+ ~BackgroundSchedulePool();
+
+private:
+ using Threads = std::vector<ThreadFromGlobalPool>;
+
+ void threadFunction();
+ void delayExecutionThreadFunction();
+
+ /// Schedule task for execution after specified delay from now.
+ void scheduleDelayedTask(const TaskInfoPtr & task_info, size_t ms, std::lock_guard<std::mutex> & task_schedule_mutex_lock);
+
+ /// Remove task, that was scheduled with delay, from schedule.
+ void cancelDelayedTask(const TaskInfoPtr & task_info, std::lock_guard<std::mutex> & task_schedule_mutex_lock);
+
+ /// Number for worker threads.
+ const size_t size;
+ std::atomic<bool> shutdown {false};
+ Threads threads;
+ Poco::NotificationQueue queue;
+
+ /// Delayed notifications.
+
+ std::condition_variable wakeup_cond;
+ std::mutex delayed_tasks_mutex;
+ /// Thread waiting for next delayed task.
+ ThreadFromGlobalPool delayed_thread;
+ /// Tasks ordered by scheduled time.
+ DelayedTasks delayed_tasks;
+
+ /// Thread group used for profiling purposes
+ ThreadGroupStatusPtr thread_group;
+
+ CurrentMetrics::Metric tasks_metric;
+ std::string thread_name;
+
+ void attachToThreadGroup();
+};
+
+
+class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable
+{
+public:
+ BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_);
+
+ /// Schedule for execution as soon as possible (if not already scheduled).
+ /// If the task was already scheduled with delay, the delay will be ignored.
+ bool schedule();
+
+ /// Schedule for execution after specified delay.
+ /// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true).
+ bool scheduleAfter(size_t ms, bool overwrite = true);
+
+ /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
+ void deactivate();
+
+ void activate();
+
+ /// Atomically activate task and schedule it for execution.
+ bool activateAndSchedule();
+
+ /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
+ Coordination::WatchCallback getWatchCallback();
+
+private:
+ friend class TaskNotification;
+ friend class BackgroundSchedulePool;
+
+ void execute();
+
+ void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
+
+ BackgroundSchedulePool & pool;
+ std::string log_name;
+ BackgroundSchedulePool::TaskFunc function;
+
+ std::mutex exec_mutex;
+ std::mutex schedule_mutex;
+
+ /// Invariants:
+ /// * If deactivated is true then scheduled, delayed and executing are all false.
+ /// * scheduled and delayed cannot be true at the same time.
+ bool deactivated = false;
+ bool scheduled = false;
+ bool delayed = false;
+ bool executing = false;
+
+ /// If the task is scheduled with delay, points to element of delayed_tasks.
+ BackgroundSchedulePool::DelayedTasks::iterator iterator;
+};
+
+using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>;
+
+
+class BackgroundSchedulePoolTaskHolder
+{
+public:
+ BackgroundSchedulePoolTaskHolder() = default;
+ explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {}
+ BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete;
+ BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
+ BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete;
+ BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
+
+ ~BackgroundSchedulePoolTaskHolder()
+ {
+ if (task_info)
+ task_info->deactivate();
+ }
+
+ operator bool() const { return task_info != nullptr; }
+
+ BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
+ const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }
+
+private:
+ BackgroundSchedulePoolTaskInfoPtr task_info;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h
index 5b017cd463..1c42b211a3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/DecimalComparison.h
@@ -1,320 +1,320 @@
-#pragma once
-
-#include <common/arithmeticOverflow.h>
-#include <Core/Block.h>
-#include <Core/AccurateComparison.h>
-#include <Core/callOnTypeIndex.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypesDecimal.h>
-#include <Columns/ColumnVector.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnConst.h>
-#include <Functions/FunctionHelpers.h> /// TODO Core should not depend on Functions
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int DECIMAL_OVERFLOW;
-}
-
-
-inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataTypePtr & right_type)
-{
- if (isColumnedAsDecimal(left_type))
- {
- if (isColumnedAsDecimal(right_type) || isNotDecimalButComparableToDecimal(right_type))
- return true;
- }
- else if (isNotDecimalButComparableToDecimal(left_type) && isColumnedAsDecimal(right_type))
+#pragma once
+
+#include <common/arithmeticOverflow.h>
+#include <Core/Block.h>
+#include <Core/AccurateComparison.h>
+#include <Core/callOnTypeIndex.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnConst.h>
+#include <Functions/FunctionHelpers.h> /// TODO Core should not depend on Functions
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int DECIMAL_OVERFLOW;
+}
+
+
+inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataTypePtr & right_type)
+{
+ if (isColumnedAsDecimal(left_type))
+ {
+ if (isColumnedAsDecimal(right_type) || isNotDecimalButComparableToDecimal(right_type))
+ return true;
+ }
+ else if (isNotDecimalButComparableToDecimal(left_type) && isColumnedAsDecimal(right_type))
{
- return true;
+ return true;
}
- return false;
-}
-
+ return false;
+}
+
template <size_t> struct ConstructDecInt;
template <> struct ConstructDecInt<1> { using Type = Int32; };
template <> struct ConstructDecInt<2> { using Type = Int32; };
template <> struct ConstructDecInt<4> { using Type = Int32; };
-template <> struct ConstructDecInt<8> { using Type = Int64; };
-template <> struct ConstructDecInt<16> { using Type = Int128; };
+template <> struct ConstructDecInt<8> { using Type = Int64; };
+template <> struct ConstructDecInt<16> { using Type = Int128; };
template <> struct ConstructDecInt<32> { using Type = Int256; };
-
-template <typename T, typename U>
-struct DecCompareInt
-{
- using Type = typename ConstructDecInt<(!IsDecimalNumber<U> || sizeof(T) > sizeof(U)) ? sizeof(T) : sizeof(U)>::Type;
- using TypeA = Type;
- using TypeB = Type;
-};
-
-///
-template <typename A, typename B, template <typename, typename> typename Operation, bool _check_overflow = true,
- bool _actual = IsDecimalNumber<A> || IsDecimalNumber<B>>
-class DecimalComparison
-{
-public:
- using CompareInt = typename DecCompareInt<A, B>::Type;
- using Op = Operation<CompareInt, CompareInt>;
- using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>;
- using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>;
-
- using ArrayA = typename ColVecA::Container;
- using ArrayB = typename ColVecB::Container;
-
+
+template <typename T, typename U>
+struct DecCompareInt
+{
+ using Type = typename ConstructDecInt<(!IsDecimalNumber<U> || sizeof(T) > sizeof(U)) ? sizeof(T) : sizeof(U)>::Type;
+ using TypeA = Type;
+ using TypeB = Type;
+};
+
+///
+template <typename A, typename B, template <typename, typename> typename Operation, bool _check_overflow = true,
+ bool _actual = IsDecimalNumber<A> || IsDecimalNumber<B>>
+class DecimalComparison
+{
+public:
+ using CompareInt = typename DecCompareInt<A, B>::Type;
+ using Op = Operation<CompareInt, CompareInt>;
+ using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>;
+ using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>;
+
+ using ArrayA = typename ColVecA::Container;
+ using ArrayB = typename ColVecB::Container;
+
static ColumnPtr apply(const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
- {
- if constexpr (_actual)
- {
- ColumnPtr c_res;
- Shift shift = getScales<A, B>(col_left.type, col_right.type);
-
+ {
+ if constexpr (_actual)
+ {
+ ColumnPtr c_res;
+ Shift shift = getScales<A, B>(col_left.type, col_right.type);
+
return applyWithScale(col_left.column, col_right.column, shift);
- }
+ }
else
return nullptr;
- }
-
- static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b)
- {
+ }
+
+ static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b)
+ {
static const UInt32 max_scale = DecimalUtils::max_precision<Decimal256>;
- if (scale_a > max_scale || scale_b > max_scale)
- throw Exception("Bad scale of decimal field", ErrorCodes::DECIMAL_OVERFLOW);
-
- Shift shift;
- if (scale_a < scale_b)
+ if (scale_a > max_scale || scale_b > max_scale)
+ throw Exception("Bad scale of decimal field", ErrorCodes::DECIMAL_OVERFLOW);
+
+ Shift shift;
+ if (scale_a < scale_b)
shift.a = static_cast<CompareInt>(DecimalUtils::scaleMultiplier<B>(scale_b - scale_a));
- if (scale_a > scale_b)
+ if (scale_a > scale_b)
shift.b = static_cast<CompareInt>(DecimalUtils::scaleMultiplier<A>(scale_a - scale_b));
-
- return applyWithScale(a, b, shift);
- }
-
-private:
- struct Shift
- {
- CompareInt a = 1;
- CompareInt b = 1;
-
- bool none() const { return a == 1 && b == 1; }
- bool left() const { return a != 1; }
- bool right() const { return b != 1; }
- };
-
- template <typename T, typename U>
- static auto applyWithScale(T a, U b, const Shift & shift)
- {
- if (shift.left())
- return apply<true, false>(a, b, shift.a);
- else if (shift.right())
- return apply<false, true>(a, b, shift.b);
- return apply<false, false>(a, b, 1);
- }
-
- template <typename T, typename U>
- static std::enable_if_t<IsDecimalNumber<T> && IsDecimalNumber<U>, Shift>
- getScales(const DataTypePtr & left_type, const DataTypePtr & right_type)
- {
+
+ return applyWithScale(a, b, shift);
+ }
+
+private:
+ struct Shift
+ {
+ CompareInt a = 1;
+ CompareInt b = 1;
+
+ bool none() const { return a == 1 && b == 1; }
+ bool left() const { return a != 1; }
+ bool right() const { return b != 1; }
+ };
+
+ template <typename T, typename U>
+ static auto applyWithScale(T a, U b, const Shift & shift)
+ {
+ if (shift.left())
+ return apply<true, false>(a, b, shift.a);
+ else if (shift.right())
+ return apply<false, true>(a, b, shift.b);
+ return apply<false, false>(a, b, 1);
+ }
+
+ template <typename T, typename U>
+ static std::enable_if_t<IsDecimalNumber<T> && IsDecimalNumber<U>, Shift>
+ getScales(const DataTypePtr & left_type, const DataTypePtr & right_type)
+ {
const DataTypeDecimalBase<T> * decimal0 = checkDecimalBase<T>(*left_type);
const DataTypeDecimalBase<U> * decimal1 = checkDecimalBase<U>(*right_type);
-
- Shift shift;
- if (decimal0 && decimal1)
- {
+
+ Shift shift;
+ if (decimal0 && decimal1)
+ {
auto result_type = DecimalUtils::binaryOpResult<false, false>(*decimal0, *decimal1);
shift.a = static_cast<CompareInt>(result_type.scaleFactorFor(decimal0->getTrait(), false).value);
shift.b = static_cast<CompareInt>(result_type.scaleFactorFor(decimal1->getTrait(), false).value);
- }
- else if (decimal0)
+ }
+ else if (decimal0)
shift.b = static_cast<CompareInt>(decimal0->getScaleMultiplier().value);
- else if (decimal1)
+ else if (decimal1)
shift.a = static_cast<CompareInt>(decimal1->getScaleMultiplier().value);
-
- return shift;
- }
-
- template <typename T, typename U>
- static std::enable_if_t<IsDecimalNumber<T> && !IsDecimalNumber<U>, Shift>
- getScales(const DataTypePtr & left_type, const DataTypePtr &)
- {
- Shift shift;
+
+ return shift;
+ }
+
+ template <typename T, typename U>
+ static std::enable_if_t<IsDecimalNumber<T> && !IsDecimalNumber<U>, Shift>
+ getScales(const DataTypePtr & left_type, const DataTypePtr &)
+ {
+ Shift shift;
const DataTypeDecimalBase<T> * decimal0 = checkDecimalBase<T>(*left_type);
- if (decimal0)
+ if (decimal0)
shift.b = static_cast<CompareInt>(decimal0->getScaleMultiplier().value);
- return shift;
- }
-
- template <typename T, typename U>
- static std::enable_if_t<!IsDecimalNumber<T> && IsDecimalNumber<U>, Shift>
- getScales(const DataTypePtr &, const DataTypePtr & right_type)
- {
- Shift shift;
+ return shift;
+ }
+
+ template <typename T, typename U>
+ static std::enable_if_t<!IsDecimalNumber<T> && IsDecimalNumber<U>, Shift>
+ getScales(const DataTypePtr &, const DataTypePtr & right_type)
+ {
+ Shift shift;
const DataTypeDecimalBase<U> * decimal1 = checkDecimalBase<U>(*right_type);
- if (decimal1)
+ if (decimal1)
shift.a = static_cast<CompareInt>(decimal1->getScaleMultiplier().value);
- return shift;
- }
-
- template <bool scale_left, bool scale_right>
- static ColumnPtr apply(const ColumnPtr & c0, const ColumnPtr & c1, CompareInt scale)
- {
- auto c_res = ColumnUInt8::create();
-
- if constexpr (_actual)
- {
- bool c0_is_const = isColumnConst(*c0);
- bool c1_is_const = isColumnConst(*c1);
-
- if (c0_is_const && c1_is_const)
- {
- const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get());
- const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get());
-
- A a = c0_const->template getValue<A>();
- B b = c1_const->template getValue<B>();
- UInt8 res = apply<scale_left, scale_right>(a, b, scale);
- return DataTypeUInt8().createColumnConst(c0->size(), toField(res));
- }
-
- ColumnUInt8::Container & vec_res = c_res->getData();
- vec_res.resize(c0->size());
-
- if (c0_is_const)
- {
- const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get());
- A a = c0_const->template getValue<A>();
- if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get()))
- constantVector<scale_left, scale_right>(a, c1_vec->getData(), vec_res, scale);
- else
- throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
- }
- else if (c1_is_const)
- {
- const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get());
- B b = c1_const->template getValue<B>();
- if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get()))
- vectorConstant<scale_left, scale_right>(c0_vec->getData(), b, vec_res, scale);
- else
- throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
- }
- else
- {
- if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get()))
- {
- if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get()))
- vectorVector<scale_left, scale_right>(c0_vec->getData(), c1_vec->getData(), vec_res, scale);
- else
- throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
- }
- else
- throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- return c_res;
- }
-
- template <bool scale_left, bool scale_right>
- static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]])
- {
+ return shift;
+ }
+
+ template <bool scale_left, bool scale_right>
+ static ColumnPtr apply(const ColumnPtr & c0, const ColumnPtr & c1, CompareInt scale)
+ {
+ auto c_res = ColumnUInt8::create();
+
+ if constexpr (_actual)
+ {
+ bool c0_is_const = isColumnConst(*c0);
+ bool c1_is_const = isColumnConst(*c1);
+
+ if (c0_is_const && c1_is_const)
+ {
+ const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get());
+ const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get());
+
+ A a = c0_const->template getValue<A>();
+ B b = c1_const->template getValue<B>();
+ UInt8 res = apply<scale_left, scale_right>(a, b, scale);
+ return DataTypeUInt8().createColumnConst(c0->size(), toField(res));
+ }
+
+ ColumnUInt8::Container & vec_res = c_res->getData();
+ vec_res.resize(c0->size());
+
+ if (c0_is_const)
+ {
+ const ColumnConst * c0_const = checkAndGetColumnConst<ColVecA>(c0.get());
+ A a = c0_const->template getValue<A>();
+ if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get()))
+ constantVector<scale_left, scale_right>(a, c1_vec->getData(), vec_res, scale);
+ else
+ throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
+ }
+ else if (c1_is_const)
+ {
+ const ColumnConst * c1_const = checkAndGetColumnConst<ColVecB>(c1.get());
+ B b = c1_const->template getValue<B>();
+ if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get()))
+ vectorConstant<scale_left, scale_right>(c0_vec->getData(), b, vec_res, scale);
+ else
+ throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
+ }
+ else
+ {
+ if (const ColVecA * c0_vec = checkAndGetColumn<ColVecA>(c0.get()))
+ {
+ if (const ColVecB * c1_vec = checkAndGetColumn<ColVecB>(c1.get()))
+ vectorVector<scale_left, scale_right>(c0_vec->getData(), c1_vec->getData(), vec_res, scale);
+ else
+ throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
+ }
+ else
+ throw Exception("Wrong column in Decimal comparison", ErrorCodes::LOGICAL_ERROR);
+ }
+ }
+
+ return c_res;
+ }
+
+ template <bool scale_left, bool scale_right>
+ static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]])
+ {
CompareInt x;
if constexpr (IsDecimalNumber<A>)
x = a.value;
else
x = a;
-
+
CompareInt y;
if constexpr (IsDecimalNumber<B>)
y = b.value;
else
y = b;
- if constexpr (_check_overflow)
- {
- bool overflow = false;
-
- if constexpr (sizeof(A) > sizeof(CompareInt))
+ if constexpr (_check_overflow)
+ {
+ bool overflow = false;
+
+ if constexpr (sizeof(A) > sizeof(CompareInt))
overflow |= (static_cast<A>(x) != a);
- if constexpr (sizeof(B) > sizeof(CompareInt))
+ if constexpr (sizeof(B) > sizeof(CompareInt))
overflow |= (static_cast<B>(y) != b);
- if constexpr (is_unsigned_v<A>)
- overflow |= (x < 0);
- if constexpr (is_unsigned_v<B>)
- overflow |= (y < 0);
-
- if constexpr (scale_left)
- overflow |= common::mulOverflow(x, scale, x);
- if constexpr (scale_right)
- overflow |= common::mulOverflow(y, scale, y);
-
- if (overflow)
+ if constexpr (is_unsigned_v<A>)
+ overflow |= (x < 0);
+ if constexpr (is_unsigned_v<B>)
+ overflow |= (y < 0);
+
+ if constexpr (scale_left)
+ overflow |= common::mulOverflow(x, scale, x);
+ if constexpr (scale_right)
+ overflow |= common::mulOverflow(y, scale, y);
+
+ if (overflow)
throw Exception("Can't compare decimal number due to overflow", ErrorCodes::DECIMAL_OVERFLOW);
- }
- else
- {
- if constexpr (scale_left)
+ }
+ else
+ {
+ if constexpr (scale_left)
x = common::mulIgnoreOverflow(x, scale);
- if constexpr (scale_right)
+ if constexpr (scale_right)
y = common::mulIgnoreOverflow(y, scale);
- }
-
- return Op::apply(x, y);
- }
-
- template <bool scale_left, bool scale_right>
- static void NO_INLINE vectorVector(const ArrayA & a, const ArrayB & b, PaddedPODArray<UInt8> & c,
- CompareInt scale)
- {
- size_t size = a.size();
- const A * a_pos = a.data();
- const B * b_pos = b.data();
- UInt8 * c_pos = c.data();
- const A * a_end = a_pos + size;
-
- while (a_pos < a_end)
- {
- *c_pos = apply<scale_left, scale_right>(*a_pos, *b_pos, scale);
- ++a_pos;
- ++b_pos;
- ++c_pos;
- }
- }
-
- template <bool scale_left, bool scale_right>
- static void NO_INLINE vectorConstant(const ArrayA & a, B b, PaddedPODArray<UInt8> & c, CompareInt scale)
- {
- size_t size = a.size();
- const A * a_pos = a.data();
- UInt8 * c_pos = c.data();
- const A * a_end = a_pos + size;
-
- while (a_pos < a_end)
- {
- *c_pos = apply<scale_left, scale_right>(*a_pos, b, scale);
- ++a_pos;
- ++c_pos;
- }
- }
-
- template <bool scale_left, bool scale_right>
- static void NO_INLINE constantVector(A a, const ArrayB & b, PaddedPODArray<UInt8> & c, CompareInt scale)
- {
- size_t size = b.size();
- const B * b_pos = b.data();
- UInt8 * c_pos = c.data();
- const B * b_end = b_pos + size;
-
- while (b_pos < b_end)
- {
- *c_pos = apply<scale_left, scale_right>(a, *b_pos, scale);
- ++b_pos;
- ++c_pos;
- }
- }
-};
-
-}
+ }
+
+ return Op::apply(x, y);
+ }
+
+ template <bool scale_left, bool scale_right>
+ static void NO_INLINE vectorVector(const ArrayA & a, const ArrayB & b, PaddedPODArray<UInt8> & c,
+ CompareInt scale)
+ {
+ size_t size = a.size();
+ const A * a_pos = a.data();
+ const B * b_pos = b.data();
+ UInt8 * c_pos = c.data();
+ const A * a_end = a_pos + size;
+
+ while (a_pos < a_end)
+ {
+ *c_pos = apply<scale_left, scale_right>(*a_pos, *b_pos, scale);
+ ++a_pos;
+ ++b_pos;
+ ++c_pos;
+ }
+ }
+
+ template <bool scale_left, bool scale_right>
+ static void NO_INLINE vectorConstant(const ArrayA & a, B b, PaddedPODArray<UInt8> & c, CompareInt scale)
+ {
+ size_t size = a.size();
+ const A * a_pos = a.data();
+ UInt8 * c_pos = c.data();
+ const A * a_end = a_pos + size;
+
+ while (a_pos < a_end)
+ {
+ *c_pos = apply<scale_left, scale_right>(*a_pos, b, scale);
+ ++a_pos;
+ ++c_pos;
+ }
+ }
+
+ template <bool scale_left, bool scale_right>
+ static void NO_INLINE constantVector(A a, const ArrayB & b, PaddedPODArray<UInt8> & c, CompareInt scale)
+ {
+ size_t size = b.size();
+ const B * b_pos = b.data();
+ UInt8 * c_pos = c.data();
+ const B * b_end = b_pos + size;
+
+ while (b_pos < b_end)
+ {
+ *c_pos = apply<scale_left, scale_right>(a, *b_pos, scale);
+ ++b_pos;
+ ++c_pos;
+ }
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h
index 92e780104b..955b292f3b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h
@@ -1,93 +1,93 @@
-#pragma once
-
+#pragma once
+
#include <common/types.h>
-
-
-namespace DB
-{
-
-
-/// Client-server protocol.
-///
-/// Client opens a connection and sends Hello packet.
-/// If client version is incompatible, the server can terminate the connection.
-/// Server responds with Hello packet.
-/// If server version is incompatible, the client can terminate the connection.
-///
-/// The main loop follows:
-///
-/// 1. The client sends Query packet.
-///
-/// Starting from version 50263 immediately after sending the Query packet the client starts
-/// transfer of external (temporary) table (external storages) - one or several Data packets.
-/// End of transmission is marked by an empty block.
-/// At present, non-empty tables can be sent only along with SELECT query.
-///
-/// If the query is an INSERT (and thus requires data transfer from client), then the server transmits
-/// Data packet containing empty block that describes the table structure.
-/// Then the client sends one or several Data packets - data for insertion.
-/// End of data is marked by the transmission of empty block.
-/// Then the server sends EndOfStream packet.
-///
-/// If the query is a SELECT or a query of other type, then the server transmits packets of
-/// one of the following types:
-/// - Data - data corresponding to one block of query results.
-/// - Progress - query execution progress.
-/// - Exception - error description.
-/// - EndOfStream - the end of data transmission.
-///
-/// The client should read packets until EndOfStream or Exception.
-///
-/// The client can also send Cancel packet - a request to cancel the query.
-/// In this case the server can stop executing the query and return incomplete data,
-/// but the client must still read until EndOfStream packet.
-///
-/// Also if there is profiling info and the client revision is recent enough, the server can
-/// send one of the following packets before EndOfStream:
-/// - Totals - a block with total values
-/// - ProfileInfo - serialized BlockStreamProfileInfo structure.
-///
-/// If a query returns data, the server sends an empty header block containing
-/// the description of resulting columns before executing the query.
-/// Using this block the client can initialize the output formatter and display the prefix of resulting table
-/// beforehand.
-
+
+
+namespace DB
+{
+
+
+/// Client-server protocol.
+///
+/// Client opens a connection and sends Hello packet.
+/// If client version is incompatible, the server can terminate the connection.
+/// Server responds with Hello packet.
+/// If server version is incompatible, the client can terminate the connection.
+///
+/// The main loop follows:
+///
+/// 1. The client sends Query packet.
+///
+/// Starting from version 50263 immediately after sending the Query packet the client starts
+/// transfer of external (temporary) table (external storages) - one or several Data packets.
+/// End of transmission is marked by an empty block.
+/// At present, non-empty tables can be sent only along with SELECT query.
+///
+/// If the query is an INSERT (and thus requires data transfer from client), then the server transmits
+/// Data packet containing empty block that describes the table structure.
+/// Then the client sends one or several Data packets - data for insertion.
+/// End of data is marked by the transmission of empty block.
+/// Then the server sends EndOfStream packet.
+///
+/// If the query is a SELECT or a query of other type, then the server transmits packets of
+/// one of the following types:
+/// - Data - data corresponding to one block of query results.
+/// - Progress - query execution progress.
+/// - Exception - error description.
+/// - EndOfStream - the end of data transmission.
+///
+/// The client should read packets until EndOfStream or Exception.
+///
+/// The client can also send Cancel packet - a request to cancel the query.
+/// In this case the server can stop executing the query and return incomplete data,
+/// but the client must still read until EndOfStream packet.
+///
+/// Also if there is profiling info and the client revision is recent enough, the server can
+/// send one of the following packets before EndOfStream:
+/// - Totals - a block with total values
+/// - ProfileInfo - serialized BlockStreamProfileInfo structure.
+///
+/// If a query returns data, the server sends an empty header block containing
+/// the description of resulting columns before executing the query.
+/// Using this block the client can initialize the output formatter and display the prefix of resulting table
+/// beforehand.
+
/// Marker of the inter-server secret (passed in the user name)
/// (anyway user cannot be started with a whitespace)
const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET ";
-namespace Protocol
-{
- /// Packet types that server transmits.
- namespace Server
- {
- enum Enum
- {
- Hello = 0, /// Name, version, revision.
- Data = 1, /// A block of data (compressed or not).
- Exception = 2, /// The exception during query execution.
- Progress = 3, /// Query execution progress: rows read, bytes read.
- Pong = 4, /// Ping response
- EndOfStream = 5, /// All packets were transmitted
- ProfileInfo = 6, /// Packet with profiling info.
- Totals = 7, /// A block with totals (compressed or not).
- Extremes = 8, /// A block with minimums and maximums (compressed or not).
- TablesStatusResponse = 9, /// A response to TablesStatus request.
- Log = 10, /// System logs of the query execution
- TableColumns = 11, /// Columns' description for default values calculation
+namespace Protocol
+{
+ /// Packet types that server transmits.
+ namespace Server
+ {
+ enum Enum
+ {
+ Hello = 0, /// Name, version, revision.
+ Data = 1, /// A block of data (compressed or not).
+ Exception = 2, /// The exception during query execution.
+ Progress = 3, /// Query execution progress: rows read, bytes read.
+ Pong = 4, /// Ping response
+ EndOfStream = 5, /// All packets were transmitted
+ ProfileInfo = 6, /// Packet with profiling info.
+ Totals = 7, /// A block with totals (compressed or not).
+ Extremes = 8, /// A block with minimums and maximums (compressed or not).
+ TablesStatusResponse = 9, /// A response to TablesStatus request.
+ Log = 10, /// System logs of the query execution
+ TableColumns = 11, /// Columns' description for default values calculation
PartUUIDs = 12, /// List of unique parts ids.
ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed
/// This is such an inverted logic, where server sends requests
/// And client returns back response
MAX = ReadTaskRequest,
- };
-
- /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
- /// would always be true because of compiler optimisation. That would lead to out-of-bounds error
- /// if the packet is invalid.
- /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values
- inline const char * toString(UInt64 packet)
- {
+ };
+
+ /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
+ /// would always be true because of compiler optimisation. That would lead to out-of-bounds error
+ /// if the packet is invalid.
+ /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values
+ inline const char * toString(UInt64 packet)
+ {
static const char * data[] = {
"Hello",
"Data",
@@ -105,46 +105,46 @@ namespace Protocol
"ReadTaskRequest"
};
return packet <= MAX
- ? data[packet]
- : "Unknown packet";
- }
-
- inline size_t stringsInMessage(UInt64 msg_type)
- {
- switch (msg_type)
- {
- case TableColumns:
- return 2;
- default:
- break;
- }
- return 0;
- }
- }
-
- /// Packet types that client transmits.
- namespace Client
- {
- enum Enum
- {
- Hello = 0, /// Name, version, revision, default DB
- Query = 1, /// Query id, query settings, stage up to which the query must be executed,
- /// whether the compression must be used,
- /// query text (without data for INSERTs).
- Data = 2, /// A block of data (compressed or not).
- Cancel = 3, /// Cancel the query execution.
- Ping = 4, /// Check that connection to the server is alive.
- TablesStatusRequest = 5, /// Check status of tables on the server.
- KeepAlive = 6, /// Keep the connection alive
+ ? data[packet]
+ : "Unknown packet";
+ }
+
+ inline size_t stringsInMessage(UInt64 msg_type)
+ {
+ switch (msg_type)
+ {
+ case TableColumns:
+ return 2;
+ default:
+ break;
+ }
+ return 0;
+ }
+ }
+
+ /// Packet types that client transmits.
+ namespace Client
+ {
+ enum Enum
+ {
+ Hello = 0, /// Name, version, revision, default DB
+ Query = 1, /// Query id, query settings, stage up to which the query must be executed,
+ /// whether the compression must be used,
+ /// query text (without data for INSERTs).
+ Data = 2, /// A block of data (compressed or not).
+ Cancel = 3, /// Cancel the query execution.
+ Ping = 4, /// Check that connection to the server is alive.
+ TablesStatusRequest = 5, /// Check status of tables on the server.
+ KeepAlive = 6, /// Keep the connection alive
Scalar = 7, /// A block of data (compressed or not).
IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing
ReadTaskResponse = 9, /// TODO:
MAX = ReadTaskResponse,
- };
-
- inline const char * toString(UInt64 packet)
- {
+ };
+
+ inline const char * toString(UInt64 packet)
+ {
static const char * data[] = {
"Hello",
"Query",
@@ -158,25 +158,25 @@ namespace Protocol
"ReadTaskResponse",
};
return packet <= MAX
- ? data[packet]
- : "Unknown packet";
- }
- }
-
- /// Whether the compression must be used.
- enum class Compression
- {
- Disable = 0,
- Enable = 1,
- };
-
- /// Whether the ssl must be used.
- enum class Secure
- {
- Disable = 0,
- Enable = 1,
- };
-
-}
-
-}
+ ? data[packet]
+ : "Unknown packet";
+ }
+ }
+
+ /// Whether the compression must be used.
+ enum class Compression
+ {
+ Disable = 0,
+ Enable = 1,
+ };
+
+ /// Whether the ssl must be used.
+ enum class Secure
+ {
+ Disable = 0,
+ Enable = 1,
+ };
+
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h
index ba125e2882..931c9818fe 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h
@@ -1,21 +1,21 @@
-#pragma once
-
-#include <DataStreams/IBlockOutputStream.h>
+#pragma once
+
+#include <DataStreams/IBlockOutputStream.h>
#include <Interpreters/QueryViewsLog.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage.h>
#include <Common/Stopwatch.h>
-
+
namespace Poco
{
class Logger;
}
-namespace DB
-{
-
+namespace DB
+{
+
class ReplicatedMergeTreeSink;
-
+
struct ViewRuntimeData
{
const ASTPtr query;
@@ -31,44 +31,44 @@ struct ViewRuntimeData
}
};
-/** Writes data to the specified table and to all dependent materialized views.
- */
+/** Writes data to the specified table and to all dependent materialized views.
+ */
class PushingToViewsBlockOutputStream : public IBlockOutputStream, WithContext
-{
-public:
+{
+public:
PushingToViewsBlockOutputStream(
const StoragePtr & storage_,
const StorageMetadataPtr & metadata_snapshot_,
ContextPtr context_,
const ASTPtr & query_ptr_,
bool no_destination = false);
-
- Block getHeader() const override;
- void write(const Block & block) override;
-
- void flush() override;
- void writePrefix() override;
- void writeSuffix() override;
+
+ Block getHeader() const override;
+ void write(const Block & block) override;
+
+ void flush() override;
+ void writePrefix() override;
+ void writeSuffix() override;
void onProgress(const Progress & progress) override;
-
-private:
- StoragePtr storage;
+
+private:
+ StoragePtr storage;
StorageMetadataPtr metadata_snapshot;
- BlockOutputStreamPtr output;
+ BlockOutputStreamPtr output;
ReplicatedMergeTreeSink * replicated_output = nullptr;
Poco::Logger * log;
-
- ASTPtr query_ptr;
+
+ ASTPtr query_ptr;
Stopwatch main_watch;
-
+
std::vector<ViewRuntimeData> views;
ContextMutablePtr select_context;
ContextMutablePtr insert_context;
-
+
void process(const Block & block, ViewRuntimeData & view);
void checkExceptionsInViews();
void logQueryViews();
-};
-
-
-}
+};
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp
index 7caa54cff2..ccfcc1799b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp
@@ -1,69 +1,69 @@
-#include <DataStreams/RemoteBlockInputStream.h>
-#include <Interpreters/Context.h>
-
-namespace DB
-{
-
-RemoteBlockInputStream::RemoteBlockInputStream(
+#include <DataStreams/RemoteBlockInputStream.h>
+#include <Interpreters/Context.h>
+
+namespace DB
+{
+
+RemoteBlockInputStream::RemoteBlockInputStream(
Connection & connection,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
: query_executor(connection, query_, header_, context_, throttler, scalars_, external_tables_, stage_)
-{
- init();
-}
-
-RemoteBlockInputStream::RemoteBlockInputStream(
+{
+ init();
+}
+
+RemoteBlockInputStream::RemoteBlockInputStream(
const ConnectionPoolWithFailoverPtr & pool,
std::vector<IConnectionPool::Entry> && connections,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
: query_executor(pool, std::move(connections), query_, header_, context_, throttler, scalars_, external_tables_, stage_)
-{
- init();
-}
-
-RemoteBlockInputStream::RemoteBlockInputStream(
+{
+ init();
+}
+
+RemoteBlockInputStream::RemoteBlockInputStream(
const ConnectionPoolWithFailoverPtr & pool,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
: query_executor(pool, query_, header_, context_, throttler, scalars_, external_tables_, stage_)
-{
- init();
-}
-
-void RemoteBlockInputStream::init()
-{
- query_executor.setProgressCallback([this](const Progress & progress) { progressImpl(progress); });
- query_executor.setProfileInfoCallback([this](const BlockStreamProfileInfo & info_) { info.setFrom(info_, true); });
- query_executor.setLogger(log);
-}
-
-void RemoteBlockInputStream::cancel(bool kill)
-{
- if (kill)
- is_killed = true;
-
- bool old_val = false;
- if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
- return;
-
- query_executor.cancel();
-}
-
-Block RemoteBlockInputStream::readImpl()
-{
- auto block = query_executor.read();
-
- if (isCancelledOrThrowIfKilled())
- return Block();
-
- return block;
-}
-
-void RemoteBlockInputStream::readSuffixImpl()
-{
- query_executor.finish();
-}
-
-}
+{
+ init();
+}
+
+void RemoteBlockInputStream::init()
+{
+ query_executor.setProgressCallback([this](const Progress & progress) { progressImpl(progress); });
+ query_executor.setProfileInfoCallback([this](const BlockStreamProfileInfo & info_) { info.setFrom(info_, true); });
+ query_executor.setLogger(log);
+}
+
+void RemoteBlockInputStream::cancel(bool kill)
+{
+ if (kill)
+ is_killed = true;
+
+ bool old_val = false;
+ if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
+ return;
+
+ query_executor.cancel();
+}
+
+Block RemoteBlockInputStream::readImpl()
+{
+ auto block = query_executor.read();
+
+ if (isCancelledOrThrowIfKilled())
+ return Block();
+
+ return block;
+}
+
+void RemoteBlockInputStream::readSuffixImpl()
+{
+ query_executor.finish();
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h
index 1be6b03152..2800b38f37 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h
@@ -1,78 +1,78 @@
-#pragma once
-
-#include <optional>
-
-#include <common/logger_useful.h>
-
-#include <DataStreams/IBlockInputStream.h>
-#include <Common/Throttler.h>
-#include <Client/ConnectionPool.h>
-#include <Client/MultiplexedConnections.h>
-#include <Interpreters/Cluster.h>
-
-#include <DataStreams/RemoteQueryExecutor.h>
-
-namespace DB
-{
-
+#pragma once
+
+#include <optional>
+
+#include <common/logger_useful.h>
+
+#include <DataStreams/IBlockInputStream.h>
+#include <Common/Throttler.h>
+#include <Client/ConnectionPool.h>
+#include <Client/MultiplexedConnections.h>
+#include <Interpreters/Cluster.h>
+
+#include <DataStreams/RemoteQueryExecutor.h>
+
+namespace DB
+{
+
class Context;
-/** This class allows one to launch queries on remote replicas of one shard and get results
- */
-class RemoteBlockInputStream : public IBlockInputStream
-{
-public:
- /// Takes already set connection.
- RemoteBlockInputStream(
+/** This class allows one to launch queries on remote replicas of one shard and get results
+ */
+class RemoteBlockInputStream : public IBlockInputStream
+{
+public:
+ /// Takes already set connection.
+ RemoteBlockInputStream(
Connection & connection,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
- /// Accepts several connections already taken from pool.
- RemoteBlockInputStream(
+
+ /// Accepts several connections already taken from pool.
+ RemoteBlockInputStream(
const ConnectionPoolWithFailoverPtr & pool,
std::vector<IConnectionPool::Entry> && connections,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
- /// Takes a pool and gets one or several connections from it.
- RemoteBlockInputStream(
+
+ /// Takes a pool and gets one or several connections from it.
+ RemoteBlockInputStream(
const ConnectionPoolWithFailoverPtr & pool,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
- /// Set the query_id. For now, used by performance test to later find the query
- /// in the server query_log. Must be called before sending the query to the server.
- void setQueryId(const std::string & query_id) { query_executor.setQueryId(query_id); }
-
- /// Specify how we allocate connections on a shard.
- void setPoolMode(PoolMode pool_mode) { query_executor.setPoolMode(pool_mode); }
-
- void setMainTable(StorageID main_table_) { query_executor.setMainTable(std::move(main_table_)); }
-
- /// Prevent default progress notification because progress' callback is called by its own.
- void progress(const Progress & /*value*/) override {}
-
- void cancel(bool kill) override;
-
- String getName() const override { return "Remote"; }
-
- Block getHeader() const override { return query_executor.getHeader(); }
- Block getTotals() override { return query_executor.getTotals(); }
- Block getExtremes() override { return query_executor.getExtremes(); }
-
-protected:
- Block readImpl() override;
- void readSuffixImpl() override;
-
-private:
- RemoteQueryExecutor query_executor;
- Poco::Logger * log = &Poco::Logger::get("RemoteBlockInputStream");
-
- void init();
-};
-
-}
+
+ /// Set the query_id. For now, used by performance test to later find the query
+ /// in the server query_log. Must be called before sending the query to the server.
+ void setQueryId(const std::string & query_id) { query_executor.setQueryId(query_id); }
+
+ /// Specify how we allocate connections on a shard.
+ void setPoolMode(PoolMode pool_mode) { query_executor.setPoolMode(pool_mode); }
+
+ void setMainTable(StorageID main_table_) { query_executor.setMainTable(std::move(main_table_)); }
+
+ /// Prevent default progress notification because progress' callback is called by its own.
+ void progress(const Progress & /*value*/) override {}
+
+ void cancel(bool kill) override;
+
+ String getName() const override { return "Remote"; }
+
+ Block getHeader() const override { return query_executor.getHeader(); }
+ Block getTotals() override { return query_executor.getTotals(); }
+ Block getExtremes() override { return query_executor.getExtremes(); }
+
+protected:
+ Block readImpl() override;
+ void readSuffixImpl() override;
+
+private:
+ RemoteQueryExecutor query_executor;
+ Poco::Logger * log = &Poco::Logger::get("RemoteBlockInputStream");
+
+ void init();
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp
index a64c4409cc..f5ed1de947 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp
@@ -1,22 +1,22 @@
#include <DataStreams/ConnectionCollector.h>
-#include <DataStreams/RemoteQueryExecutor.h>
+#include <DataStreams/RemoteQueryExecutor.h>
#include <DataStreams/RemoteQueryExecutorReadContext.h>
-
-#include <Columns/ColumnConst.h>
-#include <Common/CurrentThread.h>
-#include <Processors/Pipe.h>
-#include <Processors/Sources/SourceFromSingleChunk.h>
-#include <Storages/IStorage.h>
+
+#include <Columns/ColumnConst.h>
+#include <Common/CurrentThread.h>
+#include <Processors/Pipe.h>
+#include <Processors/Sources/SourceFromSingleChunk.h>
+#include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
-#include <Interpreters/castColumn.h>
-#include <Interpreters/Cluster.h>
+#include <Interpreters/castColumn.h>
+#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
-#include <Interpreters/InternalTextLogsQueue.h>
+#include <Interpreters/InternalTextLogsQueue.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <Client/MultiplexedConnections.h>
#include <Client/HedgedConnections.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
-
+
namespace CurrentMetrics
{
@@ -24,17 +24,17 @@ namespace CurrentMetrics
extern const Metric ActiveSyncDrainedConnections;
}
-namespace DB
-{
-
-namespace ErrorCodes
-{
+namespace DB
+{
+
+namespace ErrorCodes
+{
extern const int LOGICAL_ERROR;
- extern const int UNKNOWN_PACKET_FROM_SERVER;
+ extern const int UNKNOWN_PACKET_FROM_SERVER;
extern const int DUPLICATED_PART_UUIDS;
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
+}
+
+RemoteQueryExecutor::RemoteQueryExecutor(
const String & query_, const Block & header_, ContextPtr context_,
const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
@@ -43,19 +43,19 @@ RemoteQueryExecutor::RemoteQueryExecutor(
{}
RemoteQueryExecutor::RemoteQueryExecutor(
- Connection & connection,
+ Connection & connection,
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_)
-{
+{
create_connections = [this, &connection, throttler]()
- {
+ {
return std::make_shared<MultiplexedConnections>(connection, context->getSettingsRef(), throttler);
- };
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
+ };
+}
+
+RemoteQueryExecutor::RemoteQueryExecutor(
std::shared_ptr<Connection> connection_ptr,
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
@@ -76,24 +76,24 @@ RemoteQueryExecutor::RemoteQueryExecutor(
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_)
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_)
-{
+{
create_connections = [this, connections_, throttler]() mutable {
return std::make_shared<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
- };
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
+ };
+}
+
+RemoteQueryExecutor::RemoteQueryExecutor(
const ConnectionPoolWithFailoverPtr & pool_,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_)
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_)
-{
+{
create_connections = [this, throttler]()->std::shared_ptr<IConnections>
- {
+ {
const Settings & current_settings = context->getSettingsRef();
- auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
+ auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
#if defined(OS_LINUX)
if (current_settings.use_hedged_requests)
@@ -107,90 +107,90 @@ RemoteQueryExecutor::RemoteQueryExecutor(
#endif
std::vector<IConnectionPool::Entry> connection_entries;
- if (main_table)
- {
- auto try_results = pool->getManyChecked(timeouts, &current_settings, pool_mode, main_table.getQualifiedName());
+ if (main_table)
+ {
+ auto try_results = pool->getManyChecked(timeouts, &current_settings, pool_mode, main_table.getQualifiedName());
connection_entries.reserve(try_results.size());
- for (auto & try_result : try_results)
+ for (auto & try_result : try_results)
connection_entries.emplace_back(std::move(try_result.entry));
- }
- else
+ }
+ else
connection_entries = pool->getMany(timeouts, &current_settings, pool_mode);
-
+
return std::make_shared<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
- };
-}
-
-RemoteQueryExecutor::~RemoteQueryExecutor()
-{
- /** If interrupted in the middle of the loop of communication with replicas, then interrupt
- * all connections, then read and skip the remaining packets to make sure
- * these connections did not remain hanging in the out-of-sync state.
- */
- if (established || isQueryPending())
+ };
+}
+
+RemoteQueryExecutor::~RemoteQueryExecutor()
+{
+ /** If interrupted in the middle of the loop of communication with replicas, then interrupt
+ * all connections, then read and skip the remaining packets to make sure
+ * these connections did not remain hanging in the out-of-sync state.
+ */
+ if (established || isQueryPending())
connections->disconnect();
-}
-
-/** If we receive a block with slightly different column types, or with excessive columns,
- * we will adapt it to expected structure.
- */
-static Block adaptBlockStructure(const Block & block, const Block & header)
-{
- /// Special case when reader doesn't care about result structure. Deprecated and used only in Benchmark, PerformanceTest.
- if (!header)
- return block;
-
- Block res;
- res.info = block.info;
-
- for (const auto & elem : header)
- {
- ColumnPtr column;
-
- if (elem.column && isColumnConst(*elem.column))
- {
- /// We expect constant column in block.
- /// If block is not empty, then get value for constant from it,
- /// because it may be different for remote server for functions like version(), uptime(), ...
- if (block.rows() > 0 && block.has(elem.name))
- {
- /// Const column is passed as materialized. Get first value from it.
- ///
- /// TODO: check that column contains the same value.
- /// TODO: serialize const columns.
- auto col = block.getByName(elem.name);
- col.column = block.getByName(elem.name).column->cut(0, 1);
-
- column = castColumn(col, elem.type);
-
- if (!isColumnConst(*column))
- column = ColumnConst::create(column, block.rows());
- else
- /// It is not possible now. Just in case we support const columns serialization.
- column = column->cloneResized(block.rows());
- }
- else
- column = elem.column->cloneResized(block.rows());
- }
- else
- column = castColumn(block.getByName(elem.name), elem.type);
-
- res.insert({column, elem.type, elem.name});
- }
- return res;
-}
-
-void RemoteQueryExecutor::sendQuery()
-{
- if (sent_query)
- return;
-
+}
+
+/** If we receive a block with slightly different column types, or with excessive columns,
+ * we will adapt it to expected structure.
+ */
+static Block adaptBlockStructure(const Block & block, const Block & header)
+{
+ /// Special case when reader doesn't care about result structure. Deprecated and used only in Benchmark, PerformanceTest.
+ if (!header)
+ return block;
+
+ Block res;
+ res.info = block.info;
+
+ for (const auto & elem : header)
+ {
+ ColumnPtr column;
+
+ if (elem.column && isColumnConst(*elem.column))
+ {
+ /// We expect constant column in block.
+ /// If block is not empty, then get value for constant from it,
+ /// because it may be different for remote server for functions like version(), uptime(), ...
+ if (block.rows() > 0 && block.has(elem.name))
+ {
+ /// Const column is passed as materialized. Get first value from it.
+ ///
+ /// TODO: check that column contains the same value.
+ /// TODO: serialize const columns.
+ auto col = block.getByName(elem.name);
+ col.column = block.getByName(elem.name).column->cut(0, 1);
+
+ column = castColumn(col, elem.type);
+
+ if (!isColumnConst(*column))
+ column = ColumnConst::create(column, block.rows());
+ else
+ /// It is not possible now. Just in case we support const columns serialization.
+ column = column->cloneResized(block.rows());
+ }
+ else
+ column = elem.column->cloneResized(block.rows());
+ }
+ else
+ column = castColumn(block.getByName(elem.name), elem.type);
+
+ res.insert({column, elem.type, elem.name});
+ }
+ return res;
+}
+
+void RemoteQueryExecutor::sendQuery()
+{
+ if (sent_query)
+ return;
+
connections = create_connections();
-
+
const auto & settings = context->getSettingsRef();
if (settings.skip_unavailable_shards && 0 == connections->size())
- return;
-
+ return;
+
/// Query cannot be canceled in the middle of the send query,
/// since there are multiple packets:
/// - Query
@@ -202,17 +202,17 @@ void RemoteQueryExecutor::sendQuery()
///
std::lock_guard guard(was_cancelled_mutex);
- established = true;
+ established = true;
was_cancelled = false;
-
- auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
+
+ auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
ClientInfo modified_client_info = context->getClientInfo();
- modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+ modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
if (CurrentThread::isInitialized())
{
modified_client_info.client_trace_context = CurrentThread::get().thread_trace_context;
}
-
+
{
std::lock_guard lock(duplicated_part_uuids_mutex);
if (!duplicated_part_uuids.empty())
@@ -220,32 +220,32 @@ void RemoteQueryExecutor::sendQuery()
}
connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true);
-
- established = false;
- sent_query = true;
-
- if (settings.enable_scalar_subquery_optimization)
- sendScalars();
- sendExternalTables();
-}
-
-Block RemoteQueryExecutor::read()
-{
- if (!sent_query)
- {
- sendQuery();
-
+
+ established = false;
+ sent_query = true;
+
+ if (settings.enable_scalar_subquery_optimization)
+ sendScalars();
+ sendExternalTables();
+}
+
+Block RemoteQueryExecutor::read()
+{
+ if (!sent_query)
+ {
+ sendQuery();
+
if (context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()))
- return {};
- }
-
- while (true)
- {
- if (was_cancelled)
- return Block();
-
+ return {};
+ }
+
+ while (true)
+ {
+ if (was_cancelled)
+ return Block();
+
Packet packet = connections->receivePacket();
-
+
if (auto block = processPacket(std::move(packet)))
return *block;
else if (got_duplicated_part_uuids)
@@ -280,7 +280,7 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
return Block();
if (read_context->is_read_in_progress.load(std::memory_order_relaxed))
- {
+ {
read_context->setTimer();
return read_context->epoll.getFileDescriptor();
}
@@ -297,7 +297,7 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
return read();
#endif
}
-
+
std::variant<Block, int> RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context)
{
@@ -339,12 +339,12 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
if (packet.block && (packet.block.rows() > 0))
return adaptBlockStructure(packet.block, header);
break; /// If the block is empty - we will receive other packets before EndOfStream.
-
+
case Protocol::Server::Exception:
got_exception_from_replica = true;
packet.exception->rethrow();
break;
-
+
case Protocol::Server::EndOfStream:
if (!connections->hasActiveConnections())
{
@@ -352,7 +352,7 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
return Block();
}
break;
-
+
case Protocol::Server::Progress:
/** We use the progress from a remote server.
* We also include in ProcessList,
@@ -363,21 +363,21 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
if (progress_callback)
progress_callback(packet.progress);
break;
-
+
case Protocol::Server::ProfileInfo:
/// Use own (client-side) info about read bytes, it is more correct info than server-side one.
if (profile_info_callback)
profile_info_callback(packet.profile_info);
break;
-
+
case Protocol::Server::Totals:
totals = packet.block;
break;
-
+
case Protocol::Server::Extremes:
extremes = packet.block;
break;
-
+
case Protocol::Server::Log:
/// Pass logs from remote server to client
if (auto log_queue = CurrentThread::getInternalTextLogsQueue())
@@ -389,11 +389,11 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}",
toString(packet.type),
connections->dumpAddresses());
- }
+ }
return {};
-}
-
+}
+
bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids)
{
auto query_context = context->getQueryContext();
@@ -417,82 +417,82 @@ void RemoteQueryExecutor::processReadTaskRequest()
}
void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context)
-{
- /** If one of:
- * - nothing started to do;
- * - received all packets before EndOfStream;
- * - received exception from one replica;
- * - received an unknown packet from one replica;
- * then you do not need to read anything.
- */
- if (!isQueryPending() || hasThrownException())
- return;
-
- /** If you have not read all the data yet, but they are no longer needed.
- * This may be due to the fact that the data is sufficient (for example, when using LIMIT).
- */
-
- /// Send the request to abort the execution of the request, if not already sent.
+{
+ /** If one of:
+ * - nothing started to do;
+ * - received all packets before EndOfStream;
+ * - received exception from one replica;
+ * - received an unknown packet from one replica;
+ * then you do not need to read anything.
+ */
+ if (!isQueryPending() || hasThrownException())
+ return;
+
+ /** If you have not read all the data yet, but they are no longer needed.
+ * This may be due to the fact that the data is sufficient (for example, when using LIMIT).
+ */
+
+ /// Send the request to abort the execution of the request, if not already sent.
tryCancel("Cancelling query because enough data has been read", read_context);
/// Try to drain connections asynchronously.
if (auto conn = ConnectionCollector::enqueueConnectionCleanup(pool, connections))
- {
+ {
/// Drain connections synchronously.
CurrentMetrics::Increment metric_increment(CurrentMetrics::ActiveSyncDrainedConnections);
ConnectionCollector::drainConnections(*conn);
CurrentMetrics::add(CurrentMetrics::SyncDrainedConnections, 1);
- }
+ }
finished = true;
-}
-
+}
+
void RemoteQueryExecutor::cancel(std::unique_ptr<ReadContext> * read_context)
-{
- {
- std::lock_guard lock(external_tables_mutex);
-
- /// Stop sending external data.
- for (auto & vec : external_tables_data)
- for (auto & elem : vec)
- elem->is_cancelled = true;
- }
-
- if (!isQueryPending() || hasThrownException())
- return;
-
+{
+ {
+ std::lock_guard lock(external_tables_mutex);
+
+ /// Stop sending external data.
+ for (auto & vec : external_tables_data)
+ for (auto & elem : vec)
+ elem->is_cancelled = true;
+ }
+
+ if (!isQueryPending() || hasThrownException())
+ return;
+
tryCancel("Cancelling query", read_context);
-}
-
-void RemoteQueryExecutor::sendScalars()
-{
+}
+
+void RemoteQueryExecutor::sendScalars()
+{
connections->sendScalarsData(scalars);
-}
-
-void RemoteQueryExecutor::sendExternalTables()
-{
+}
+
+void RemoteQueryExecutor::sendExternalTables()
+{
size_t count = connections->size();
-
- {
- std::lock_guard lock(external_tables_mutex);
-
+
+ {
+ std::lock_guard lock(external_tables_mutex);
+
external_tables_data.clear();
- external_tables_data.reserve(count);
-
- for (size_t i = 0; i < count; ++i)
- {
- ExternalTablesData res;
- for (const auto & table : external_tables)
- {
- StoragePtr cur = table.second;
-
- auto data = std::make_unique<ExternalTableData>();
- data->table_name = table.first;
+ external_tables_data.reserve(count);
+
+ for (size_t i = 0; i < count; ++i)
+ {
+ ExternalTablesData res;
+ for (const auto & table : external_tables)
+ {
+ StoragePtr cur = table.second;
+
+ auto data = std::make_unique<ExternalTableData>();
+ data->table_name = table.first;
data->creating_pipe_callback = [cur, context = this->context]()
{
SelectQueryInfo query_info;
auto metadata_snapshot = cur->getInMemoryMetadataPtr();
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
context, QueryProcessingStage::Complete, metadata_snapshot, query_info);
-
+
Pipe pipe = cur->read(
metadata_snapshot->getColumns().getNamesOfPhysical(),
metadata_snapshot, query_info, context,
@@ -501,28 +501,28 @@ void RemoteQueryExecutor::sendExternalTables()
if (pipe.empty())
return std::make_unique<Pipe>(
std::make_shared<SourceFromSingleChunk>(metadata_snapshot->getSampleBlock(), Chunk()));
-
+
return std::make_unique<Pipe>(std::move(pipe));
};
data->pipe = data->creating_pipe_callback();
- res.emplace_back(std::move(data));
- }
- external_tables_data.push_back(std::move(res));
- }
- }
-
+ res.emplace_back(std::move(data));
+ }
+ external_tables_data.push_back(std::move(res));
+ }
+ }
+
connections->sendExternalTablesData(external_tables_data);
-}
-
+}
+
void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context)
-{
+{
/// Flag was_cancelled is atomic because it is checked in read().
std::lock_guard guard(was_cancelled_mutex);
-
+
if (was_cancelled)
return;
-
+
was_cancelled = true;
if (read_context && *read_context)
@@ -538,21 +538,21 @@ void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadCon
(*read_context)->setTimer();
(*read_context)->cancel();
}
-
+
connections->sendCancel();
- if (log)
+ if (log)
LOG_TRACE(log, "({}) {}", connections->dumpAddresses(), reason);
-}
-
-bool RemoteQueryExecutor::isQueryPending() const
-{
- return sent_query && !finished;
-}
-
-bool RemoteQueryExecutor::hasThrownException() const
-{
- return got_exception_from_replica || got_unknown_packet_from_replica;
-}
-
-}
+}
+
+bool RemoteQueryExecutor::isQueryPending() const
+{
+ return sent_query && !finished;
+}
+
+bool RemoteQueryExecutor::hasThrownException() const
+{
+ return got_exception_from_replica || got_unknown_packet_from_replica;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h
index d82f998389..56d99e230e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h
@@ -1,6 +1,6 @@
-#pragma once
-
-#include <Client/ConnectionPool.h>
+#pragma once
+
+#include <Client/ConnectionPool.h>
#include <Client/IConnections.h>
#include <Client/ConnectionPoolWithFailover.h>
#include <Storages/IStorage_fwd.h>
@@ -8,41 +8,41 @@
#include <Interpreters/StorageID.h>
#include <Common/TimerDescriptor.h>
#include <variant>
+
-
-namespace DB
-{
-
+namespace DB
+{
+
class Context;
-class Throttler;
-using ThrottlerPtr = std::shared_ptr<Throttler>;
-
-struct Progress;
-using ProgressCallback = std::function<void(const Progress & progress)>;
-
-struct BlockStreamProfileInfo;
-using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & info)>;
-
+class Throttler;
+using ThrottlerPtr = std::shared_ptr<Throttler>;
+
+struct Progress;
+using ProgressCallback = std::function<void(const Progress & progress)>;
+
+struct BlockStreamProfileInfo;
+using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & info)>;
+
class RemoteQueryExecutorReadContext;
/// This is the same type as StorageS3Source::IteratorWrapper
using TaskIterator = std::function<String()>;
-/// This class allows one to launch queries on remote replicas of one shard and get results
-class RemoteQueryExecutor
-{
-public:
+/// This class allows one to launch queries on remote replicas of one shard and get results
+class RemoteQueryExecutor
+{
+public:
using ReadContext = RemoteQueryExecutorReadContext;
- /// Takes already set connection.
+ /// Takes already set connection.
/// We don't own connection, thus we have to drain it synchronously.
- RemoteQueryExecutor(
- Connection & connection,
+ RemoteQueryExecutor(
+ Connection & connection,
const String & query_, const Block & header_, ContextPtr context_,
- ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
+ ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
+
/// Takes already set connection.
RemoteQueryExecutor(
std::shared_ptr<Connection> connection,
@@ -50,134 +50,134 @@ public:
ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
- /// Accepts several connections already taken from pool.
- RemoteQueryExecutor(
+ /// Accepts several connections already taken from pool.
+ RemoteQueryExecutor(
const ConnectionPoolWithFailoverPtr & pool,
std::vector<IConnectionPool::Entry> && connections_,
const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
+ const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
- /// Takes a pool and gets one or several connections from it.
- RemoteQueryExecutor(
- const ConnectionPoolWithFailoverPtr & pool,
+
+ /// Takes a pool and gets one or several connections from it.
+ RemoteQueryExecutor(
+ const ConnectionPoolWithFailoverPtr & pool,
const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
+ const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
- ~RemoteQueryExecutor();
-
- /// Create connection and send query, external tables and scalars.
- void sendQuery();
-
+
+ ~RemoteQueryExecutor();
+
+ /// Create connection and send query, external tables and scalars.
+ void sendQuery();
+
/// Query is resent to a replica, the query itself can be modified.
std::atomic<bool> resent_query { false };
- /// Read next block of data. Returns empty block if query is finished.
- Block read();
-
+ /// Read next block of data. Returns empty block if query is finished.
+ Block read();
+
/// Async variant of read. Returns ready block or file descriptor which may be used for polling.
/// ReadContext is an internal read state. Pass empty ptr first time, reuse created one for every call.
std::variant<Block, int> read(std::unique_ptr<ReadContext> & read_context);
- /// Receive all remain packets and finish query.
- /// It should be cancelled after read returned empty block.
+ /// Receive all remain packets and finish query.
+ /// It should be cancelled after read returned empty block.
void finish(std::unique_ptr<ReadContext> * read_context = nullptr);
-
- /// Cancel query execution. Sends Cancel packet and ignore others.
- /// This method may be called from separate thread.
+
+ /// Cancel query execution. Sends Cancel packet and ignore others.
+ /// This method may be called from separate thread.
void cancel(std::unique_ptr<ReadContext> * read_context = nullptr);
-
- /// Get totals and extremes if any.
- Block getTotals() { return std::move(totals); }
- Block getExtremes() { return std::move(extremes); }
-
- /// Set callback for progress. It will be called on Progress packet.
- void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); }
-
- /// Set callback for profile info. It will be called on ProfileInfo packet.
- void setProfileInfoCallback(ProfileInfoCallback callback) { profile_info_callback = std::move(callback); }
-
- /// Set the query_id. For now, used by performance test to later find the query
- /// in the server query_log. Must be called before sending the query to the server.
- void setQueryId(const std::string& query_id_) { assert(!sent_query); query_id = query_id_; }
-
- /// Specify how we allocate connections on a shard.
- void setPoolMode(PoolMode pool_mode_) { pool_mode = pool_mode_; }
-
- void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); }
-
- void setLogger(Poco::Logger * logger) { log = logger; }
-
- const Block & getHeader() const { return header; }
-
-private:
+
+ /// Get totals and extremes if any.
+ Block getTotals() { return std::move(totals); }
+ Block getExtremes() { return std::move(extremes); }
+
+ /// Set callback for progress. It will be called on Progress packet.
+ void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); }
+
+ /// Set callback for profile info. It will be called on ProfileInfo packet.
+ void setProfileInfoCallback(ProfileInfoCallback callback) { profile_info_callback = std::move(callback); }
+
+ /// Set the query_id. For now, used by performance test to later find the query
+ /// in the server query_log. Must be called before sending the query to the server.
+ void setQueryId(const std::string& query_id_) { assert(!sent_query); query_id = query_id_; }
+
+ /// Specify how we allocate connections on a shard.
+ void setPoolMode(PoolMode pool_mode_) { pool_mode = pool_mode_; }
+
+ void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); }
+
+ void setLogger(Poco::Logger * logger) { log = logger; }
+
+ const Block & getHeader() const { return header; }
+
+private:
RemoteQueryExecutor(
const String & query_, const Block & header_, ContextPtr context_,
const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_);
- Block header;
- Block totals;
- Block extremes;
-
- const String query;
+ Block header;
+ Block totals;
+ Block extremes;
+
+ const String query;
String query_id;
ContextPtr context;
-
- ProgressCallback progress_callback;
- ProfileInfoCallback profile_info_callback;
-
- /// Scalars needed to be sent to remote servers
- Scalars scalars;
- /// Temporary tables needed to be sent to remote servers
- Tables external_tables;
- QueryProcessingStage::Enum stage;
+
+ ProgressCallback progress_callback;
+ ProfileInfoCallback profile_info_callback;
+
+ /// Scalars needed to be sent to remote servers
+ Scalars scalars;
+ /// Temporary tables needed to be sent to remote servers
+ Tables external_tables;
+ QueryProcessingStage::Enum stage;
/// Initiator identifier for distributed task processing
std::shared_ptr<TaskIterator> task_iterator;
-
+
std::function<std::shared_ptr<IConnections>()> create_connections;
/// Hold a shared reference to the connection pool so that asynchronous connection draining will
/// work safely. Make sure it's the first member so that we don't destruct it too early.
const ConnectionPoolWithFailoverPtr pool;
std::shared_ptr<IConnections> connections;
- /// Streams for reading from temporary tables and following sending of data
- /// to remote servers for GLOBAL-subqueries
- std::vector<ExternalTablesData> external_tables_data;
- std::mutex external_tables_mutex;
-
- /// Connections to replicas are established, but no queries are sent yet
- std::atomic<bool> established { false };
-
- /// Query is sent (used before getting first block)
- std::atomic<bool> sent_query { false };
-
- /** All data from all replicas are received, before EndOfStream packet.
- * To prevent desynchronization, if not all data is read before object
- * destruction, it's required to send cancel query request to replicas and
- * read all packets before EndOfStream
- */
- std::atomic<bool> finished { false };
-
- /** Cancel query request was sent to all replicas because data is not needed anymore
- * This behaviour may occur when:
- * - data size is already satisfactory (when using LIMIT, for example)
- * - an exception was thrown from client side
- */
- std::atomic<bool> was_cancelled { false };
- std::mutex was_cancelled_mutex;
-
- /** An exception from replica was received. No need in receiving more packets or
- * requesting to cancel query execution
- */
- std::atomic<bool> got_exception_from_replica { false };
-
- /** Unknown packet was received from replica. No need in receiving more packets or
- * requesting to cancel query execution
- */
- std::atomic<bool> got_unknown_packet_from_replica { false };
-
+ /// Streams for reading from temporary tables and following sending of data
+ /// to remote servers for GLOBAL-subqueries
+ std::vector<ExternalTablesData> external_tables_data;
+ std::mutex external_tables_mutex;
+
+ /// Connections to replicas are established, but no queries are sent yet
+ std::atomic<bool> established { false };
+
+ /// Query is sent (used before getting first block)
+ std::atomic<bool> sent_query { false };
+
+ /** All data from all replicas are received, before EndOfStream packet.
+ * To prevent desynchronization, if not all data is read before object
+ * destruction, it's required to send cancel query request to replicas and
+ * read all packets before EndOfStream
+ */
+ std::atomic<bool> finished { false };
+
+ /** Cancel query request was sent to all replicas because data is not needed anymore
+ * This behaviour may occur when:
+ * - data size is already satisfactory (when using LIMIT, for example)
+ * - an exception was thrown from client side
+ */
+ std::atomic<bool> was_cancelled { false };
+ std::mutex was_cancelled_mutex;
+
+ /** An exception from replica was received. No need in receiving more packets or
+ * requesting to cancel query execution
+ */
+ std::atomic<bool> got_exception_from_replica { false };
+
+ /** Unknown packet was received from replica. No need in receiving more packets or
+ * requesting to cancel query execution
+ */
+ std::atomic<bool> got_unknown_packet_from_replica { false };
+
/** Got duplicated uuids from replica
*/
std::atomic<bool> got_duplicated_part_uuids{ false };
@@ -186,17 +186,17 @@ private:
std::mutex duplicated_part_uuids_mutex;
std::vector<UUID> duplicated_part_uuids;
- PoolMode pool_mode = PoolMode::GET_MANY;
- StorageID main_table = StorageID::createEmpty();
-
- Poco::Logger * log = nullptr;
-
- /// Send all scalars to remote servers
- void sendScalars();
-
- /// Send all temporary tables to remote servers
- void sendExternalTables();
-
+ PoolMode pool_mode = PoolMode::GET_MANY;
+ StorageID main_table = StorageID::createEmpty();
+
+ Poco::Logger * log = nullptr;
+
+ /// Send all scalars to remote servers
+ void sendScalars();
+
+ /// Send all temporary tables to remote servers
+ void sendExternalTables();
+
/// Set part uuids to a query context, collected from remote replicas.
/// Return true if duplicates found.
bool setPartUUIDs(const std::vector<UUID> & uuids);
@@ -207,14 +207,14 @@ private:
/// only for `allow_experimental_query_deduplication`.
std::variant<Block, int> restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context = nullptr);
- /// If wasn't sent yet, send request to cancel all connections to replicas
+ /// If wasn't sent yet, send request to cancel all connections to replicas
void tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context);
-
- /// Returns true if query was sent
- bool isQueryPending() const;
-
- /// Returns true if exception was thrown
- bool hasThrownException() const;
+
+ /// Returns true if query was sent
+ bool isQueryPending() const;
+
+ /// Returns true if exception was thrown
+ bool hasThrownException() const;
/// Process packet for read and return data block if possible.
std::optional<Block> processPacket(Packet packet);
@@ -222,6 +222,6 @@ private:
/// Reads packet by packet
Block readPackets();
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp
index f7d05fa3be..fd8a6f54a3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomGeo.cpp
@@ -1,43 +1,43 @@
#include <DataTypes/DataTypeCustomGeo.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeCustom.h>
-#include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypesNumber.h>
-
-namespace DB
-{
-
-void registerDataTypeDomainGeo(DataTypeFactory & factory)
-{
- // Custom type for point represented as its coordinates stored as Tuple(Float64, Float64)
- factory.registerSimpleDataTypeCustom("Point", []
- {
- return std::make_pair(DataTypeFactory::instance().get("Tuple(Float64, Float64)"),
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeCustom.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypesNumber.h>
+
+namespace DB
+{
+
+void registerDataTypeDomainGeo(DataTypeFactory & factory)
+{
+ // Custom type for point represented as its coordinates stored as Tuple(Float64, Float64)
+ factory.registerSimpleDataTypeCustom("Point", []
+ {
+ return std::make_pair(DataTypeFactory::instance().get("Tuple(Float64, Float64)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePointName>()));
- });
-
- // Custom type for simple polygon without holes stored as Array(Point)
- factory.registerSimpleDataTypeCustom("Ring", []
- {
- return std::make_pair(DataTypeFactory::instance().get("Array(Point)"),
+ });
+
+ // Custom type for simple polygon without holes stored as Array(Point)
+ factory.registerSimpleDataTypeCustom("Ring", []
+ {
+ return std::make_pair(DataTypeFactory::instance().get("Array(Point)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeRingName>()));
- });
-
- // Custom type for polygon with holes stored as Array(Ring)
- // First element of outer array is outer shape of polygon and all the following are holes
- factory.registerSimpleDataTypeCustom("Polygon", []
- {
- return std::make_pair(DataTypeFactory::instance().get("Array(Ring)"),
+ });
+
+ // Custom type for polygon with holes stored as Array(Ring)
+ // First element of outer array is outer shape of polygon and all the following are holes
+ factory.registerSimpleDataTypeCustom("Polygon", []
+ {
+ return std::make_pair(DataTypeFactory::instance().get("Array(Ring)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePolygonName>()));
- });
-
- // Custom type for multiple polygons with holes stored as Array(Polygon)
- factory.registerSimpleDataTypeCustom("MultiPolygon", []
- {
- return std::make_pair(DataTypeFactory::instance().get("Array(Polygon)"),
+ });
+
+ // Custom type for multiple polygons with holes stored as Array(Polygon)
+ factory.registerSimpleDataTypeCustom("MultiPolygon", []
+ {
+ return std::make_pair(DataTypeFactory::instance().get("Array(Polygon)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiPolygonName>()));
- });
-}
-
-}
+ });
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp
index 808aa43528..85204eb05d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp
@@ -1,29 +1,29 @@
#include <DataTypes/Serializations/SerializationIP.h>
-#include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/DataTypeCustom.h>
-
-namespace DB
-{
-
-void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
-{
- factory.registerSimpleDataTypeCustom("IPv4", []
- {
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeCustom.h>
+
+namespace DB
+{
+
+void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
+{
+ factory.registerSimpleDataTypeCustom("IPv4", []
+ {
auto type = DataTypeFactory::instance().get("UInt32");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<SerializationIPv4>(type->getDefaultSerialization())));
- });
-
- factory.registerSimpleDataTypeCustom("IPv6", []
- {
+ });
+
+ factory.registerSimpleDataTypeCustom("IPv6", []
+ {
auto type = DataTypeFactory::instance().get("FixedString(16)");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<SerializationIPv6>(type->getDefaultSerialization())));
- });
-
- /// MySQL, MariaDB
- factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive);
- factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive);
-}
-
-}
+ });
+
+ /// MySQL, MariaDB
+ factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive);
+ factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
index 023629fc69..f1ee5c670f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
@@ -1,38 +1,38 @@
#include <Common/FieldVisitorToString.h>
-#include <Common/typeid_cast.h>
-
-#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
-#include <DataTypes/DataTypeLowCardinality.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypeFactory.h>
-
-#include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTIdentifier.h>
-
-#include <boost/algorithm/string/join.hpp>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int SYNTAX_ERROR;
- extern const int BAD_ARGUMENTS;
- extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
- extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
- extern const int LOGICAL_ERROR;
-}
-
+#include <Common/typeid_cast.h>
+
+#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeFactory.h>
+
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTIdentifier.h>
+
+#include <boost/algorithm/string/join.hpp>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+ extern const int BAD_ARGUMENTS;
+ extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int LOGICAL_ERROR;
+}
+
void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const AggregateFunctionPtr & function)
{
/// TODO Make it sane.
static const std::vector<String> supported_functions{"any", "anyLast", "min",
"max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor",
"sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"};
-
+
// check function
if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions))
{
@@ -40,54 +40,54 @@ void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const Aggreg
ErrorCodes::BAD_ARGUMENTS);
}
}
-
-String DataTypeCustomSimpleAggregateFunction::getName() const
-{
+
+String DataTypeCustomSimpleAggregateFunction::getName() const
+{
WriteBufferFromOwnString stream;
- stream << "SimpleAggregateFunction(" << function->getName();
-
- if (!parameters.empty())
- {
- stream << "(";
- for (size_t i = 0; i < parameters.size(); ++i)
- {
- if (i)
- stream << ", ";
+ stream << "SimpleAggregateFunction(" << function->getName();
+
+ if (!parameters.empty())
+ {
+ stream << "(";
+ for (size_t i = 0; i < parameters.size(); ++i)
+ {
+ if (i)
+ stream << ", ";
stream << applyVisitor(FieldVisitorToString(), parameters[i]);
- }
- stream << ")";
- }
-
- for (const auto & argument_type : argument_types)
- stream << ", " << argument_type->getName();
-
- stream << ")";
- return stream.str();
-}
-
-
-static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
-{
- String function_name;
- AggregateFunctionPtr function;
- DataTypes argument_types;
- Array params_row;
-
- if (!arguments || arguments->children.empty())
- throw Exception("Data type SimpleAggregateFunction requires parameters: "
- "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
- if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>())
- {
- if (parametric->parameters)
- throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
- function_name = parametric->name;
-
+ }
+ stream << ")";
+ }
+
+ for (const auto & argument_type : argument_types)
+ stream << ", " << argument_type->getName();
+
+ stream << ")";
+ return stream.str();
+}
+
+
+static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
+{
+ String function_name;
+ AggregateFunctionPtr function;
+ DataTypes argument_types;
+ Array params_row;
+
+ if (!arguments || arguments->children.empty())
+ throw Exception("Data type SimpleAggregateFunction requires parameters: "
+ "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+ if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>())
+ {
+ if (parametric->parameters)
+ throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
+ function_name = parametric->name;
+
if (parametric->arguments)
- {
+ {
const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
params_row.resize(parameters.size());
-
+
for (size_t i = 0; i < parameters.size(); ++i)
{
const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
@@ -101,48 +101,48 @@ static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & argum
params_row[i] = lit->value;
}
- }
- }
- else if (auto opt_name = tryGetIdentifierName(arguments->children[0]))
- {
- function_name = *opt_name;
- }
- else if (arguments->children[0]->as<ASTLiteral>())
- {
- throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function",
- ErrorCodes::BAD_ARGUMENTS);
- }
- else
- throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.",
- ErrorCodes::BAD_ARGUMENTS);
-
- for (size_t i = 1; i < arguments->children.size(); ++i)
- argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
-
- if (function_name.empty())
- throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR);
-
- AggregateFunctionProperties properties;
- function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties);
-
+ }
+ }
+ else if (auto opt_name = tryGetIdentifierName(arguments->children[0]))
+ {
+ function_name = *opt_name;
+ }
+ else if (arguments->children[0]->as<ASTLiteral>())
+ {
+ throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function",
+ ErrorCodes::BAD_ARGUMENTS);
+ }
+ else
+ throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.",
+ ErrorCodes::BAD_ARGUMENTS);
+
+ for (size_t i = 1; i < arguments->children.size(); ++i)
+ argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
+
+ if (function_name.empty())
+ throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR);
+
+ AggregateFunctionProperties properties;
+ function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties);
+
DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function);
-
- DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());
-
- if (!function->getReturnType()->equals(*removeLowCardinality(storage_type)))
- {
- throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(),
- ErrorCodes::BAD_ARGUMENTS);
- }
-
- DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row);
-
- return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
-}
-
-void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
-{
- factory.registerDataTypeCustom("SimpleAggregateFunction", create);
-}
-
-}
+
+ DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());
+
+ if (!function->getReturnType()->equals(*removeLowCardinality(storage_type)))
+ {
+ throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(),
+ ErrorCodes::BAD_ARGUMENTS);
+ }
+
+ DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row);
+
+ return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
+}
+
+void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
+{
+ factory.registerDataTypeCustom("SimpleAggregateFunction", create);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
index dc054144e1..bd153a4f59 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
@@ -1,42 +1,42 @@
-#pragma once
-
-#include <DataTypes/DataTypeCustom.h>
-#include <AggregateFunctions/IAggregateFunction.h>
-
-#include <IO/ReadHelpers.h>
-
-namespace DB
-{
-
-/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard
- * data type but when rows are merged, an aggregation function is applied.
- *
- * The aggregation function is limited to simple functions whose merge state is the final result:
- * any, anyLast, min, max, sum
- *
- * Examples:
- *
- * SimpleAggregateFunction(sum, Nullable(Float64))
- * SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String)))
- * SimpleAggregateFunction(anyLast, IPv4)
- *
+#pragma once
+
+#include <DataTypes/DataTypeCustom.h>
+#include <AggregateFunctions/IAggregateFunction.h>
+
+#include <IO/ReadHelpers.h>
+
+namespace DB
+{
+
+/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard
+ * data type but when rows are merged, an aggregation function is applied.
+ *
+ * The aggregation function is limited to simple functions whose merge state is the final result:
+ * any, anyLast, min, max, sum
+ *
+ * Examples:
+ *
+ * SimpleAggregateFunction(sum, Nullable(Float64))
+ * SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String)))
+ * SimpleAggregateFunction(anyLast, IPv4)
+ *
* Technically, a standard IDataType is instantiated and customized with IDataTypeCustomName and DataTypeCustomDesc.
- */
-
-class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName
-{
-private:
- const AggregateFunctionPtr function;
- const DataTypes argument_types;
- const Array parameters;
-
-public:
- DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_)
- : function(function_), argument_types(argument_types_), parameters(parameters_) {}
-
- const AggregateFunctionPtr getFunction() const { return function; }
- String getName() const override;
+ */
+
+class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName
+{
+private:
+ const AggregateFunctionPtr function;
+ const DataTypes argument_types;
+ const Array parameters;
+
+public:
+ DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_)
+ : function(function_), argument_types(argument_types_), parameters(parameters_) {}
+
+ const AggregateFunctionPtr getFunction() const { return function; }
+ String getName() const override;
static void checkSupportedFunctions(const AggregateFunctionPtr & function);
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
index 41ba81814d..141fa08dc3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
@@ -1,90 +1,90 @@
-#include <Columns/ColumnArray.h>
-#include <Columns/ColumnConst.h>
-#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnConst.h>
+#include <Columns/ColumnTuple.h>
#include <Columns/ColumnMap.h>
-#include <Columns/ColumnLowCardinality.h>
-
-#include <DataTypes/DataTypeLowCardinality.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeTuple.h>
+#include <Columns/ColumnLowCardinality.h>
+
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
-
-#include <Common/assert_cast.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int ILLEGAL_COLUMN;
- extern const int TYPE_MISMATCH;
-}
-
-DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
-{
- if (!type)
- return type;
-
- if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
- return std::make_shared<DataTypeArray>(recursiveRemoveLowCardinality(array_type->getNestedType()));
-
- if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
- {
- DataTypes elements = tuple_type->getElements();
- for (auto & element : elements)
- element = recursiveRemoveLowCardinality(element);
-
- if (tuple_type->haveExplicitNames())
+
+#include <Common/assert_cast.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_COLUMN;
+ extern const int TYPE_MISMATCH;
+}
+
+DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
+{
+ if (!type)
+ return type;
+
+ if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
+ return std::make_shared<DataTypeArray>(recursiveRemoveLowCardinality(array_type->getNestedType()));
+
+ if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
+ {
+ DataTypes elements = tuple_type->getElements();
+ for (auto & element : elements)
+ element = recursiveRemoveLowCardinality(element);
+
+ if (tuple_type->haveExplicitNames())
return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames(), tuple_type->serializeNames());
- else
- return std::make_shared<DataTypeTuple>(elements);
- }
-
+ else
+ return std::make_shared<DataTypeTuple>(elements);
+ }
+
if (const auto * map_type = typeid_cast<const DataTypeMap *>(type.get()))
{
return std::make_shared<DataTypeMap>(recursiveRemoveLowCardinality(map_type->getKeyType()), recursiveRemoveLowCardinality(map_type->getValueType()));
}
- if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
- return low_cardinality_type->getDictionaryType();
-
- return type;
-}
-
-ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
-{
- if (!column)
- return column;
-
- if (const auto * column_array = typeid_cast<const ColumnArray *>(column.get()))
- {
- const auto & data = column_array->getDataPtr();
- auto data_no_lc = recursiveRemoveLowCardinality(data);
- if (data.get() == data_no_lc.get())
- return column;
-
- return ColumnArray::create(data_no_lc, column_array->getOffsetsPtr());
- }
-
- if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
- {
- const auto & nested = column_const->getDataColumnPtr();
- auto nested_no_lc = recursiveRemoveLowCardinality(nested);
- if (nested.get() == nested_no_lc.get())
- return column;
-
- return ColumnConst::create(nested_no_lc, column_const->size());
- }
-
- if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
- {
- auto columns = column_tuple->getColumns();
- for (auto & element : columns)
- element = recursiveRemoveLowCardinality(element);
- return ColumnTuple::create(columns);
- }
-
+ if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
+ return low_cardinality_type->getDictionaryType();
+
+ return type;
+}
+
+ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
+{
+ if (!column)
+ return column;
+
+ if (const auto * column_array = typeid_cast<const ColumnArray *>(column.get()))
+ {
+ const auto & data = column_array->getDataPtr();
+ auto data_no_lc = recursiveRemoveLowCardinality(data);
+ if (data.get() == data_no_lc.get())
+ return column;
+
+ return ColumnArray::create(data_no_lc, column_array->getOffsetsPtr());
+ }
+
+ if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
+ {
+ const auto & nested = column_const->getDataColumnPtr();
+ auto nested_no_lc = recursiveRemoveLowCardinality(nested);
+ if (nested.get() == nested_no_lc.get())
+ return column;
+
+ return ColumnConst::create(nested_no_lc, column_const->size());
+ }
+
+ if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
+ {
+ auto columns = column_tuple->getColumns();
+ for (auto & element : columns)
+ element = recursiveRemoveLowCardinality(element);
+ return ColumnTuple::create(columns);
+ }
+
if (const auto * column_map = typeid_cast<const ColumnMap *>(column.get()))
{
const auto & nested = column_map->getNestedColumnPtr();
@@ -95,102 +95,102 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
return ColumnMap::create(nested_no_lc);
}
- if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get()))
- return column_low_cardinality->convertToFullColumn();
-
- return column;
-}
-
-ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
-{
- if (!column)
- return column;
-
- if (from_type->equals(*to_type))
- return column;
-
- /// We can allow insert enum column if it's numeric type is the same as the column's type in table.
- if (WhichDataType(to_type).isEnum() && from_type->getTypeId() == to_type->getTypeId())
- return column;
-
- if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
- {
- const auto & nested = column_const->getDataColumnPtr();
- auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type);
- if (nested.get() == nested_no_lc.get())
- return column;
-
- return ColumnConst::create(nested_no_lc, column_const->size());
- }
-
- if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get()))
- {
- if (to_type->equals(*low_cardinality_type->getDictionaryType()))
- return column->convertToFullColumnIfLowCardinality();
- }
-
- if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(to_type.get()))
- {
- if (from_type->equals(*low_cardinality_type->getDictionaryType()))
- {
- auto col = low_cardinality_type->createColumn();
- assert_cast<ColumnLowCardinality &>(*col).insertRangeFromFullColumn(*column, 0, column->size());
- return col;
- }
- }
-
- if (const auto * from_array_type = typeid_cast<const DataTypeArray *>(from_type.get()))
- {
- if (const auto * to_array_type = typeid_cast<const DataTypeArray *>(to_type.get()))
- {
- const auto * column_array = typeid_cast<const ColumnArray *>(column.get());
- if (!column_array)
- throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
- ErrorCodes::ILLEGAL_COLUMN);
-
- const auto & nested_from = from_array_type->getNestedType();
- const auto & nested_to = to_array_type->getNestedType();
-
- return ColumnArray::create(
- recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
- column_array->getOffsetsPtr());
- }
- }
-
- if (const auto * from_tuple_type = typeid_cast<const DataTypeTuple *>(from_type.get()))
- {
- if (const auto * to_tuple_type = typeid_cast<const DataTypeTuple *>(to_type.get()))
- {
- const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get());
- if (!column_tuple)
- throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
- ErrorCodes::ILLEGAL_COLUMN);
-
- auto columns = column_tuple->getColumns();
- const auto & from_elements = from_tuple_type->getElements();
- const auto & to_elements = to_tuple_type->getElements();
-
- bool has_converted = false;
-
- for (size_t i = 0; i < columns.size(); ++i)
- {
- auto & element = columns[i];
- auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i));
- if (element.get() != element_no_lc.get())
- {
- element = element_no_lc;
- has_converted = true;
- }
- }
-
- if (!has_converted)
- return column;
-
- return ColumnTuple::create(columns);
- }
- }
-
- throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH);
-}
-
-}
+ if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get()))
+ return column_low_cardinality->convertToFullColumn();
+
+ return column;
+}
+
+ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
+{
+ if (!column)
+ return column;
+
+ if (from_type->equals(*to_type))
+ return column;
+
+ /// We can allow insert enum column if it's numeric type is the same as the column's type in table.
+ if (WhichDataType(to_type).isEnum() && from_type->getTypeId() == to_type->getTypeId())
+ return column;
+
+ if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
+ {
+ const auto & nested = column_const->getDataColumnPtr();
+ auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type);
+ if (nested.get() == nested_no_lc.get())
+ return column;
+
+ return ColumnConst::create(nested_no_lc, column_const->size());
+ }
+
+ if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get()))
+ {
+ if (to_type->equals(*low_cardinality_type->getDictionaryType()))
+ return column->convertToFullColumnIfLowCardinality();
+ }
+
+ if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(to_type.get()))
+ {
+ if (from_type->equals(*low_cardinality_type->getDictionaryType()))
+ {
+ auto col = low_cardinality_type->createColumn();
+ assert_cast<ColumnLowCardinality &>(*col).insertRangeFromFullColumn(*column, 0, column->size());
+ return col;
+ }
+ }
+
+ if (const auto * from_array_type = typeid_cast<const DataTypeArray *>(from_type.get()))
+ {
+ if (const auto * to_array_type = typeid_cast<const DataTypeArray *>(to_type.get()))
+ {
+ const auto * column_array = typeid_cast<const ColumnArray *>(column.get());
+ if (!column_array)
+ throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
+ ErrorCodes::ILLEGAL_COLUMN);
+
+ const auto & nested_from = from_array_type->getNestedType();
+ const auto & nested_to = to_array_type->getNestedType();
+
+ return ColumnArray::create(
+ recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
+ column_array->getOffsetsPtr());
+ }
+ }
+
+ if (const auto * from_tuple_type = typeid_cast<const DataTypeTuple *>(from_type.get()))
+ {
+ if (const auto * to_tuple_type = typeid_cast<const DataTypeTuple *>(to_type.get()))
+ {
+ const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get());
+ if (!column_tuple)
+ throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
+ ErrorCodes::ILLEGAL_COLUMN);
+
+ auto columns = column_tuple->getColumns();
+ const auto & from_elements = from_tuple_type->getElements();
+ const auto & to_elements = to_tuple_type->getElements();
+
+ bool has_converted = false;
+
+ for (size_t i = 0; i < columns.size(); ++i)
+ {
+ auto & element = columns[i];
+ auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i));
+ if (element.get() != element_no_lc.get())
+ {
+ element = element_no_lc;
+ has_converted = true;
+ }
+ }
+
+ if (!has_converted)
+ return column;
+
+ return ColumnTuple::create(columns);
+ }
+ }
+
+ throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h
index ae938b1104..ff692abf1e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/DataTypes/Serializations/SerializationCustomSimpleText.h
@@ -1,57 +1,57 @@
-#pragma once
-
+#pragma once
+
#include <DataTypes/Serializations/SerializationWrapper.h>
-
-namespace DB
-{
-
-class ReadBuffer;
-class WriteBuffer;
-struct FormatSettings;
-class IColumn;
-
-/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText
- * for all serialization and deserialization. */
+
+namespace DB
+{
+
+class ReadBuffer;
+class WriteBuffer;
+struct FormatSettings;
+class IColumn;
+
+/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText
+ * for all serialization and deserialization. */
class SerializationCustomSimpleText : public SerializationWrapper
-{
-public:
+{
+public:
SerializationCustomSimpleText(const SerializationPtr & nested_);
- // Methods that subclasses must override in order to get full serialization/deserialization support.
- virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0;
- virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
-
- /** Text deserialization without quoting or escaping.
- */
- void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
-
- /** Text serialization with escaping but without quoting.
- */
- void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
- void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
-
- /** Text serialization as a literal that may be inserted into a query.
- */
- void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
- void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
-
- /** Text serialization for the CSV format.
- */
- void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
- /** delimiter - the delimiter we expect when reading a string value that is not double-quoted
- * (the delimiter is not consumed).
- */
- void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
-
- /** Text serialization intended for using in JSON format.
- * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes.
- */
- void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
- void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
-
- /** Text serialization for putting into the XML format.
- */
- void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
-};
-
-}
+ // Methods that subclasses must override in order to get full serialization/deserialization support.
+ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0;
+ virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /** Text deserialization without quoting or escaping.
+ */
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization with escaping but without quoting.
+ */
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization as a literal that may be inserted into a query.
+ */
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization for the CSV format.
+ */
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ /** delimiter - the delimiter we expect when reading a string value that is not double-quoted
+ * (the delimiter is not consumed).
+ */
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization intended for using in JSON format.
+ * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes.
+ */
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization for putting into the XML format.
+ */
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h
index 7fcac8928c..45fd1483ee 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h
@@ -1,48 +1,48 @@
-#pragma once
-
+#pragma once
+
#include <Disks/IDisk.h>
#include <Interpreters/Context_fwd.h>
#include <common/types.h>
-
-#include <boost/noncopyable.hpp>
-#include <Poco/Util/AbstractConfiguration.h>
-
+
+#include <boost/noncopyable.hpp>
+#include <Poco/Util/AbstractConfiguration.h>
+
#include <functional>
#include <map>
#include <unordered_map>
+
-
-namespace DB
-{
-
+namespace DB
+{
+
using DisksMap = std::map<String, DiskPtr>;
-/**
- * Disk factory. Responsible for creating new disk objects.
- */
-class DiskFactory final : private boost::noncopyable
-{
-public:
- using Creator = std::function<DiskPtr(
- const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
+/**
+ * Disk factory. Responsible for creating new disk objects.
+ */
+class DiskFactory final : private boost::noncopyable
+{
+public:
+ using Creator = std::function<DiskPtr(
+ const String & name,
+ const Poco::Util::AbstractConfiguration & config,
+ const String & config_prefix,
ContextPtr context,
const DisksMap & map)>;
-
- static DiskFactory & instance();
-
- void registerDiskType(const String & disk_type, Creator creator);
-
- DiskPtr create(
- const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
+
+ static DiskFactory & instance();
+
+ void registerDiskType(const String & disk_type, Creator creator);
+
+ DiskPtr create(
+ const String & name,
+ const Poco::Util::AbstractConfiguration & config,
+ const String & config_prefix,
ContextPtr context,
const DisksMap & map) const;
-
-private:
- using DiskTypeRegistry = std::unordered_map<String, Creator>;
- DiskTypeRegistry registry;
-};
-
-}
+
+private:
+ using DiskTypeRegistry = std::unordered_map<String, Creator>;
+ DiskTypeRegistry registry;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp
index 37d758c538..202d3a4c6b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp
@@ -1,36 +1,36 @@
-#include "DiskLocal.h"
-#include <Common/createHardLink.h>
-#include "DiskFactory.h"
-
+#include "DiskLocal.h"
+#include <Common/createHardLink.h>
+#include "DiskFactory.h"
+
#include <Disks/LocalDirectorySyncGuard.h>
-#include <Interpreters/Context.h>
-#include <Common/filesystemHelpers.h>
-#include <Common/quoteString.h>
-#include <IO/createReadBufferFromFileBase.h>
+#include <Interpreters/Context.h>
+#include <Common/filesystemHelpers.h>
+#include <Common/quoteString.h>
+#include <IO/createReadBufferFromFileBase.h>
#include <fstream>
#include <unistd.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int UNKNOWN_ELEMENT_IN_CONFIG;
- extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
- extern const int PATH_ACCESS_DENIED;
- extern const int INCORRECT_DISK_INDEX;
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int UNKNOWN_ELEMENT_IN_CONFIG;
+ extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
+ extern const int PATH_ACCESS_DENIED;
+ extern const int INCORRECT_DISK_INDEX;
extern const int CANNOT_TRUNCATE_FILE;
extern const int CANNOT_UNLINK;
extern const int CANNOT_RMDIR;
-}
-
-std::mutex DiskLocal::reservation_mutex;
-
-
-using DiskLocalPtr = std::shared_ptr<DiskLocal>;
-
+}
+
+std::mutex DiskLocal::reservation_mutex;
+
+
+using DiskLocalPtr = std::shared_ptr<DiskLocal>;
+
static void loadDiskLocalConfig(const String & name,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
@@ -81,203 +81,203 @@ static void loadDiskLocalConfig(const String & name,
}
}
-class DiskLocalReservation : public IReservation
-{
-public:
- DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_)
- : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_)
- {
- }
-
- UInt64 getSize() const override { return size; }
-
- DiskPtr getDisk(size_t i) const override;
-
- Disks getDisks() const override { return {disk}; }
-
- void update(UInt64 new_size) override;
-
- ~DiskLocalReservation() override;
-
-private:
- DiskLocalPtr disk;
- UInt64 size;
- CurrentMetrics::Increment metric_increment;
-};
-
-
-class DiskLocalDirectoryIterator : public IDiskDirectoryIterator
-{
-public:
- explicit DiskLocalDirectoryIterator(const String & disk_path_, const String & dir_path_)
+class DiskLocalReservation : public IReservation
+{
+public:
+ DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_)
+ : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_)
+ {
+ }
+
+ UInt64 getSize() const override { return size; }
+
+ DiskPtr getDisk(size_t i) const override;
+
+ Disks getDisks() const override { return {disk}; }
+
+ void update(UInt64 new_size) override;
+
+ ~DiskLocalReservation() override;
+
+private:
+ DiskLocalPtr disk;
+ UInt64 size;
+ CurrentMetrics::Increment metric_increment;
+};
+
+
+class DiskLocalDirectoryIterator : public IDiskDirectoryIterator
+{
+public:
+ explicit DiskLocalDirectoryIterator(const String & disk_path_, const String & dir_path_)
: dir_path(dir_path_), entry(fs::path(disk_path_) / dir_path_)
- {
- }
-
+ {
+ }
+
void next() override { ++entry; }
-
+
bool isValid() const override { return entry != fs::directory_iterator(); }
-
- String path() const override
- {
+
+ String path() const override
+ {
if (entry->is_directory())
return dir_path / entry->path().filename() / "";
- else
+ else
return dir_path / entry->path().filename();
- }
-
-
+ }
+
+
String name() const override { return entry->path().filename(); }
-private:
+private:
fs::path dir_path;
fs::directory_iterator entry;
-};
-
-
-ReservationPtr DiskLocal::reserve(UInt64 bytes)
-{
- if (!tryReserve(bytes))
- return {};
- return std::make_unique<DiskLocalReservation>(std::static_pointer_cast<DiskLocal>(shared_from_this()), bytes);
-}
-
-bool DiskLocal::tryReserve(UInt64 bytes)
-{
- std::lock_guard lock(DiskLocal::reservation_mutex);
- if (bytes == 0)
- {
+};
+
+
+ReservationPtr DiskLocal::reserve(UInt64 bytes)
+{
+ if (!tryReserve(bytes))
+ return {};
+ return std::make_unique<DiskLocalReservation>(std::static_pointer_cast<DiskLocal>(shared_from_this()), bytes);
+}
+
+bool DiskLocal::tryReserve(UInt64 bytes)
+{
+ std::lock_guard lock(DiskLocal::reservation_mutex);
+ if (bytes == 0)
+ {
LOG_DEBUG(log, "Reserving 0 bytes on disk {}", backQuote(name));
- ++reservation_count;
- return true;
- }
-
- auto available_space = getAvailableSpace();
- UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes);
- if (unreserved_space >= bytes)
- {
+ ++reservation_count;
+ return true;
+ }
+
+ auto available_space = getAvailableSpace();
+ UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes);
+ if (unreserved_space >= bytes)
+ {
LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.",
- ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space));
- ++reservation_count;
- reserved_bytes += bytes;
- return true;
- }
- return false;
-}
-
-UInt64 DiskLocal::getTotalSpace() const
-{
- struct statvfs fs;
- if (name == "default") /// for default disk we get space from path/data/
+ ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space));
+ ++reservation_count;
+ reserved_bytes += bytes;
+ return true;
+ }
+ return false;
+}
+
+UInt64 DiskLocal::getTotalSpace() const
+{
+ struct statvfs fs;
+ if (name == "default") /// for default disk we get space from path/data/
fs = getStatVFS((fs::path(disk_path) / "data/").string());
- else
- fs = getStatVFS(disk_path);
- UInt64 total_size = fs.f_blocks * fs.f_bsize;
- if (total_size < keep_free_space_bytes)
- return 0;
- return total_size - keep_free_space_bytes;
-}
-
-UInt64 DiskLocal::getAvailableSpace() const
-{
- /// we use f_bavail, because part of b_free space is
- /// available for superuser only and for system purposes
- struct statvfs fs;
- if (name == "default") /// for default disk we get space from path/data/
+ else
+ fs = getStatVFS(disk_path);
+ UInt64 total_size = fs.f_blocks * fs.f_bsize;
+ if (total_size < keep_free_space_bytes)
+ return 0;
+ return total_size - keep_free_space_bytes;
+}
+
+UInt64 DiskLocal::getAvailableSpace() const
+{
+ /// we use f_bavail, because part of b_free space is
+ /// available for superuser only and for system purposes
+ struct statvfs fs;
+ if (name == "default") /// for default disk we get space from path/data/
fs = getStatVFS((fs::path(disk_path) / "data/").string());
- else
- fs = getStatVFS(disk_path);
- UInt64 total_size = fs.f_bavail * fs.f_bsize;
- if (total_size < keep_free_space_bytes)
- return 0;
- return total_size - keep_free_space_bytes;
-}
-
-UInt64 DiskLocal::getUnreservedSpace() const
-{
- std::lock_guard lock(DiskLocal::reservation_mutex);
- auto available_space = getAvailableSpace();
- available_space -= std::min(available_space, reserved_bytes);
- return available_space;
-}
-
-bool DiskLocal::exists(const String & path) const
-{
+ else
+ fs = getStatVFS(disk_path);
+ UInt64 total_size = fs.f_bavail * fs.f_bsize;
+ if (total_size < keep_free_space_bytes)
+ return 0;
+ return total_size - keep_free_space_bytes;
+}
+
+UInt64 DiskLocal::getUnreservedSpace() const
+{
+ std::lock_guard lock(DiskLocal::reservation_mutex);
+ auto available_space = getAvailableSpace();
+ available_space -= std::min(available_space, reserved_bytes);
+ return available_space;
+}
+
+bool DiskLocal::exists(const String & path) const
+{
return fs::exists(fs::path(disk_path) / path);
-}
-
-bool DiskLocal::isFile(const String & path) const
-{
+}
+
+bool DiskLocal::isFile(const String & path) const
+{
return fs::is_regular_file(fs::path(disk_path) / path);
-}
-
-bool DiskLocal::isDirectory(const String & path) const
-{
+}
+
+bool DiskLocal::isDirectory(const String & path) const
+{
return fs::is_directory(fs::path(disk_path) / path);
-}
-
-size_t DiskLocal::getFileSize(const String & path) const
-{
+}
+
+size_t DiskLocal::getFileSize(const String & path) const
+{
return fs::file_size(fs::path(disk_path) / path);
-}
-
-void DiskLocal::createDirectory(const String & path)
-{
+}
+
+void DiskLocal::createDirectory(const String & path)
+{
fs::create_directory(fs::path(disk_path) / path);
-}
-
-void DiskLocal::createDirectories(const String & path)
-{
+}
+
+void DiskLocal::createDirectories(const String & path)
+{
fs::create_directories(fs::path(disk_path) / path);
-}
-
-void DiskLocal::clearDirectory(const String & path)
-{
+}
+
+void DiskLocal::clearDirectory(const String & path)
+{
for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path))
fs::remove(entry.path());
-}
-
-void DiskLocal::moveDirectory(const String & from_path, const String & to_path)
-{
+}
+
+void DiskLocal::moveDirectory(const String & from_path, const String & to_path)
+{
fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
-}
-
-DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path)
-{
- return std::make_unique<DiskLocalDirectoryIterator>(disk_path, path);
-}
-
-void DiskLocal::moveFile(const String & from_path, const String & to_path)
-{
+}
+
+DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path)
+{
+ return std::make_unique<DiskLocalDirectoryIterator>(disk_path, path);
+}
+
+void DiskLocal::moveFile(const String & from_path, const String & to_path)
+{
fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
-}
-
-void DiskLocal::replaceFile(const String & from_path, const String & to_path)
-{
+}
+
+void DiskLocal::replaceFile(const String & from_path, const String & to_path)
+{
fs::path from_file = fs::path(disk_path) / from_path;
fs::path to_file = fs::path(disk_path) / to_path;
fs::rename(from_file, to_file);
-}
-
+}
+
std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, size_t estimated_size) const
-{
+{
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, estimated_size);
-}
-
-std::unique_ptr<WriteBufferFromFileBase>
+}
+
+std::unique_ptr<WriteBufferFromFileBase>
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode)
-{
- int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
+{
+ int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
return std::make_unique<WriteBufferFromFile>(fs::path(disk_path) / path, buf_size, flags);
-}
-
+}
+
void DiskLocal::removeFile(const String & path)
-{
+{
auto fs_path = fs::path(disk_path) / path;
if (0 != unlink(fs_path.c_str()))
throwFromErrnoWithPath("Cannot unlink file " + fs_path.string(), fs_path, ErrorCodes::CANNOT_UNLINK);
-}
-
+}
+
void DiskLocal::removeFileIfExists(const String & path)
{
auto fs_path = fs::path(disk_path) / path;
@@ -292,33 +292,33 @@ void DiskLocal::removeDirectory(const String & path)
throwFromErrnoWithPath("Cannot rmdir " + fs_path.string(), fs_path, ErrorCodes::CANNOT_RMDIR);
}
-void DiskLocal::removeRecursive(const String & path)
-{
+void DiskLocal::removeRecursive(const String & path)
+{
fs::remove_all(fs::path(disk_path) / path);
-}
-
-void DiskLocal::listFiles(const String & path, std::vector<String> & file_names)
-{
+}
+
+void DiskLocal::listFiles(const String & path, std::vector<String> & file_names)
+{
file_names.clear();
for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path))
file_names.emplace_back(entry.path().filename());
-}
-
-void DiskLocal::setLastModified(const String & path, const Poco::Timestamp & timestamp)
-{
+}
+
+void DiskLocal::setLastModified(const String & path, const Poco::Timestamp & timestamp)
+{
FS::setModificationTime(fs::path(disk_path) / path, timestamp.epochTime());
-}
-
-Poco::Timestamp DiskLocal::getLastModified(const String & path)
-{
+}
+
+Poco::Timestamp DiskLocal::getLastModified(const String & path)
+{
return FS::getModificationTimestamp(fs::path(disk_path) / path);
-}
-
-void DiskLocal::createHardLink(const String & src_path, const String & dst_path)
-{
+}
+
+void DiskLocal::createHardLink(const String & src_path, const String & dst_path)
+{
DB::createHardLink(fs::path(disk_path) / src_path, fs::path(disk_path) / dst_path);
-}
-
+}
+
void DiskLocal::truncateFile(const String & path, size_t size)
{
int res = truncate((fs::path(disk_path) / path).string().data(), size);
@@ -326,26 +326,26 @@ void DiskLocal::truncateFile(const String & path, size_t size)
throwFromErrnoWithPath("Cannot truncate file " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE);
}
-void DiskLocal::createFile(const String & path)
-{
+void DiskLocal::createFile(const String & path)
+{
FS::createFile(fs::path(disk_path) / path);
-}
-
-void DiskLocal::setReadOnly(const String & path)
-{
+}
+
+void DiskLocal::setReadOnly(const String & path)
+{
fs::permissions(fs::path(disk_path) / path,
fs::perms::owner_write | fs::perms::group_write | fs::perms::others_write,
fs::perm_options::remove);
-}
-
-bool inline isSameDiskType(const IDisk & one, const IDisk & another)
-{
- return typeid(one) == typeid(another);
-}
-
-void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
- if (isSameDiskType(*this, *to_disk))
+}
+
+bool inline isSameDiskType(const IDisk & one, const IDisk & another)
+{
+ return typeid(one) == typeid(another);
+}
+
+void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
+{
+ if (isSameDiskType(*this, *to_disk))
{
fs::path to = fs::path(to_disk->getPath()) / to_path;
fs::path from = fs::path(disk_path) / from_path;
@@ -356,10 +356,10 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to
fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
}
- else
+ else
copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation.
-}
-
+}
+
SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
{
return std::make_unique<LocalDirectorySyncGuard>(fs::path(disk_path) / path);
@@ -380,63 +380,63 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi
keep_free_space_bytes = new_keep_free_space_bytes;
}
-DiskPtr DiskLocalReservation::getDisk(size_t i) const
-{
- if (i != 0)
- {
- throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX);
- }
- return disk;
-}
-
-void DiskLocalReservation::update(UInt64 new_size)
-{
- std::lock_guard lock(DiskLocal::reservation_mutex);
- disk->reserved_bytes -= size;
- size = new_size;
- disk->reserved_bytes += size;
-}
-
-DiskLocalReservation::~DiskLocalReservation()
-{
- try
- {
- std::lock_guard lock(DiskLocal::reservation_mutex);
- if (disk->reserved_bytes < size)
- {
- disk->reserved_bytes = 0;
+DiskPtr DiskLocalReservation::getDisk(size_t i) const
+{
+ if (i != 0)
+ {
+ throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX);
+ }
+ return disk;
+}
+
+void DiskLocalReservation::update(UInt64 new_size)
+{
+ std::lock_guard lock(DiskLocal::reservation_mutex);
+ disk->reserved_bytes -= size;
+ size = new_size;
+ disk->reserved_bytes += size;
+}
+
+DiskLocalReservation::~DiskLocalReservation()
+{
+ try
+ {
+ std::lock_guard lock(DiskLocal::reservation_mutex);
+ if (disk->reserved_bytes < size)
+ {
+ disk->reserved_bytes = 0;
LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName());
- }
- else
- {
- disk->reserved_bytes -= size;
- }
-
- if (disk->reservation_count == 0)
+ }
+ else
+ {
+ disk->reserved_bytes -= size;
+ }
+
+ if (disk->reservation_count == 0)
LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName());
- else
- --disk->reservation_count;
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-
-void registerDiskLocal(DiskFactory & factory)
-{
- auto creator = [](const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
+ else
+ --disk->reservation_count;
+ }
+ catch (...)
+ {
+ tryLogCurrentException(__PRETTY_FUNCTION__);
+ }
+}
+
+
+void registerDiskLocal(DiskFactory & factory)
+{
+ auto creator = [](const String & name,
+ const Poco::Util::AbstractConfiguration & config,
+ const String & config_prefix,
ContextPtr context,
const DisksMap & /*map*/) -> DiskPtr {
String path;
UInt64 keep_free_space_bytes;
loadDiskLocalConfig(name, config, config_prefix, context, path, keep_free_space_bytes);
- return std::make_shared<DiskLocal>(name, path, keep_free_space_bytes);
- };
- factory.registerDiskType("local", creator);
-}
-
-}
+ return std::make_shared<DiskLocal>(name, path, keep_free_space_bytes);
+ };
+ factory.registerDiskType("local", creator);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h
index 145211a83e..f7ebbf9416 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h
@@ -1,125 +1,125 @@
-#pragma once
-
+#pragma once
+
#include <common/logger_useful.h>
-#include <Disks/IDisk.h>
-#include <IO/ReadBufferFromFile.h>
-#include <IO/ReadBufferFromFileBase.h>
-#include <IO/WriteBufferFromFile.h>
+#include <Disks/IDisk.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ReadBufferFromFileBase.h>
+#include <IO/WriteBufferFromFile.h>
#include <Poco/Util/AbstractConfiguration.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-class DiskLocalReservation;
-
-class DiskLocal : public IDisk
-{
-public:
- friend class DiskLocalReservation;
-
- DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_)
- : name(name_), disk_path(path_), keep_free_space_bytes(keep_free_space_bytes_)
- {
- if (disk_path.back() != '/')
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+class DiskLocalReservation;
+
+class DiskLocal : public IDisk
+{
+public:
+ friend class DiskLocalReservation;
+
+ DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_)
+ : name(name_), disk_path(path_), keep_free_space_bytes(keep_free_space_bytes_)
+ {
+ if (disk_path.back() != '/')
throw Exception("Disk path must end with '/', but '" + disk_path + "' doesn't.", ErrorCodes::LOGICAL_ERROR);
- }
-
- const String & getName() const override { return name; }
-
- const String & getPath() const override { return disk_path; }
-
- ReservationPtr reserve(UInt64 bytes) override;
-
- UInt64 getTotalSpace() const override;
-
- UInt64 getAvailableSpace() const override;
-
- UInt64 getUnreservedSpace() const override;
-
- UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; }
-
- bool exists(const String & path) const override;
-
- bool isFile(const String & path) const override;
-
- bool isDirectory(const String & path) const override;
-
- size_t getFileSize(const String & path) const override;
-
- void createDirectory(const String & path) override;
-
- void createDirectories(const String & path) override;
-
- void clearDirectory(const String & path) override;
-
- void moveDirectory(const String & from_path, const String & to_path) override;
-
- DiskDirectoryIteratorPtr iterateDirectory(const String & path) override;
-
- void createFile(const String & path) override;
-
- void moveFile(const String & from_path, const String & to_path) override;
-
- void replaceFile(const String & from_path, const String & to_path) override;
-
- void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
-
- void listFiles(const String & path, std::vector<String> & file_names) override;
-
- std::unique_ptr<ReadBufferFromFileBase> readFile(
- const String & path,
+ }
+
+ const String & getName() const override { return name; }
+
+ const String & getPath() const override { return disk_path; }
+
+ ReservationPtr reserve(UInt64 bytes) override;
+
+ UInt64 getTotalSpace() const override;
+
+ UInt64 getAvailableSpace() const override;
+
+ UInt64 getUnreservedSpace() const override;
+
+ UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; }
+
+ bool exists(const String & path) const override;
+
+ bool isFile(const String & path) const override;
+
+ bool isDirectory(const String & path) const override;
+
+ size_t getFileSize(const String & path) const override;
+
+ void createDirectory(const String & path) override;
+
+ void createDirectories(const String & path) override;
+
+ void clearDirectory(const String & path) override;
+
+ void moveDirectory(const String & from_path, const String & to_path) override;
+
+ DiskDirectoryIteratorPtr iterateDirectory(const String & path) override;
+
+ void createFile(const String & path) override;
+
+ void moveFile(const String & from_path, const String & to_path) override;
+
+ void replaceFile(const String & from_path, const String & to_path) override;
+
+ void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
+
+ void listFiles(const String & path, std::vector<String> & file_names) override;
+
+ std::unique_ptr<ReadBufferFromFileBase> readFile(
+ const String & path,
const ReadSettings & settings,
size_t estimated_size) const override;
-
- std::unique_ptr<WriteBufferFromFileBase> writeFile(
- const String & path,
- size_t buf_size,
+
+ std::unique_ptr<WriteBufferFromFileBase> writeFile(
+ const String & path,
+ size_t buf_size,
WriteMode mode) override;
-
+
void removeFile(const String & path) override;
void removeFileIfExists(const String & path) override;
void removeDirectory(const String & path) override;
- void removeRecursive(const String & path) override;
-
- void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
-
- Poco::Timestamp getLastModified(const String & path) override;
-
- void setReadOnly(const String & path) override;
-
- void createHardLink(const String & src_path, const String & dst_path) override;
-
+ void removeRecursive(const String & path) override;
+
+ void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
+
+ Poco::Timestamp getLastModified(const String & path) override;
+
+ void setReadOnly(const String & path) override;
+
+ void createHardLink(const String & src_path, const String & dst_path) override;
+
void truncateFile(const String & path, size_t size) override;
DiskType getType() const override { return DiskType::Local; }
bool isRemote() const override { return false; }
-
+
bool supportZeroCopyReplication() const override { return false; }
SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) override;
-private:
- bool tryReserve(UInt64 bytes);
-
-private:
- const String name;
- const String disk_path;
+private:
+ bool tryReserve(UInt64 bytes);
+
+private:
+ const String name;
+ const String disk_path;
std::atomic<UInt64> keep_free_space_bytes;
-
- UInt64 reserved_bytes = 0;
- UInt64 reservation_count = 0;
-
- static std::mutex reservation_mutex;
+
+ UInt64 reserved_bytes = 0;
+ UInt64 reservation_count = 0;
+
+ static std::mutex reservation_mutex;
Poco::Logger * log = &Poco::Logger::get("DiskLocal");
-};
-
+};
+
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp
index 9407c582c2..a9d81c2761 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp
@@ -1,115 +1,115 @@
-#include "DiskLocal.h"
-#include "DiskSelector.h"
-
-#include <IO/WriteHelpers.h>
-#include <Common/escapeForFileName.h>
-#include <Common/quoteString.h>
+#include "DiskLocal.h"
+#include "DiskSelector.h"
+
+#include <IO/WriteHelpers.h>
+#include <Common/escapeForFileName.h>
+#include <Common/quoteString.h>
#include <Common/StringUtils/StringUtils.h>
-#include <common/logger_useful.h>
-#include <Interpreters/Context.h>
-
-#include <set>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
- extern const int UNKNOWN_DISK;
-}
-
+#include <common/logger_useful.h>
+#include <Interpreters/Context.h>
+
+#include <set>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
+ extern const int UNKNOWN_DISK;
+}
+
DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
-{
- Poco::Util::AbstractConfiguration::Keys keys;
- config.keys(config_prefix, keys);
-
- auto & factory = DiskFactory::instance();
-
- constexpr auto default_disk_name = "default";
- bool has_default_disk = false;
- for (const auto & disk_name : keys)
- {
- if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
- throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
- if (disk_name == default_disk_name)
- has_default_disk = true;
-
- auto disk_config_prefix = config_prefix + "." + disk_name;
-
+{
+ Poco::Util::AbstractConfiguration::Keys keys;
+ config.keys(config_prefix, keys);
+
+ auto & factory = DiskFactory::instance();
+
+ constexpr auto default_disk_name = "default";
+ bool has_default_disk = false;
+ for (const auto & disk_name : keys)
+ {
+ if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
+ throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
+
+ if (disk_name == default_disk_name)
+ has_default_disk = true;
+
+ auto disk_config_prefix = config_prefix + "." + disk_name;
+
disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
- }
- if (!has_default_disk)
+ }
+ if (!has_default_disk)
disks.emplace(default_disk_name, std::make_shared<DiskLocal>(default_disk_name, context->getPath(), 0));
-}
-
-
-DiskSelectorPtr DiskSelector::updateFromConfig(
+}
+
+
+DiskSelectorPtr DiskSelector::updateFromConfig(
const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) const
-{
- Poco::Util::AbstractConfiguration::Keys keys;
- config.keys(config_prefix, keys);
-
- auto & factory = DiskFactory::instance();
-
- std::shared_ptr<DiskSelector> result = std::make_shared<DiskSelector>(*this);
-
- constexpr auto default_disk_name = "default";
+{
+ Poco::Util::AbstractConfiguration::Keys keys;
+ config.keys(config_prefix, keys);
+
+ auto & factory = DiskFactory::instance();
+
+ std::shared_ptr<DiskSelector> result = std::make_shared<DiskSelector>(*this);
+
+ constexpr auto default_disk_name = "default";
DisksMap old_disks_minus_new_disks (result->getDisksMap());
-
- for (const auto & disk_name : keys)
- {
- if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
- throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
+
+ for (const auto & disk_name : keys)
+ {
+ if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
+ throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
+
auto disk_config_prefix = config_prefix + "." + disk_name;
- if (result->getDisksMap().count(disk_name) == 0)
- {
+ if (result->getDisksMap().count(disk_name) == 0)
+ {
result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap()));
- }
- else
- {
+ }
+ else
+ {
auto disk = old_disks_minus_new_disks[disk_name];
disk->applyNewSettings(config, context, disk_config_prefix, result->getDisksMap());
- old_disks_minus_new_disks.erase(disk_name);
- }
- }
-
- old_disks_minus_new_disks.erase(default_disk_name);
-
- if (!old_disks_minus_new_disks.empty())
- {
- WriteBufferFromOwnString warning;
- if (old_disks_minus_new_disks.size() == 1)
- writeString("Disk ", warning);
- else
- writeString("Disks ", warning);
-
- int index = 0;
+ old_disks_minus_new_disks.erase(disk_name);
+ }
+ }
+
+ old_disks_minus_new_disks.erase(default_disk_name);
+
+ if (!old_disks_minus_new_disks.empty())
+ {
+ WriteBufferFromOwnString warning;
+ if (old_disks_minus_new_disks.size() == 1)
+ writeString("Disk ", warning);
+ else
+ writeString("Disks ", warning);
+
+ int index = 0;
for (const auto & [name, _] : old_disks_minus_new_disks)
- {
- if (index++ > 0)
- writeString(", ", warning);
- writeBackQuotedString(name, warning);
- }
-
- writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning);
- LOG_WARNING(&Poco::Logger::get("DiskSelector"), warning.str());
- }
-
- return result;
-}
-
-
-DiskPtr DiskSelector::get(const String & name) const
-{
- auto it = disks.find(name);
- if (it == disks.end())
- throw Exception("Unknown disk " + name, ErrorCodes::UNKNOWN_DISK);
- return it->second;
-}
-
-}
+ {
+ if (index++ > 0)
+ writeString(", ", warning);
+ writeBackQuotedString(name, warning);
+ }
+
+ writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning);
+ LOG_WARNING(&Poco::Logger::get("DiskSelector"), warning.str());
+ }
+
+ return result;
+}
+
+
+DiskPtr DiskSelector::get(const String & name) const
+{
+ auto it = disks.find(name);
+ if (it == disks.end())
+ throw Exception("Unknown disk " + name, ErrorCodes::UNKNOWN_DISK);
+ return it->second;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h
index 5475221544..c662a9de15 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h
@@ -1,44 +1,44 @@
-#pragma once
-
-#include <Disks/DiskFactory.h>
-#include <Disks/IDisk.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-#include <map>
-
-namespace DB
-{
-
-class DiskSelector;
-using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
-
-/// Parse .xml configuration and store information about disks
-/// Mostly used for introspection.
-class DiskSelector
-{
-public:
+#pragma once
+
+#include <Disks/DiskFactory.h>
+#include <Disks/IDisk.h>
+
+#include <Poco/Util/AbstractConfiguration.h>
+
+#include <map>
+
+namespace DB
+{
+
+class DiskSelector;
+using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
+
+/// Parse .xml configuration and store information about disks
+/// Mostly used for introspection.
+class DiskSelector
+{
+public:
DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
- DiskSelector(const DiskSelector & from) : disks(from.disks) { }
-
+ DiskSelector(const DiskSelector & from) : disks(from.disks) { }
+
DiskSelectorPtr updateFromConfig(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context
) const;
-
- /// Get disk by name
- DiskPtr get(const String & name) const;
-
- /// Get all disks with names
- const DisksMap & getDisksMap() const { return disks; }
+
+ /// Get disk by name
+ DiskPtr get(const String & name) const;
+
+ /// Get all disks with names
+ const DisksMap & getDisksMap() const { return disks; }
void addToDiskMap(const String & name, DiskPtr disk)
- {
- disks.emplace(name, disk);
- }
-
-private:
- DisksMap disks;
-};
-
-}
+ {
+ disks.emplace(name, disk);
+ }
+
+private:
+ DisksMap disks;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp
index df0f921389..7113df561b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp
@@ -1,43 +1,43 @@
-#include "IDisk.h"
+#include "IDisk.h"
#include "Disks/Executor.h"
-#include <IO/ReadBufferFromFileBase.h>
-#include <IO/WriteBufferFromFileBase.h>
+#include <IO/ReadBufferFromFileBase.h>
+#include <IO/WriteBufferFromFileBase.h>
#include <IO/copyData.h>
-#include <Poco/Logger.h>
-#include <common/logger_useful.h>
+#include <Poco/Logger.h>
+#include <common/logger_useful.h>
#include <Common/setThreadName.h>
-
-namespace DB
-{
-
+
+namespace DB
+{
+
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
-bool IDisk::isDirectoryEmpty(const String & path)
-{
- return !iterateDirectory(path)->isValid();
-}
-
-void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & to_path)
-{
+bool IDisk::isDirectoryEmpty(const String & path)
+{
+ return !iterateDirectory(path)->isValid();
+}
+
+void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & to_path)
+{
LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.",
from_disk.getName(), from_disk.getPath(), from_path, to_disk.getName(), to_disk.getPath(), to_path);
-
- auto in = from_disk.readFile(from_path);
- auto out = to_disk.writeFile(to_path);
- copyData(*in, *out);
+
+ auto in = from_disk.readFile(from_path);
+ auto out = to_disk.writeFile(to_path);
+ copyData(*in, *out);
out->finalize();
-}
-
+}
+
using ResultsCollector = std::vector<std::future<void>>;
void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results)
-{
+{
if (from_disk.isFile(from_path))
- {
+ {
auto result = exec.execute(
[&from_disk, from_path, &to_disk, to_path]()
{
@@ -46,18 +46,18 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
});
results.push_back(std::move(result));
- }
- else
- {
+ }
+ else
+ {
fs::path dir_name = fs::path(from_path).parent_path().filename();
fs::path dest(fs::path(to_path) / dir_name);
to_disk.createDirectories(dest);
-
+
for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next())
asyncCopy(from_disk, it->path(), to_disk, dest, exec, results);
- }
-}
-
+ }
+}
+
void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
auto & exec = to_disk->getExecutor();
@@ -79,7 +79,7 @@ void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_dis
void IDisk::truncateFile(const String &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate operation is not implemented for disk of type {}", getType());
-}
+}
SyncGuardPtr IDisk::getDirectorySyncGuard(const String & /* path */) const
{
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h
index db07987572..b62a0436bd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h
@@ -1,25 +1,25 @@
-#pragma once
-
+#pragma once
+
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>
-#include <Core/Defines.h>
+#include <Core/Defines.h>
#include <common/types.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/Exception.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/Exception.h>
#include <Disks/Executor.h>
#include <Disks/DiskType.h>
#include <IO/ReadSettings.h>
-
-#include <memory>
-#include <mutex>
-#include <utility>
-#include <boost/noncopyable.hpp>
-#include <Poco/Timestamp.h>
+
+#include <memory>
+#include <mutex>
+#include <utility>
+#include <boost/noncopyable.hpp>
+#include <Poco/Timestamp.h>
#include <filesystem>
-
+
namespace fs = std::filesystem;
-
+
namespace Poco
{
namespace Util
@@ -28,52 +28,52 @@ namespace Poco
}
}
-namespace CurrentMetrics
-{
+namespace CurrentMetrics
+{
extern const Metric DiskSpaceReservedForMerge;
-}
-
-namespace DB
-{
-
-class IDiskDirectoryIterator;
-using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>;
-
-class IReservation;
-using ReservationPtr = std::unique_ptr<IReservation>;
+}
+
+namespace DB
+{
+
+class IDiskDirectoryIterator;
+using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>;
+
+class IReservation;
+using ReservationPtr = std::unique_ptr<IReservation>;
using Reservations = std::vector<ReservationPtr>;
-
-class ReadBufferFromFileBase;
-class WriteBufferFromFileBase;
+
+class ReadBufferFromFileBase;
+class WriteBufferFromFileBase;
class MMappedFileCache;
-
-/**
- * Mode of opening a file for write.
- */
-enum class WriteMode
-{
- Rewrite,
- Append
-};
-
-/**
- * Provide interface for reservation.
- */
-class Space : public std::enable_shared_from_this<Space>
-{
-public:
- /// Return the name of the space object.
- virtual const String & getName() const = 0;
-
- /// Reserve the specified number of bytes.
- virtual ReservationPtr reserve(UInt64 bytes) = 0;
-
- virtual ~Space() = default;
-};
-
-using SpacePtr = std::shared_ptr<Space>;
-
-/**
+
+/**
+ * Mode of opening a file for write.
+ */
+enum class WriteMode
+{
+ Rewrite,
+ Append
+};
+
+/**
+ * Provide interface for reservation.
+ */
+class Space : public std::enable_shared_from_this<Space>
+{
+public:
+ /// Return the name of the space object.
+ virtual const String & getName() const = 0;
+
+ /// Reserve the specified number of bytes.
+ virtual ReservationPtr reserve(UInt64 bytes) = 0;
+
+ virtual ~Space() = default;
+};
+
+using SpacePtr = std::shared_ptr<Space>;
+
+/**
* A guard, that should synchronize file's or directory's state
* with storage device (e.g. fsync in POSIX) in its destructor.
*/
@@ -87,105 +87,105 @@ public:
using SyncGuardPtr = std::unique_ptr<ISyncGuard>;
/**
- * A unit of storage persisting data and metadata.
- * Abstract underlying storage technology.
- * Responsible for:
- * - file management;
- * - space accounting and reservation.
- */
-class IDisk : public Space
-{
-public:
+ * A unit of storage persisting data and metadata.
+ * Abstract underlying storage technology.
+ * Responsible for:
+ * - file management;
+ * - space accounting and reservation.
+ */
+class IDisk : public Space
+{
+public:
/// Default constructor.
explicit IDisk(std::unique_ptr<Executor> executor_ = std::make_unique<SyncExecutor>()) : executor(std::move(executor_)) { }
- /// Root path for all files stored on the disk.
- /// It's not required to be a local filesystem path.
- virtual const String & getPath() const = 0;
-
- /// Total available space on the disk.
- virtual UInt64 getTotalSpace() const = 0;
-
- /// Space currently available on the disk.
- virtual UInt64 getAvailableSpace() const = 0;
-
- /// Space available for reservation (available space minus reserved space).
- virtual UInt64 getUnreservedSpace() const = 0;
-
- /// Amount of bytes which should be kept free on the disk.
- virtual UInt64 getKeepingFreeSpace() const { return 0; }
-
- /// Return `true` if the specified file exists.
- virtual bool exists(const String & path) const = 0;
-
- /// Return `true` if the specified file exists and it's a regular file (not a directory or special file type).
- virtual bool isFile(const String & path) const = 0;
-
- /// Return `true` if the specified file exists and it's a directory.
- virtual bool isDirectory(const String & path) const = 0;
-
- /// Return size of the specified file.
- virtual size_t getFileSize(const String & path) const = 0;
-
- /// Create directory.
- virtual void createDirectory(const String & path) = 0;
-
- /// Create directory and all parent directories if necessary.
- virtual void createDirectories(const String & path) = 0;
-
- /// Remove all files from the directory. Directories are not removed.
- virtual void clearDirectory(const String & path) = 0;
-
- /// Move directory from `from_path` to `to_path`.
- virtual void moveDirectory(const String & from_path, const String & to_path) = 0;
-
- /// Return iterator to the contents of the specified directory.
- virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0;
-
- /// Return `true` if the specified directory is empty.
- bool isDirectoryEmpty(const String & path);
-
- /// Create empty file at `path`.
- virtual void createFile(const String & path) = 0;
-
- /// Move the file from `from_path` to `to_path`.
- /// If a file with `to_path` path already exists, an exception will be thrown .
- virtual void moveFile(const String & from_path, const String & to_path) = 0;
-
- /// Move the file from `from_path` to `to_path`.
- /// If a file with `to_path` path already exists, it will be replaced.
- virtual void replaceFile(const String & from_path, const String & to_path) = 0;
-
- /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`.
- virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
-
- /// List files at `path` and add their names to `file_names`
- virtual void listFiles(const String & path, std::vector<String> & file_names) = 0;
-
- /// Open the file for read and return ReadBufferFromFileBase object.
- virtual std::unique_ptr<ReadBufferFromFileBase> readFile(
- const String & path,
+ /// Root path for all files stored on the disk.
+ /// It's not required to be a local filesystem path.
+ virtual const String & getPath() const = 0;
+
+ /// Total available space on the disk.
+ virtual UInt64 getTotalSpace() const = 0;
+
+ /// Space currently available on the disk.
+ virtual UInt64 getAvailableSpace() const = 0;
+
+ /// Space available for reservation (available space minus reserved space).
+ virtual UInt64 getUnreservedSpace() const = 0;
+
+ /// Amount of bytes which should be kept free on the disk.
+ virtual UInt64 getKeepingFreeSpace() const { return 0; }
+
+ /// Return `true` if the specified file exists.
+ virtual bool exists(const String & path) const = 0;
+
+ /// Return `true` if the specified file exists and it's a regular file (not a directory or special file type).
+ virtual bool isFile(const String & path) const = 0;
+
+ /// Return `true` if the specified file exists and it's a directory.
+ virtual bool isDirectory(const String & path) const = 0;
+
+ /// Return size of the specified file.
+ virtual size_t getFileSize(const String & path) const = 0;
+
+ /// Create directory.
+ virtual void createDirectory(const String & path) = 0;
+
+ /// Create directory and all parent directories if necessary.
+ virtual void createDirectories(const String & path) = 0;
+
+ /// Remove all files from the directory. Directories are not removed.
+ virtual void clearDirectory(const String & path) = 0;
+
+ /// Move directory from `from_path` to `to_path`.
+ virtual void moveDirectory(const String & from_path, const String & to_path) = 0;
+
+ /// Return iterator to the contents of the specified directory.
+ virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0;
+
+ /// Return `true` if the specified directory is empty.
+ bool isDirectoryEmpty(const String & path);
+
+ /// Create empty file at `path`.
+ virtual void createFile(const String & path) = 0;
+
+ /// Move the file from `from_path` to `to_path`.
+ /// If a file with `to_path` path already exists, an exception will be thrown .
+ virtual void moveFile(const String & from_path, const String & to_path) = 0;
+
+ /// Move the file from `from_path` to `to_path`.
+ /// If a file with `to_path` path already exists, it will be replaced.
+ virtual void replaceFile(const String & from_path, const String & to_path) = 0;
+
+ /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`.
+ virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
+
+ /// List files at `path` and add their names to `file_names`
+ virtual void listFiles(const String & path, std::vector<String> & file_names) = 0;
+
+ /// Open the file for read and return ReadBufferFromFileBase object.
+ virtual std::unique_ptr<ReadBufferFromFileBase> readFile(
+ const String & path,
const ReadSettings & settings = ReadSettings{},
size_t estimated_size = 0) const = 0;
-
- /// Open the file for write and return WriteBufferFromFileBase object.
- virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(
- const String & path,
- size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+
+ /// Open the file for write and return WriteBufferFromFileBase object.
+ virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(
+ const String & path,
+ size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
WriteMode mode = WriteMode::Rewrite) = 0;
-
+
/// Remove file. Throws exception if file doesn't exists or it's a directory.
virtual void removeFile(const String & path) = 0;
-
+
/// Remove file if it exists.
virtual void removeFileIfExists(const String & path) = 0;
/// Remove directory. Throws exception if it's not a directory or if directory is not empty.
virtual void removeDirectory(const String & path) = 0;
- /// Remove file or directory with all children. Use with extra caution. Throws exception if file doesn't exists.
- virtual void removeRecursive(const String & path) = 0;
-
+ /// Remove file or directory with all children. Use with extra caution. Throws exception if file doesn't exists.
+ virtual void removeRecursive(const String & path) = 0;
+
/// Remove file. Throws exception if file doesn't exists or if directory is not empty.
/// Differs from removeFile for S3/HDFS disks
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
@@ -201,22 +201,22 @@ public:
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); }
- /// Set last modified time to file or directory at `path`.
- virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0;
-
- /// Get last modified time of file or directory at `path`.
- virtual Poco::Timestamp getLastModified(const String & path) = 0;
-
- /// Set file at `path` as read-only.
- virtual void setReadOnly(const String & path) = 0;
-
- /// Create hardlink from `src_path` to `dst_path`.
- virtual void createHardLink(const String & src_path, const String & dst_path) = 0;
-
+ /// Set last modified time to file or directory at `path`.
+ virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0;
+
+ /// Get last modified time of file or directory at `path`.
+ virtual Poco::Timestamp getLastModified(const String & path) = 0;
+
+ /// Set file at `path` as read-only.
+ virtual void setReadOnly(const String & path) = 0;
+
+ /// Create hardlink from `src_path` to `dst_path`.
+ virtual void createHardLink(const String & src_path, const String & dst_path) = 0;
+
/// Truncate file to specified size.
virtual void truncateFile(const String & path, size_t size);
- /// Return disk type - "local", "s3", etc.
+ /// Return disk type - "local", "s3", etc.
virtual DiskType getType() const = 0;
/// Involves network interaction.
@@ -265,78 +265,78 @@ protected:
private:
std::unique_ptr<Executor> executor;
-};
-
-using DiskPtr = std::shared_ptr<IDisk>;
-using Disks = std::vector<DiskPtr>;
-
-/**
- * Iterator of directory contents on particular disk.
- */
-class IDiskDirectoryIterator
-{
-public:
- /// Iterate to the next file.
- virtual void next() = 0;
-
- /// Return `true` if the iterator points to a valid element.
- virtual bool isValid() const = 0;
-
- /// Path to the file that the iterator currently points to.
- virtual String path() const = 0;
-
- /// Name of the file that the iterator currently points to.
- virtual String name() const = 0;
-
- virtual ~IDiskDirectoryIterator() = default;
-};
-
-/**
- * Information about reserved size on particular disk.
- */
-class IReservation : boost::noncopyable
-{
-public:
- /// Get reservation size.
- virtual UInt64 getSize() const = 0;
-
- /// Get i-th disk where reservation take place.
- virtual DiskPtr getDisk(size_t i = 0) const = 0;
-
- /// Get all disks, used in reservation
- virtual Disks getDisks() const = 0;
-
- /// Changes amount of reserved space.
- virtual void update(UInt64 new_size) = 0;
-
- /// Unreserves reserved space.
- virtual ~IReservation() = default;
-};
-
-/// Return full path to a file on disk.
-inline String fullPath(const DiskPtr & disk, const String & path)
-{
+};
+
+using DiskPtr = std::shared_ptr<IDisk>;
+using Disks = std::vector<DiskPtr>;
+
+/**
+ * Iterator of directory contents on particular disk.
+ */
+class IDiskDirectoryIterator
+{
+public:
+ /// Iterate to the next file.
+ virtual void next() = 0;
+
+ /// Return `true` if the iterator points to a valid element.
+ virtual bool isValid() const = 0;
+
+ /// Path to the file that the iterator currently points to.
+ virtual String path() const = 0;
+
+ /// Name of the file that the iterator currently points to.
+ virtual String name() const = 0;
+
+ virtual ~IDiskDirectoryIterator() = default;
+};
+
+/**
+ * Information about reserved size on particular disk.
+ */
+class IReservation : boost::noncopyable
+{
+public:
+ /// Get reservation size.
+ virtual UInt64 getSize() const = 0;
+
+ /// Get i-th disk where reservation take place.
+ virtual DiskPtr getDisk(size_t i = 0) const = 0;
+
+ /// Get all disks, used in reservation
+ virtual Disks getDisks() const = 0;
+
+ /// Changes amount of reserved space.
+ virtual void update(UInt64 new_size) = 0;
+
+ /// Unreserves reserved space.
+ virtual ~IReservation() = default;
+};
+
+/// Return full path to a file on disk.
+inline String fullPath(const DiskPtr & disk, const String & path)
+{
return fs::path(disk->getPath()) / path;
-}
-
-/// Return parent path for the specified path.
-inline String parentPath(const String & path)
-{
+}
+
+/// Return parent path for the specified path.
+inline String parentPath(const String & path)
+{
if (path.ends_with('/'))
return fs::path(path).parent_path().parent_path() / "";
return fs::path(path).parent_path() / "";
-}
-
-/// Return file name for the specified path.
-inline String fileName(const String & path)
-{
+}
+
+/// Return file name for the specified path.
+inline String fileName(const String & path)
+{
return fs::path(path).filename();
-}
+}
/// Return directory path for the specified path.
inline String directoryPath(const String & path)
{
return fs::path(path).parent_path() / "";
-}
+}
}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp
index 586ba81b12..dbe276f467 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp
@@ -1,19 +1,19 @@
-#include "IVolume.h"
-
-#include <Common/quoteString.h>
-
-#include <memory>
-
-namespace DB
-{
-namespace ErrorCodes
-{
+#include "IVolume.h"
+
+#include <Common/quoteString.h>
+
+#include <memory>
+
+namespace DB
+{
+namespace ErrorCodes
+{
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int INCONSISTENT_RESERVATIONS;
extern const int NO_RESERVATIONS_PROVIDED;
extern const int UNKNOWN_VOLUME_TYPE;
-}
-
+}
+
String volumeTypeToString(VolumeType type)
{
switch (type)
@@ -30,37 +30,37 @@ String volumeTypeToString(VolumeType type)
throw Exception("Unknown volume type, please add it to DB::volumeTypeToString", ErrorCodes::UNKNOWN_VOLUME_TYPE);
}
-IVolume::IVolume(
+IVolume::IVolume(
String name_,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
DiskSelectorPtr disk_selector)
- : name(std::move(name_))
-{
- Poco::Util::AbstractConfiguration::Keys keys;
- config.keys(config_prefix, keys);
-
- for (const auto & disk : keys)
- {
+ : name(std::move(name_))
+{
+ Poco::Util::AbstractConfiguration::Keys keys;
+ config.keys(config_prefix, keys);
+
+ for (const auto & disk : keys)
+ {
if (disk.starts_with("disk"))
- {
- auto disk_name = config.getString(config_prefix + "." + disk);
- disks.push_back(disk_selector->get(disk_name));
- }
- }
-
- if (disks.empty())
+ {
+ auto disk_name = config.getString(config_prefix + "." + disk);
+ disks.push_back(disk_selector->get(disk_name));
+ }
+ }
+
+ if (disks.empty())
throw Exception("Volume must contain at least one disk", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
-}
-
-UInt64 IVolume::getMaxUnreservedFreeSpace() const
-{
- UInt64 res = 0;
- for (const auto & disk : disks)
- res = std::max(res, disk->getUnreservedSpace());
- return res;
-}
-
+}
+
+UInt64 IVolume::getMaxUnreservedFreeSpace() const
+{
+ UInt64 res = 0;
+ for (const auto & disk : disks)
+ res = std::max(res, disk->getUnreservedSpace());
+ return res;
+}
+
MultiDiskReservation::MultiDiskReservation(Reservations & reservations_, UInt64 size_)
: reservations(std::move(reservations_))
, size(size_)
@@ -77,7 +77,7 @@ MultiDiskReservation::MultiDiskReservation(Reservations & reservations_, UInt64
throw Exception("Reservations must have same size", ErrorCodes::INCONSISTENT_RESERVATIONS);
}
}
-}
+}
Disks MultiDiskReservation::getDisks() const
{
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h
index c040d9d58e..c02888ae19 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h
@@ -1,78 +1,78 @@
-#pragma once
-
-#include <Disks/IDisk.h>
-#include <Disks/DiskSelector.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-namespace DB
-{
-
-enum class VolumeType
-{
- JBOD,
+#pragma once
+
+#include <Disks/IDisk.h>
+#include <Disks/DiskSelector.h>
+
+#include <Poco/Util/AbstractConfiguration.h>
+
+namespace DB
+{
+
+enum class VolumeType
+{
+ JBOD,
RAID1,
- SINGLE_DISK,
- UNKNOWN
-};
-
+ SINGLE_DISK,
+ UNKNOWN
+};
+
String volumeTypeToString(VolumeType t);
-class IVolume;
-using VolumePtr = std::shared_ptr<IVolume>;
-using Volumes = std::vector<VolumePtr>;
-
-/**
- * Disks group by some (user) criteria. For example,
- * - VolumeJBOD("slow_disks", [d1, d2], 100)
- * - VolumeJBOD("fast_disks", [d3, d4], 200)
- *
- * Here VolumeJBOD is one of implementations of IVolume.
- *
- * Different of implementations of this interface implement different reserve behaviour —
- * VolumeJBOD reserves space on the next disk after the last used, other future implementations
- * will reserve, for example, equal spaces on all disks.
- */
-class IVolume : public Space
-{
-public:
+class IVolume;
+using VolumePtr = std::shared_ptr<IVolume>;
+using Volumes = std::vector<VolumePtr>;
+
+/**
+ * Disks group by some (user) criteria. For example,
+ * - VolumeJBOD("slow_disks", [d1, d2], 100)
+ * - VolumeJBOD("fast_disks", [d3, d4], 200)
+ *
+ * Here VolumeJBOD is one of implementations of IVolume.
+ *
+ * Different of implementations of this interface implement different reserve behaviour —
+ * VolumeJBOD reserves space on the next disk after the last used, other future implementations
+ * will reserve, for example, equal spaces on all disks.
+ */
+class IVolume : public Space
+{
+public:
IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0, bool perform_ttl_move_on_insert_ = true)
: disks(std::move(disks_))
, name(name_)
, max_data_part_size(max_data_part_size_)
, perform_ttl_move_on_insert(perform_ttl_move_on_insert_)
- {
- }
-
- IVolume(
- String name_,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- DiskSelectorPtr disk_selector
- );
-
- virtual ReservationPtr reserve(UInt64 bytes) override = 0;
-
- /// Volume name from config
- const String & getName() const override { return name; }
- virtual VolumeType getType() const = 0;
-
- /// Return biggest unreserved space across all disks
- UInt64 getMaxUnreservedFreeSpace() const;
-
+ {
+ }
+
+ IVolume(
+ String name_,
+ const Poco::Util::AbstractConfiguration & config,
+ const String & config_prefix,
+ DiskSelectorPtr disk_selector
+ );
+
+ virtual ReservationPtr reserve(UInt64 bytes) override = 0;
+
+ /// Volume name from config
+ const String & getName() const override { return name; }
+ virtual VolumeType getType() const = 0;
+
+ /// Return biggest unreserved space across all disks
+ UInt64 getMaxUnreservedFreeSpace() const;
+
DiskPtr getDisk() const { return getDisk(0); }
virtual DiskPtr getDisk(size_t i) const { return disks[i]; }
- const Disks & getDisks() const { return disks; }
-
+ const Disks & getDisks() const { return disks; }
+
/// Returns effective value of whether merges are allowed on this volume (true) or not (false).
virtual bool areMergesAvoided() const { return false; }
/// User setting for enabling and disabling merges on volume.
virtual void setAvoidMergesUserOverride(bool /*avoid*/) {}
-protected:
- Disks disks;
- const String name;
+protected:
+ Disks disks;
+ const String name;
public:
/// Max size of reservation, zero means unlimited size
@@ -80,8 +80,8 @@ public:
/// Should a new data part be synchronously moved to a volume according to ttl on insert
/// or move this part in background task asynchronously after insert.
bool perform_ttl_move_on_insert = true;
-};
-
+};
+
/// Reservation for multiple disks at once. Can be used in RAID1 implementation.
class MultiDiskReservation : public IReservation
{
@@ -100,4 +100,4 @@ private:
UInt64 size;
};
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h
index bade6041ea..3a7d9a9393 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h
@@ -1,27 +1,27 @@
-#pragma once
-
-#include <Disks/IVolume.h>
-
-namespace DB
-{
-
-class SingleDiskVolume : public IVolume
-{
-public:
+#pragma once
+
+#include <Disks/IVolume.h>
+
+namespace DB
+{
+
+class SingleDiskVolume : public IVolume
+{
+public:
SingleDiskVolume(const String & name_, DiskPtr disk, size_t max_data_part_size_ = 0): IVolume(name_, {disk}, max_data_part_size_)
- {
- }
-
- ReservationPtr reserve(UInt64 bytes) override
- {
- return disks[0]->reserve(bytes);
- }
-
- VolumeType getType() const override { return VolumeType::SINGLE_DISK; }
-
-};
-
-using VolumeSingleDiskPtr = std::shared_ptr<SingleDiskVolume>;
+ {
+ }
+
+ ReservationPtr reserve(UInt64 bytes) override
+ {
+ return disks[0]->reserve(bytes);
+ }
+
+ VolumeType getType() const override { return VolumeType::SINGLE_DISK; }
+
+};
+
+using VolumeSingleDiskPtr = std::shared_ptr<SingleDiskVolume>;
using VolumesSingleDiskPtr = std::vector<VolumeSingleDiskPtr>;
-
-}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp
index f9cafbe545..1c69f1a8eb 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Formats/NativeFormat.cpp
@@ -1,34 +1,34 @@
-#include <DataStreams/NativeBlockInputStream.h>
-#include <DataStreams/NativeBlockOutputStream.h>
-#include <Formats/FormatFactory.h>
-
-
-namespace DB
-{
-
-void registerInputFormatNative(FormatFactory & factory)
-{
- factory.registerInputFormat("Native", [](
- ReadBuffer & buf,
- const Block & sample,
- UInt64 /* max_block_size */,
- FormatFactory::ReadCallback /* callback */,
- const FormatSettings &)
- {
- return std::make_shared<NativeBlockInputStream>(buf, sample, 0);
- });
-}
-
-void registerOutputFormatNative(FormatFactory & factory)
-{
- factory.registerOutputFormat("Native", [](
- WriteBuffer & buf,
- const Block & sample,
- FormatFactory::WriteCallback,
- const FormatSettings &)
- {
- return std::make_shared<NativeBlockOutputStream>(buf, 0, sample);
- });
-}
-
-}
+#include <DataStreams/NativeBlockInputStream.h>
+#include <DataStreams/NativeBlockOutputStream.h>
+#include <Formats/FormatFactory.h>
+
+
+namespace DB
+{
+
+void registerInputFormatNative(FormatFactory & factory)
+{
+ factory.registerInputFormat("Native", [](
+ ReadBuffer & buf,
+ const Block & sample,
+ UInt64 /* max_block_size */,
+ FormatFactory::ReadCallback /* callback */,
+ const FormatSettings &)
+ {
+ return std::make_shared<NativeBlockInputStream>(buf, sample, 0);
+ });
+}
+
+void registerOutputFormatNative(FormatFactory & factory)
+{
+ factory.registerOutputFormat("Native", [](
+ WriteBuffer & buf,
+ const Block & sample,
+ FormatFactory::WriteCallback,
+ const FormatSettings &)
+ {
+ return std::make_shared<NativeBlockOutputStream>(buf, 0, sample);
+ });
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp
index cfc357a055..0e31886898 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Functions/toFixedString.cpp
@@ -1,13 +1,13 @@
-#include <Functions/FunctionFactory.h>
-#include <Functions/toFixedString.h>
-
-
-namespace DB
-{
-
-void registerFunctionFixedString(FunctionFactory & factory)
-{
- factory.registerFunction<FunctionToFixedString>();
-}
-
-}
+#include <Functions/FunctionFactory.h>
+#include <Functions/toFixedString.h>
+
+
+namespace DB
+{
+
+void registerFunctionFixedString(FunctionFactory & factory)
+{
+ factory.registerFunction<FunctionToFixedString>();
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp
index b3354c42fb..d8e195329a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.cpp
@@ -1,79 +1,79 @@
-#include <unistd.h>
-#include <fcntl.h>
-
-#include <Common/ProfileEvents.h>
-#include <Common/formatReadable.h>
-#include <IO/MMapReadBufferFromFile.h>
-
-
-namespace ProfileEvents
-{
- extern const Event FileOpen;
-}
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int FILE_DOESNT_EXIST;
- extern const int CANNOT_OPEN_FILE;
- extern const int CANNOT_CLOSE_FILE;
-}
-
-
-void MMapReadBufferFromFile::open()
-{
- ProfileEvents::increment(ProfileEvents::FileOpen);
-
- fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC);
-
- if (-1 == fd)
- throwFromErrnoWithPath("Cannot open file " + file_name, file_name,
- errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
-}
-
-
-std::string MMapReadBufferFromFile::getFileName() const
-{
- return file_name;
-}
-
-
-MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_)
- : file_name(file_name_)
-{
- open();
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <Common/ProfileEvents.h>
+#include <Common/formatReadable.h>
+#include <IO/MMapReadBufferFromFile.h>
+
+
+namespace ProfileEvents
+{
+ extern const Event FileOpen;
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int FILE_DOESNT_EXIST;
+ extern const int CANNOT_OPEN_FILE;
+ extern const int CANNOT_CLOSE_FILE;
+}
+
+
+void MMapReadBufferFromFile::open()
+{
+ ProfileEvents::increment(ProfileEvents::FileOpen);
+
+ fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC);
+
+ if (-1 == fd)
+ throwFromErrnoWithPath("Cannot open file " + file_name, file_name,
+ errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
+}
+
+
+std::string MMapReadBufferFromFile::getFileName() const
+{
+ return file_name;
+}
+
+
+MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_)
+ : file_name(file_name_)
+{
+ open();
mapped.set(fd, offset, length_);
init();
-}
-
-
-MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset)
- : file_name(file_name_)
-{
- open();
+}
+
+
+MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset)
+ : file_name(file_name_)
+{
+ open();
mapped.set(fd, offset);
init();
-}
-
-
-MMapReadBufferFromFile::~MMapReadBufferFromFile()
-{
- if (fd != -1)
- close(); /// Exceptions will lead to std::terminate and that's Ok.
-}
-
-
-void MMapReadBufferFromFile::close()
-{
- finish();
-
- if (0 != ::close(fd))
- throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
-
- fd = -1;
- metric_increment.destroy();
-}
-
-}
+}
+
+
+MMapReadBufferFromFile::~MMapReadBufferFromFile()
+{
+ if (fd != -1)
+ close(); /// Exceptions will lead to std::terminate and that's Ok.
+}
+
+
+void MMapReadBufferFromFile::close()
+{
+ finish();
+
+ if (0 != ::close(fd))
+ throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
+
+ fd = -1;
+ metric_increment.destroy();
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h
index bc566a0489..c2ca6b726f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/MMapReadBufferFromFile.h
@@ -1,40 +1,40 @@
-#pragma once
-
-#include <Common/CurrentMetrics.h>
-#include <IO/MMapReadBufferFromFileDescriptor.h>
-
-
-namespace CurrentMetrics
-{
- extern const Metric OpenFileForRead;
-}
-
-
-namespace DB
-{
-
-class MMapReadBufferFromFile : public MMapReadBufferFromFileDescriptor
-{
-public:
- MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_);
-
- /// Map till end of file.
- MMapReadBufferFromFile(const std::string & file_name_, size_t offset);
-
- ~MMapReadBufferFromFile() override;
-
- void close();
-
- std::string getFileName() const override;
-
-private:
- int fd = -1;
- std::string file_name;
-
- CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
-
- void open();
-};
-
-}
-
+#pragma once
+
+#include <Common/CurrentMetrics.h>
+#include <IO/MMapReadBufferFromFileDescriptor.h>
+
+
+namespace CurrentMetrics
+{
+ extern const Metric OpenFileForRead;
+}
+
+
+namespace DB
+{
+
+class MMapReadBufferFromFile : public MMapReadBufferFromFileDescriptor
+{
+public:
+ MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_);
+
+ /// Map till end of file.
+ MMapReadBufferFromFile(const std::string & file_name_, size_t offset);
+
+ ~MMapReadBufferFromFile() override;
+
+ void close();
+
+ std::string getFileName() const override;
+
+private:
+ int fd = -1;
+ std::string file_name;
+
+ CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
+
+ void open();
+};
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp
index 50e0fad026..c44c8cad2c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.cpp
@@ -1,43 +1,43 @@
-#include <Poco/Net/NetException.h>
-
-#include <IO/ReadBufferFromPocoSocket.h>
-#include <Common/Exception.h>
-#include <Common/NetException.h>
-#include <Common/Stopwatch.h>
+#include <Poco/Net/NetException.h>
+
+#include <IO/ReadBufferFromPocoSocket.h>
+#include <Common/Exception.h>
+#include <Common/NetException.h>
+#include <Common/Stopwatch.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
-
-
-namespace ProfileEvents
-{
- extern const Event NetworkReceiveElapsedMicroseconds;
+
+
+namespace ProfileEvents
+{
+ extern const Event NetworkReceiveElapsedMicroseconds;
extern const Event NetworkReceiveBytes;
-}
-
+}
+
namespace CurrentMetrics
{
extern const Metric NetworkReceive;
}
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int NETWORK_ERROR;
- extern const int SOCKET_TIMEOUT;
- extern const int CANNOT_READ_FROM_SOCKET;
-}
-
-
-bool ReadBufferFromPocoSocket::nextImpl()
-{
- ssize_t bytes_read = 0;
- Stopwatch watch;
-
- /// Add more details to exceptions.
- try
- {
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int NETWORK_ERROR;
+ extern const int SOCKET_TIMEOUT;
+ extern const int CANNOT_READ_FROM_SOCKET;
+}
+
+
+bool ReadBufferFromPocoSocket::nextImpl()
+{
+ ssize_t bytes_read = 0;
+ Stopwatch watch;
+
+ /// Add more details to exceptions.
+ try
+ {
CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive);
/// If async_callback is specified, and read will block, run async_callback and try again later.
@@ -47,45 +47,45 @@ bool ReadBufferFromPocoSocket::nextImpl()
async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description);
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size());
- }
- catch (const Poco::Net::NetException & e)
- {
- throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
- }
- catch (const Poco::TimeoutException &)
- {
- throw NetException("Timeout exceeded while reading from socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT);
- }
- catch (const Poco::IOException & e)
- {
- throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
- }
-
- if (bytes_read < 0)
- throw NetException("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET);
-
- /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one
- ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds());
+ }
+ catch (const Poco::Net::NetException & e)
+ {
+ throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
+ }
+ catch (const Poco::TimeoutException &)
+ {
+ throw NetException("Timeout exceeded while reading from socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT);
+ }
+ catch (const Poco::IOException & e)
+ {
+ throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
+ }
+
+ if (bytes_read < 0)
+ throw NetException("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET);
+
+ /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one
+ ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read);
-
- if (bytes_read)
- working_buffer.resize(bytes_read);
- else
- return false;
-
- return true;
-}
-
-ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size)
+
+ if (bytes_read)
+ working_buffer.resize(bytes_read);
+ else
+ return false;
+
+ return true;
+}
+
+ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size)
: BufferWithOwnMemory<ReadBuffer>(buf_size)
, socket(socket_)
, peer_address(socket.peerAddress())
, socket_description("socket (" + peer_address.toString() + ")")
-{
-}
-
+{
+}
+
bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const
-{
+{
if (available())
return true;
@@ -93,6 +93,6 @@ bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const
bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds());
return res;
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h
index c60aafc7e2..d55a009db3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ReadBufferFromPocoSocket.h
@@ -1,32 +1,32 @@
-#pragma once
-
+#pragma once
+
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
-#include <Poco/Net/Socket.h>
-
-namespace DB
-{
-
+#include <Poco/Net/Socket.h>
+
+namespace DB
+{
+
using AsyncCallback = std::function<void(int, Poco::Timespan, const std::string &)>;
/// Works with the ready Poco::Net::Socket. Blocking operations.
-class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer>
-{
-protected:
- Poco::Net::Socket & socket;
-
- /** For error messages. It is necessary to receive this address in advance, because,
- * for example, if the connection is broken, the address will not be received anymore
- * (getpeername will return an error).
- */
- Poco::Net::SocketAddress peer_address;
-
- bool nextImpl() override;
-
-public:
+class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer>
+{
+protected:
+ Poco::Net::Socket & socket;
+
+ /** For error messages. It is necessary to receive this address in advance, because,
+ * for example, if the connection is broken, the address will not be received anymore
+ * (getpeername will return an error).
+ */
+ Poco::Net::SocketAddress peer_address;
+
+ bool nextImpl() override;
+
+public:
explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
-
+
bool poll(size_t timeout_microseconds) const;
void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); }
@@ -34,6 +34,6 @@ public:
private:
AsyncCallback async_callback;
std::string socket_description;
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp
index a82e526bf3..5eb714c21c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.cpp
@@ -1,45 +1,45 @@
#include <IO/TimeoutSetter.h>
+
+#include <common/logger_useful.h>
+
-#include <common/logger_useful.h>
+namespace DB
+{
-
-namespace DB
-{
-
-TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_,
+TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
- bool limit_max_timeout)
- : socket(socket_), send_timeout(send_timeout_), receive_timeout(receive_timeout_)
-{
- old_send_timeout = socket.getSendTimeout();
- old_receive_timeout = socket.getReceiveTimeout();
-
- if (!limit_max_timeout || old_send_timeout > send_timeout)
- socket.setSendTimeout(send_timeout);
-
- if (!limit_max_timeout || old_receive_timeout > receive_timeout)
- socket.setReceiveTimeout(receive_timeout);
-}
-
+ bool limit_max_timeout)
+ : socket(socket_), send_timeout(send_timeout_), receive_timeout(receive_timeout_)
+{
+ old_send_timeout = socket.getSendTimeout();
+ old_receive_timeout = socket.getReceiveTimeout();
+
+ if (!limit_max_timeout || old_send_timeout > send_timeout)
+ socket.setSendTimeout(send_timeout);
+
+ if (!limit_max_timeout || old_receive_timeout > receive_timeout)
+ socket.setReceiveTimeout(receive_timeout);
+}
+
TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan timeout_, bool limit_max_timeout)
- : TimeoutSetter(socket_, timeout_, timeout_, limit_max_timeout)
-{
-}
-
-TimeoutSetter::~TimeoutSetter()
-{
- try
- {
- socket.setSendTimeout(old_send_timeout);
- socket.setReceiveTimeout(old_receive_timeout);
- }
+ : TimeoutSetter(socket_, timeout_, timeout_, limit_max_timeout)
+{
+}
+
+TimeoutSetter::~TimeoutSetter()
+{
+ try
+ {
+ socket.setSendTimeout(old_send_timeout);
+ socket.setReceiveTimeout(old_receive_timeout);
+ }
catch (...)
- {
+ {
/// Sometimes caught on Mac OS X. This message can be safely ignored.
/// If you are developer using Mac, please debug this error message by yourself.
tryLogCurrentException("Client", "TimeoutSetter: Can't reset timeouts");
- }
-}
+ }
+}
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h
index 31c37ea07a..3fef4b1c12 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/TimeoutSetter.h
@@ -1,30 +1,30 @@
-#pragma once
-
-#include <Poco/Net/StreamSocket.h>
-#include <Poco/Timespan.h>
-
-
-namespace DB
-{
-/// Temporarily overrides socket send/receive timeouts and reset them back into destructor
-/// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value).
-struct TimeoutSetter
-{
- TimeoutSetter(Poco::Net::StreamSocket & socket_,
+#pragma once
+
+#include <Poco/Net/StreamSocket.h>
+#include <Poco/Timespan.h>
+
+
+namespace DB
+{
+/// Temporarily overrides socket send/receive timeouts and reset them back into destructor
+/// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value).
+struct TimeoutSetter
+{
+ TimeoutSetter(Poco::Net::StreamSocket & socket_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
- bool limit_max_timeout = false);
-
+ bool limit_max_timeout = false);
+
TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan timeout_, bool limit_max_timeout = false);
-
- ~TimeoutSetter();
-
- Poco::Net::StreamSocket & socket;
-
- Poco::Timespan send_timeout;
- Poco::Timespan receive_timeout;
-
- Poco::Timespan old_send_timeout;
- Poco::Timespan old_receive_timeout;
-};
-}
+
+ ~TimeoutSetter();
+
+ Poco::Net::StreamSocket & socket;
+
+ Poco::Timespan send_timeout;
+ Poco::Timespan receive_timeout;
+
+ Poco::Timespan old_send_timeout;
+ Poco::Timespan old_receive_timeout;
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp
index 67cd7ba27d..4f92572bdf 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.cpp
@@ -1,105 +1,105 @@
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#include <Common/ProfileEvents.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <Common/ProfileEvents.h>
#include <Common/MemoryTracker.h>
-
-#include <IO/WriteBufferFromFile.h>
-#include <IO/WriteHelpers.h>
-
-
-namespace ProfileEvents
-{
- extern const Event FileOpen;
-}
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int FILE_DOESNT_EXIST;
- extern const int CANNOT_OPEN_FILE;
- extern const int CANNOT_CLOSE_FILE;
-}
-
-
-WriteBufferFromFile::WriteBufferFromFile(
- const std::string & file_name_,
- size_t buf_size,
- int flags,
- mode_t mode,
- char * existing_memory,
- size_t alignment)
- : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_)
-{
- ProfileEvents::increment(ProfileEvents::FileOpen);
-
-#ifdef __APPLE__
- bool o_direct = (flags != -1) && (flags & O_DIRECT);
- if (o_direct)
- flags = flags & ~O_DIRECT;
-#endif
-
- fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC : flags | O_CLOEXEC, mode);
-
- if (-1 == fd)
- throwFromErrnoWithPath("Cannot open file " + file_name, file_name,
- errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
-
-#ifdef __APPLE__
- if (o_direct)
- {
- if (fcntl(fd, F_NOCACHE, 1) == -1)
- throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE);
- }
-#endif
-}
-
-
-/// Use pre-opened file descriptor.
-WriteBufferFromFile::WriteBufferFromFile(
+
+#include <IO/WriteBufferFromFile.h>
+#include <IO/WriteHelpers.h>
+
+
+namespace ProfileEvents
+{
+ extern const Event FileOpen;
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int FILE_DOESNT_EXIST;
+ extern const int CANNOT_OPEN_FILE;
+ extern const int CANNOT_CLOSE_FILE;
+}
+
+
+WriteBufferFromFile::WriteBufferFromFile(
+ const std::string & file_name_,
+ size_t buf_size,
+ int flags,
+ mode_t mode,
+ char * existing_memory,
+ size_t alignment)
+ : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_)
+{
+ ProfileEvents::increment(ProfileEvents::FileOpen);
+
+#ifdef __APPLE__
+ bool o_direct = (flags != -1) && (flags & O_DIRECT);
+ if (o_direct)
+ flags = flags & ~O_DIRECT;
+#endif
+
+ fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC : flags | O_CLOEXEC, mode);
+
+ if (-1 == fd)
+ throwFromErrnoWithPath("Cannot open file " + file_name, file_name,
+ errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
+
+#ifdef __APPLE__
+ if (o_direct)
+ {
+ if (fcntl(fd, F_NOCACHE, 1) == -1)
+ throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE);
+ }
+#endif
+}
+
+
+/// Use pre-opened file descriptor.
+WriteBufferFromFile::WriteBufferFromFile(
int & fd_,
- const std::string & original_file_name,
- size_t buf_size,
- char * existing_memory,
- size_t alignment)
- :
- WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
- file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
-{
+ const std::string & original_file_name,
+ size_t buf_size,
+ char * existing_memory,
+ size_t alignment)
+ :
+ WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
+ file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
+{
fd_ = -1;
-}
-
-
-WriteBufferFromFile::~WriteBufferFromFile()
-{
- if (fd < 0)
- return;
-
+}
+
+
+WriteBufferFromFile::~WriteBufferFromFile()
+{
+ if (fd < 0)
+ return;
+
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
-
+
next();
- ::close(fd);
-}
-
-
-/// Close file before destruction of object.
-void WriteBufferFromFile::close()
-{
+ ::close(fd);
+}
+
+
+/// Close file before destruction of object.
+void WriteBufferFromFile::close()
+{
if (fd < 0)
return;
- next();
-
- if (0 != ::close(fd))
- throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
-
- fd = -1;
- metric_increment.destroy();
-}
-
-}
+ next();
+
+ if (0 != ::close(fd))
+ throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
+
+ fd = -1;
+ metric_increment.destroy();
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h
index b7d5863811..d28bc441c1 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFile.h
@@ -1,59 +1,59 @@
-#pragma once
-
-#include <sys/types.h>
-
-#include <Common/CurrentMetrics.h>
-#include <IO/WriteBufferFromFileDescriptor.h>
-
-
-namespace CurrentMetrics
-{
- extern const Metric OpenFileForWrite;
-}
-
-
-#ifndef O_DIRECT
-#define O_DIRECT 00040000
-#endif
-
-namespace DB
-{
-
-/** Accepts path to file and opens it, or pre-opened file descriptor.
- * Closes file by himself (thus "owns" a file descriptor).
- */
-class WriteBufferFromFile : public WriteBufferFromFileDescriptor
-{
-protected:
- std::string file_name;
- CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite};
-
-public:
- WriteBufferFromFile(
- const std::string & file_name_,
- size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
- int flags = -1,
- mode_t mode = 0666,
- char * existing_memory = nullptr,
- size_t alignment = 0);
-
- /// Use pre-opened file descriptor.
- WriteBufferFromFile(
+#pragma once
+
+#include <sys/types.h>
+
+#include <Common/CurrentMetrics.h>
+#include <IO/WriteBufferFromFileDescriptor.h>
+
+
+namespace CurrentMetrics
+{
+ extern const Metric OpenFileForWrite;
+}
+
+
+#ifndef O_DIRECT
+#define O_DIRECT 00040000
+#endif
+
+namespace DB
+{
+
+/** Accepts path to file and opens it, or pre-opened file descriptor.
+ * Closes file by himself (thus "owns" a file descriptor).
+ */
+class WriteBufferFromFile : public WriteBufferFromFileDescriptor
+{
+protected:
+ std::string file_name;
+ CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite};
+
+public:
+ WriteBufferFromFile(
+ const std::string & file_name_,
+ size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+ int flags = -1,
+ mode_t mode = 0666,
+ char * existing_memory = nullptr,
+ size_t alignment = 0);
+
+ /// Use pre-opened file descriptor.
+ WriteBufferFromFile(
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
- const std::string & original_file_name = {},
- size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
- char * existing_memory = nullptr,
- size_t alignment = 0);
-
- ~WriteBufferFromFile() override;
-
- /// Close file before destruction of object.
- void close();
-
- std::string getFileName() const override
- {
- return file_name;
- }
-};
-
-}
+ const std::string & original_file_name = {},
+ size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+ char * existing_memory = nullptr,
+ size_t alignment = 0);
+
+ ~WriteBufferFromFile() override;
+
+ /// Close file before destruction of object.
+ void close();
+
+ std::string getFileName() const override
+ {
+ return file_name;
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp
index 2b9cbb88cd..4c5e620c0e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.cpp
@@ -1,11 +1,11 @@
-#include <IO/WriteBufferFromFileBase.h>
-
-namespace DB
-{
-
-WriteBufferFromFileBase::WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment)
- : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
-{
-}
-
-}
+#include <IO/WriteBufferFromFileBase.h>
+
+namespace DB
+{
+
+WriteBufferFromFileBase::WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment)
+ : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
+{
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h
index d35b69a7df..fc43dbcd2b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileBase.h
@@ -1,22 +1,22 @@
-#pragma once
-
-#include <string>
-#include <fcntl.h>
-
-#include <IO/WriteBuffer.h>
-#include <IO/BufferWithOwnMemory.h>
-
-namespace DB
-{
-
-class WriteBufferFromFileBase : public BufferWithOwnMemory<WriteBuffer>
-{
-public:
- WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
- ~WriteBufferFromFileBase() override = default;
-
- void sync() override = 0;
- virtual std::string getFileName() const = 0;
-};
-
-}
+#pragma once
+
+#include <string>
+#include <fcntl.h>
+
+#include <IO/WriteBuffer.h>
+#include <IO/BufferWithOwnMemory.h>
+
+namespace DB
+{
+
+class WriteBufferFromFileBase : public BufferWithOwnMemory<WriteBuffer>
+{
+public:
+ WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
+ ~WriteBufferFromFileBase() override = default;
+
+ void sync() override = 0;
+ virtual std::string getFileName() const = 0;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp
index cd265653bb..52fab90351 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.cpp
@@ -1,137 +1,137 @@
-#include <unistd.h>
-#include <errno.h>
-#include <cassert>
+#include <unistd.h>
+#include <errno.h>
+#include <cassert>
#include <sys/types.h>
#include <sys/stat.h>
-
-#include <Common/Exception.h>
-#include <Common/ProfileEvents.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/Stopwatch.h>
+
+#include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/Stopwatch.h>
#include <Common/MemoryTracker.h>
-
-#include <IO/WriteBufferFromFileDescriptor.h>
-#include <IO/WriteHelpers.h>
-
-
-namespace ProfileEvents
-{
- extern const Event WriteBufferFromFileDescriptorWrite;
- extern const Event WriteBufferFromFileDescriptorWriteFailed;
- extern const Event WriteBufferFromFileDescriptorWriteBytes;
- extern const Event DiskWriteElapsedMicroseconds;
-}
-
-namespace CurrentMetrics
-{
- extern const Metric Write;
-}
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR;
- extern const int CANNOT_FSYNC;
- extern const int CANNOT_SEEK_THROUGH_FILE;
- extern const int CANNOT_TRUNCATE_FILE;
+
+#include <IO/WriteBufferFromFileDescriptor.h>
+#include <IO/WriteHelpers.h>
+
+
+namespace ProfileEvents
+{
+ extern const Event WriteBufferFromFileDescriptorWrite;
+ extern const Event WriteBufferFromFileDescriptorWriteFailed;
+ extern const Event WriteBufferFromFileDescriptorWriteBytes;
+ extern const Event DiskWriteElapsedMicroseconds;
+}
+
+namespace CurrentMetrics
+{
+ extern const Metric Write;
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR;
+ extern const int CANNOT_FSYNC;
+ extern const int CANNOT_SEEK_THROUGH_FILE;
+ extern const int CANNOT_TRUNCATE_FILE;
extern const int CANNOT_FSTAT;
-}
-
-
-void WriteBufferFromFileDescriptor::nextImpl()
-{
- if (!offset())
- return;
-
- Stopwatch watch;
-
- size_t bytes_written = 0;
- while (bytes_written != offset())
- {
- ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWrite);
-
- ssize_t res = 0;
- {
- CurrentMetrics::Increment metric_increment{CurrentMetrics::Write};
- res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written);
- }
-
- if ((-1 == res || 0 == res) && errno != EINTR)
- {
- ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed);
- throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(),
- ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR);
- }
-
- if (res > 0)
- bytes_written += res;
- }
-
- ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds());
- ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written);
-}
-
-
-/// Name or some description of file.
-std::string WriteBufferFromFileDescriptor::getFileName() const
-{
- return "(fd = " + toString(fd) + ")";
-}
-
-
-WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
- int fd_,
- size_t buf_size,
- char * existing_memory,
- size_t alignment)
- : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {}
-
-
-WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor()
-{
+}
+
+
+void WriteBufferFromFileDescriptor::nextImpl()
+{
+ if (!offset())
+ return;
+
+ Stopwatch watch;
+
+ size_t bytes_written = 0;
+ while (bytes_written != offset())
+ {
+ ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWrite);
+
+ ssize_t res = 0;
+ {
+ CurrentMetrics::Increment metric_increment{CurrentMetrics::Write};
+ res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written);
+ }
+
+ if ((-1 == res || 0 == res) && errno != EINTR)
+ {
+ ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed);
+ throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(),
+ ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR);
+ }
+
+ if (res > 0)
+ bytes_written += res;
+ }
+
+ ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds());
+ ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written);
+}
+
+
+/// Name or some description of file.
+std::string WriteBufferFromFileDescriptor::getFileName() const
+{
+ return "(fd = " + toString(fd) + ")";
+}
+
+
+WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
+ int fd_,
+ size_t buf_size,
+ char * existing_memory,
+ size_t alignment)
+ : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {}
+
+
+WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor()
+{
if (fd < 0)
- {
+ {
assert(!offset() && "attempt to write after close");
return;
- }
+ }
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
next();
-}
-
-
-void WriteBufferFromFileDescriptor::sync()
-{
- /// If buffer has pending data - write it.
- next();
-
- /// Request OS to sync data with storage medium.
- int res = fsync(fd);
- if (-1 == res)
- throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC);
-}
-
-
-off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence)
-{
- off_t res = lseek(fd, offset, whence);
- if (-1 == res)
- throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(),
- ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
- return res;
-}
-
-
-void WriteBufferFromFileDescriptor::truncate(off_t length)
-{
- int res = ftruncate(fd, length);
- if (-1 == res)
- throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE);
-}
-
+}
+
+
+void WriteBufferFromFileDescriptor::sync()
+{
+ /// If buffer has pending data - write it.
+ next();
+
+ /// Request OS to sync data with storage medium.
+ int res = fsync(fd);
+ if (-1 == res)
+ throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC);
+}
+
+
+off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence)
+{
+ off_t res = lseek(fd, offset, whence);
+ if (-1 == res)
+ throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(),
+ ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+ return res;
+}
+
+
+void WriteBufferFromFileDescriptor::truncate(off_t length)
+{
+ int res = ftruncate(fd, length);
+ if (-1 == res)
+ throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE);
+}
+
off_t WriteBufferFromFileDescriptor::size()
{
@@ -140,6 +140,6 @@ off_t WriteBufferFromFileDescriptor::size()
if (-1 == res)
throwFromErrnoWithPath("Cannot execute fstat " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSTAT);
return buf.st_size;
-}
+}
}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h
index 18c0ac64f6..ff64661faa 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptor.h
@@ -1,51 +1,51 @@
-#pragma once
-
-#include <IO/WriteBufferFromFileBase.h>
-
-
-namespace DB
-{
-
-/** Use ready file descriptor. Does not open or close a file.
- */
-class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase
-{
-protected:
- int fd;
-
- void nextImpl() override;
-
- /// Name or some description of file.
- std::string getFileName() const override;
-
-public:
- WriteBufferFromFileDescriptor(
- int fd_ = -1,
- size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
- char * existing_memory = nullptr,
- size_t alignment = 0);
-
- /** Could be used before initialization if needed 'fd' was not passed to constructor.
- * It's not possible to change 'fd' during work.
- */
- void setFD(int fd_)
- {
- fd = fd_;
- }
-
- ~WriteBufferFromFileDescriptor() override;
-
- int getFD() const
- {
- return fd;
- }
-
- void sync() override;
-
- off_t seek(off_t offset, int whence);
- void truncate(off_t length);
+#pragma once
+
+#include <IO/WriteBufferFromFileBase.h>
+
+
+namespace DB
+{
+
+/** Use ready file descriptor. Does not open or close a file.
+ */
+class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase
+{
+protected:
+ int fd;
+
+ void nextImpl() override;
+
+ /// Name or some description of file.
+ std::string getFileName() const override;
+
+public:
+ WriteBufferFromFileDescriptor(
+ int fd_ = -1,
+ size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+ char * existing_memory = nullptr,
+ size_t alignment = 0);
+
+ /** Could be used before initialization if needed 'fd' was not passed to constructor.
+ * It's not possible to change 'fd' during work.
+ */
+ void setFD(int fd_)
+ {
+ fd = fd_;
+ }
+
+ ~WriteBufferFromFileDescriptor() override;
+
+ int getFD() const
+ {
+ return fd;
+ }
+
+ void sync() override;
+
+ off_t seek(off_t offset, int whence);
+ void truncate(off_t length);
off_t size();
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp
index 3d9c70f039..49aed03c03 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.cpp
@@ -1,29 +1,29 @@
-#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
-
-namespace ProfileEvents
-{
- extern const Event CannotWriteToWriteBufferDiscard;
-}
-
-namespace DB
-{
-
-void WriteBufferFromFileDescriptorDiscardOnFailure::nextImpl()
-{
- size_t bytes_written = 0;
- while (bytes_written != offset())
- {
- ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written);
-
- if ((-1 == res || 0 == res) && errno != EINTR)
- {
- ProfileEvents::increment(ProfileEvents::CannotWriteToWriteBufferDiscard);
- break; /// Discard
- }
-
- if (res > 0)
- bytes_written += res;
- }
-}
-
-}
+#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
+
+namespace ProfileEvents
+{
+ extern const Event CannotWriteToWriteBufferDiscard;
+}
+
+namespace DB
+{
+
+void WriteBufferFromFileDescriptorDiscardOnFailure::nextImpl()
+{
+ size_t bytes_written = 0;
+ while (bytes_written != offset())
+ {
+ ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written);
+
+ if ((-1 == res || 0 == res) && errno != EINTR)
+ {
+ ProfileEvents::increment(ProfileEvents::CannotWriteToWriteBufferDiscard);
+ break; /// Discard
+ }
+
+ if (res > 0)
+ bytes_written += res;
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h
index 53e01c3cb2..9c621095c2 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h
@@ -1,23 +1,23 @@
-#pragma once
-
-#include <IO/WriteBufferFromFileDescriptor.h>
-
-
-namespace DB
-{
-
-/** Write to file descriptor but drop the data if write would block or fail.
- * To use within signal handler. Motivating example: a signal handler invoked during execution of malloc
- * should not block because some mutex (or even worse - a spinlock) may be held.
- */
-class WriteBufferFromFileDescriptorDiscardOnFailure : public WriteBufferFromFileDescriptor
-{
-protected:
- void nextImpl() override;
-
-public:
- using WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor;
- ~WriteBufferFromFileDescriptorDiscardOnFailure() override {}
-};
-
-}
+#pragma once
+
+#include <IO/WriteBufferFromFileDescriptor.h>
+
+
+namespace DB
+{
+
+/** Write to file descriptor but drop the data if write would block or fail.
+ * To use within signal handler. Motivating example: a signal handler invoked during execution of malloc
+ * should not block because some mutex (or even worse - a spinlock) may be held.
+ */
+class WriteBufferFromFileDescriptorDiscardOnFailure : public WriteBufferFromFileDescriptor
+{
+protected:
+ void nextImpl() override;
+
+public:
+ using WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor;
+ ~WriteBufferFromFileDescriptorDiscardOnFailure() override {}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp
index a0e4de4c83..3db8d785c6 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.cpp
@@ -1,89 +1,89 @@
-#include <Poco/Net/NetException.h>
-
-#include <IO/WriteBufferFromPocoSocket.h>
-
-#include <Common/Exception.h>
-#include <Common/NetException.h>
-#include <Common/Stopwatch.h>
+#include <Poco/Net/NetException.h>
+
+#include <IO/WriteBufferFromPocoSocket.h>
+
+#include <Common/Exception.h>
+#include <Common/NetException.h>
+#include <Common/Stopwatch.h>
#include <Common/MemoryTracker.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
-
-
-namespace ProfileEvents
-{
- extern const Event NetworkSendElapsedMicroseconds;
+
+
+namespace ProfileEvents
+{
+ extern const Event NetworkSendElapsedMicroseconds;
extern const Event NetworkSendBytes;
-}
-
+}
+
namespace CurrentMetrics
{
extern const Metric NetworkSend;
}
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int NETWORK_ERROR;
- extern const int SOCKET_TIMEOUT;
- extern const int CANNOT_WRITE_TO_SOCKET;
-}
-
-
-void WriteBufferFromPocoSocket::nextImpl()
-{
- if (!offset())
- return;
-
- Stopwatch watch;
-
- size_t bytes_written = 0;
- while (bytes_written < offset())
- {
- ssize_t res = 0;
-
- /// Add more details to exceptions.
- try
- {
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NETWORK_ERROR;
+ extern const int SOCKET_TIMEOUT;
+ extern const int CANNOT_WRITE_TO_SOCKET;
+}
+
+
+void WriteBufferFromPocoSocket::nextImpl()
+{
+ if (!offset())
+ return;
+
+ Stopwatch watch;
+
+ size_t bytes_written = 0;
+ while (bytes_written < offset())
+ {
+ ssize_t res = 0;
+
+ /// Add more details to exceptions.
+ try
+ {
CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkSend);
- res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written);
- }
- catch (const Poco::Net::NetException & e)
- {
- throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
- }
- catch (const Poco::TimeoutException &)
- {
- throw NetException("Timeout exceeded while writing to socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT);
- }
- catch (const Poco::IOException & e)
- {
- throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
- }
-
- if (res < 0)
- throw NetException("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET);
-
- bytes_written += res;
- }
-
- ProfileEvents::increment(ProfileEvents::NetworkSendElapsedMicroseconds, watch.elapsedMicroseconds());
+ res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written);
+ }
+ catch (const Poco::Net::NetException & e)
+ {
+ throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
+ }
+ catch (const Poco::TimeoutException &)
+ {
+ throw NetException("Timeout exceeded while writing to socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT);
+ }
+ catch (const Poco::IOException & e)
+ {
+ throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
+ }
+
+ if (res < 0)
+ throw NetException("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET);
+
+ bytes_written += res;
+ }
+
+ ProfileEvents::increment(ProfileEvents::NetworkSendElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::NetworkSendBytes, bytes_written);
-}
-
-WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size)
- : BufferWithOwnMemory<WriteBuffer>(buf_size), socket(socket_), peer_address(socket.peerAddress())
-{
-}
-
-WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket()
-{
+}
+
+WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size)
+ : BufferWithOwnMemory<WriteBuffer>(buf_size), socket(socket_), peer_address(socket.peerAddress())
+{
+}
+
+WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket()
+{
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
next();
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h
index 6f5142086b..bb0a8bd980 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/WriteBufferFromPocoSocket.h
@@ -1,34 +1,34 @@
-#pragma once
-
-#include <Poco/Net/Socket.h>
-
-#include <IO/WriteBuffer.h>
-#include <IO/BufferWithOwnMemory.h>
-
-
-namespace DB
-{
-
-/** Works with the ready Poco::Net::Socket. Blocking operations.
- */
-class WriteBufferFromPocoSocket : public BufferWithOwnMemory<WriteBuffer>
-{
-protected:
- Poco::Net::Socket & socket;
-
- /** For error messages. It is necessary to receive this address in advance, because,
- * for example, if the connection is broken, the address will not be received anymore
- * (getpeername will return an error).
- */
- Poco::Net::SocketAddress peer_address;
-
-
- void nextImpl() override;
-
-public:
- WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
-
- ~WriteBufferFromPocoSocket() override;
-};
-
-}
+#pragma once
+
+#include <Poco/Net/Socket.h>
+
+#include <IO/WriteBuffer.h>
+#include <IO/BufferWithOwnMemory.h>
+
+
+namespace DB
+{
+
+/** Works with the ready Poco::Net::Socket. Blocking operations.
+ */
+class WriteBufferFromPocoSocket : public BufferWithOwnMemory<WriteBuffer>
+{
+protected:
+ Poco::Net::Socket & socket;
+
+ /** For error messages. It is necessary to receive this address in advance, because,
+ * for example, if the connection is broken, the address will not be received anymore
+ * (getpeername will return an error).
+ */
+ Poco::Net::SocketAddress peer_address;
+
+
+ void nextImpl() override;
+
+public:
+ WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+
+ ~WriteBufferFromPocoSocket() override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h
index dc94c2981b..512d168ffc 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h
@@ -1,69 +1,69 @@
-#pragma once
-
-#include <array>
-
-#include <Common/SipHash.h>
-#include <Common/Arena.h>
-#include <Common/HashTable/Hash.h>
-#include <Common/memcpySmall.h>
-#include <Common/assert_cast.h>
-#include <Core/Defines.h>
-#include <common/StringRef.h>
-#include <Columns/IColumn.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnLowCardinality.h>
-
+#pragma once
+
+#include <array>
+
+#include <Common/SipHash.h>
+#include <Common/Arena.h>
+#include <Common/HashTable/Hash.h>
+#include <Common/memcpySmall.h>
+#include <Common/assert_cast.h>
+#include <Core/Defines.h>
+#include <common/StringRef.h>
+#include <Columns/IColumn.h>
+#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnLowCardinality.h>
+
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
#include <tmmintrin.h>
#endif
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-using Sizes = std::vector<size_t>;
-
-/// When packing the values of nullable columns at a given row, we have to
-/// store the fact that these values are nullable or not. This is achieved
-/// by encoding this information as a bitmap. Let S be the size in bytes of
-/// a packed values binary blob and T the number of bytes we may place into
-/// this blob, the size that the bitmap shall occupy in the blob is equal to:
-/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for
-/// each value of S, the corresponding value of T, and the bitmap size:
-///
-/// 32,28,4
-/// 16,14,2
-/// 8,7,1
-/// 4,3,1
-/// 2,1,1
-///
-
-namespace
-{
-
-template <typename T>
-constexpr auto getBitmapSize()
-{
- return
- (sizeof(T) == 32) ?
- 4 :
- (sizeof(T) == 16) ?
- 2 :
- ((sizeof(T) == 8) ?
- 1 :
- ((sizeof(T) == 4) ?
- 1 :
- ((sizeof(T) == 2) ?
- 1 :
- 0)));
-}
-
-}
-
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+using Sizes = std::vector<size_t>;
+
+/// When packing the values of nullable columns at a given row, we have to
+/// store the fact that these values are nullable or not. This is achieved
+/// by encoding this information as a bitmap. Let S be the size in bytes of
+/// a packed values binary blob and T the number of bytes we may place into
+/// this blob, the size that the bitmap shall occupy in the blob is equal to:
+/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for
+/// each value of S, the corresponding value of T, and the bitmap size:
+///
+/// 32,28,4
+/// 16,14,2
+/// 8,7,1
+/// 4,3,1
+/// 2,1,1
+///
+
+namespace
+{
+
+template <typename T>
+constexpr auto getBitmapSize()
+{
+ return
+ (sizeof(T) == 32) ?
+ 4 :
+ (sizeof(T) == 16) ?
+ 2 :
+ ((sizeof(T) == 8) ?
+ 1 :
+ ((sizeof(T) == 4) ?
+ 1 :
+ ((sizeof(T) == 2) ?
+ 1 :
+ 0)));
+}
+
+}
+
template<typename T, size_t step>
void fillFixedBatch(size_t num_rows, const T * source, T * dest)
{
@@ -104,7 +104,7 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S
/// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the
/// binary blob. Keys are placed starting from the longest one.
-template <typename T>
+template <typename T>
void packFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, PaddedPODArray<T> & out)
{
size_t offset = 0;
@@ -116,194 +116,194 @@ void packFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S
}
template <typename T>
-using KeysNullMap = std::array<UInt8, getBitmapSize<T>()>;
-
-/// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the
-/// binary blob, they are disposed in it consecutively.
-template <typename T, bool has_low_cardinality = false>
-static inline T ALWAYS_INLINE packFixed(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
- const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr,
- const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr)
-{
+using KeysNullMap = std::array<UInt8, getBitmapSize<T>()>;
+
+/// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the
+/// binary blob, they are disposed in it consecutively.
+template <typename T, bool has_low_cardinality = false>
+static inline T ALWAYS_INLINE packFixed(
+ size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
+ const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr,
+ const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr)
+{
T key{};
char * bytes = reinterpret_cast<char *>(&key);
- size_t offset = 0;
-
- for (size_t j = 0; j < keys_size; ++j)
- {
- size_t index = i;
- const IColumn * column = key_columns[j];
- if constexpr (has_low_cardinality)
- {
- if (const IColumn * positions = (*low_cardinality_positions)[j])
- {
- switch ((*low_cardinality_sizes)[j])
- {
- case sizeof(UInt8): index = assert_cast<const ColumnUInt8 *>(positions)->getElement(i); break;
- case sizeof(UInt16): index = assert_cast<const ColumnUInt16 *>(positions)->getElement(i); break;
- case sizeof(UInt32): index = assert_cast<const ColumnUInt32 *>(positions)->getElement(i); break;
- case sizeof(UInt64): index = assert_cast<const ColumnUInt64 *>(positions)->getElement(i); break;
- default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
- }
- }
- }
-
- switch (key_sizes[j])
- {
- case 1:
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1);
- offset += 1;
- }
- break;
- case 2:
- if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size.
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2);
- offset += 2;
- }
- break;
- case 4:
- if constexpr (sizeof(T) >= 4)
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4);
- offset += 4;
- }
- break;
- case 8:
- if constexpr (sizeof(T) >= 8)
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8);
- offset += 8;
- }
- break;
- default:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]);
- offset += key_sizes[j];
- }
- }
-
- return key;
-}
-
-/// Similar as above but supports nullable values.
-template <typename T>
-static inline T ALWAYS_INLINE packFixed(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
- const KeysNullMap<T> & bitmap)
-{
- union
- {
- T key;
- char bytes[sizeof(key)] = {};
- };
-
- size_t offset = 0;
-
- static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
- static constexpr bool has_bitmap = bitmap_size > 0;
-
- if (has_bitmap)
- {
- memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8));
- offset += bitmap_size;
- }
-
- for (size_t j = 0; j < keys_size; ++j)
- {
- bool is_null;
-
- if (!has_bitmap)
- is_null = false;
- else
- {
- size_t bucket = j / 8;
- size_t off = j % 8;
- is_null = ((bitmap[bucket] >> off) & 1) == 1;
- }
-
- if (is_null)
- continue;
-
- switch (key_sizes[j])
- {
- case 1:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1);
- offset += 1;
- break;
- case 2:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2);
- offset += 2;
- break;
- case 4:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4);
- offset += 4;
- break;
- case 8:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8);
- offset += 8;
- break;
- default:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]);
- offset += key_sizes[j];
- }
- }
-
- return key;
-}
-
-
-/// Hash a set of keys into a UInt128 value.
-static inline UInt128 ALWAYS_INLINE hash128(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns)
-{
- UInt128 key;
- SipHash hash;
-
- for (size_t j = 0; j < keys_size; ++j)
- key_columns[j]->updateHashWithValue(i, hash);
-
+ size_t offset = 0;
+
+ for (size_t j = 0; j < keys_size; ++j)
+ {
+ size_t index = i;
+ const IColumn * column = key_columns[j];
+ if constexpr (has_low_cardinality)
+ {
+ if (const IColumn * positions = (*low_cardinality_positions)[j])
+ {
+ switch ((*low_cardinality_sizes)[j])
+ {
+ case sizeof(UInt8): index = assert_cast<const ColumnUInt8 *>(positions)->getElement(i); break;
+ case sizeof(UInt16): index = assert_cast<const ColumnUInt16 *>(positions)->getElement(i); break;
+ case sizeof(UInt32): index = assert_cast<const ColumnUInt32 *>(positions)->getElement(i); break;
+ case sizeof(UInt64): index = assert_cast<const ColumnUInt64 *>(positions)->getElement(i); break;
+ default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
+ }
+ }
+ }
+
+ switch (key_sizes[j])
+ {
+ case 1:
+ {
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1);
+ offset += 1;
+ }
+ break;
+ case 2:
+ if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size.
+ {
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2);
+ offset += 2;
+ }
+ break;
+ case 4:
+ if constexpr (sizeof(T) >= 4)
+ {
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4);
+ offset += 4;
+ }
+ break;
+ case 8:
+ if constexpr (sizeof(T) >= 8)
+ {
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8);
+ offset += 8;
+ }
+ break;
+ default:
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]);
+ offset += key_sizes[j];
+ }
+ }
+
+ return key;
+}
+
+/// Similar as above but supports nullable values.
+template <typename T>
+static inline T ALWAYS_INLINE packFixed(
+ size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
+ const KeysNullMap<T> & bitmap)
+{
+ union
+ {
+ T key;
+ char bytes[sizeof(key)] = {};
+ };
+
+ size_t offset = 0;
+
+ static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
+ static constexpr bool has_bitmap = bitmap_size > 0;
+
+ if (has_bitmap)
+ {
+ memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8));
+ offset += bitmap_size;
+ }
+
+ for (size_t j = 0; j < keys_size; ++j)
+ {
+ bool is_null;
+
+ if (!has_bitmap)
+ is_null = false;
+ else
+ {
+ size_t bucket = j / 8;
+ size_t off = j % 8;
+ is_null = ((bitmap[bucket] >> off) & 1) == 1;
+ }
+
+ if (is_null)
+ continue;
+
+ switch (key_sizes[j])
+ {
+ case 1:
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1);
+ offset += 1;
+ break;
+ case 2:
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2);
+ offset += 2;
+ break;
+ case 4:
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4);
+ offset += 4;
+ break;
+ case 8:
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8);
+ offset += 8;
+ break;
+ default:
+ memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]);
+ offset += key_sizes[j];
+ }
+ }
+
+ return key;
+}
+
+
+/// Hash a set of keys into a UInt128 value.
+static inline UInt128 ALWAYS_INLINE hash128(
+ size_t i, size_t keys_size, const ColumnRawPtrs & key_columns)
+{
+ UInt128 key;
+ SipHash hash;
+
+ for (size_t j = 0; j < keys_size; ++j)
+ key_columns[j]->updateHashWithValue(i, hash);
+
hash.get128(key);
-
- return key;
-}
-
-
-/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first.
-static inline StringRef * ALWAYS_INLINE placeKeysInPool(
- size_t keys_size, StringRefs & keys, Arena & pool)
-{
- for (size_t j = 0; j < keys_size; ++j)
- {
- char * place = pool.alloc(keys[j].size);
- memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size);
- keys[j].data = place;
- }
-
- /// Place the StringRefs on the newly copied keys in the pool.
- char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef));
- memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef));
-
- return reinterpret_cast<StringRef *>(res);
-}
-
-
-/** Serialize keys into a continuous chunk of memory.
- */
-static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool)
-{
- const char * begin = nullptr;
-
- size_t sum_size = 0;
- for (size_t j = 0; j < keys_size; ++j)
- sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size;
-
- return {begin, sum_size};
-}
-
-
+
+ return key;
+}
+
+
+/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first.
+static inline StringRef * ALWAYS_INLINE placeKeysInPool(
+ size_t keys_size, StringRefs & keys, Arena & pool)
+{
+ for (size_t j = 0; j < keys_size; ++j)
+ {
+ char * place = pool.alloc(keys[j].size);
+ memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size);
+ keys[j].data = place;
+ }
+
+ /// Place the StringRefs on the newly copied keys in the pool.
+ char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef));
+ memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef));
+
+ return reinterpret_cast<StringRef *>(res);
+}
+
+
+/** Serialize keys into a continuous chunk of memory.
+ */
+static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous(
+ size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool)
+{
+ const char * begin = nullptr;
+
+ size_t sum_size = 0;
+ for (size_t j = 0; j < keys_size; ++j)
+ sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size;
+
+ return {begin, sum_size};
+}
+
+
/** Pack elements with shuffle instruction.
* See the explanation in ColumnsHashing.h
*/
@@ -333,7 +333,7 @@ static T inline packFixedShuffle(
T out;
__builtin_memcpy(&out, &res, sizeof(T));
return out;
-}
+}
#endif
}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h
index d7b8ebca83..e64abbb16a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h
@@ -1,383 +1,383 @@
-#pragma once
-
-#include <mutex>
-#include <memory>
-#include <functional>
-
-#include <common/logger_useful.h>
-
-#include <common/StringRef.h>
-#include <Common/Arena.h>
-#include <Common/HashTable/FixedHashMap.h>
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/TwoLevelHashMap.h>
-#include <Common/HashTable/StringHashMap.h>
-#include <Common/HashTable/TwoLevelStringHashMap.h>
-
-#include <Common/ThreadPool.h>
-#include <Common/ColumnsHashing.h>
-#include <Common/assert_cast.h>
-#include <Common/filesystemHelpers.h>
-
-#include <DataStreams/IBlockStream_fwd.h>
-#include <DataStreams/SizeLimits.h>
-
+#pragma once
+
+#include <mutex>
+#include <memory>
+#include <functional>
+
+#include <common/logger_useful.h>
+
+#include <common/StringRef.h>
+#include <Common/Arena.h>
+#include <Common/HashTable/FixedHashMap.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/TwoLevelHashMap.h>
+#include <Common/HashTable/StringHashMap.h>
+#include <Common/HashTable/TwoLevelStringHashMap.h>
+
+#include <Common/ThreadPool.h>
+#include <Common/ColumnsHashing.h>
+#include <Common/assert_cast.h>
+#include <Common/filesystemHelpers.h>
+
+#include <DataStreams/IBlockStream_fwd.h>
+#include <DataStreams/SizeLimits.h>
+
#include <Disks/SingleDiskVolume.h>
-#include <Interpreters/AggregateDescription.h>
-#include <Interpreters/AggregationCommon.h>
+#include <Interpreters/AggregateDescription.h>
+#include <Interpreters/AggregationCommon.h>
//#include <Interpreters/JIT/compileFunction.h>
-
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnAggregateFunction.h>
-#include <Columns/ColumnVector.h>
-#include <Columns/ColumnNullable.h>
-#include <Columns/ColumnLowCardinality.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
-}
-
-class IBlockOutputStream;
-
-/** Different data structures that can be used for aggregation
- * For efficiency, the aggregation data itself is put into the pool.
- * Data and pool ownership (states of aggregate functions)
- * is acquired later - in `convertToBlocks` function, by the ColumnAggregateFunction object.
- *
- * Most data structures exist in two versions: normal and two-level (TwoLevel).
- * A two-level hash table works a little slower with a small number of different keys,
- * but with a large number of different keys scales better, because it allows
- * parallelize some operations (merging, post-processing) in a natural way.
- *
- * To ensure efficient work over a wide range of conditions,
- * first single-level hash tables are used,
- * and when the number of different keys is large enough,
- * they are converted to two-level ones.
- *
- * PS. There are many different approaches to the effective implementation of parallel and distributed aggregation,
- * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons.
- */
-
-using AggregatedDataWithoutKey = AggregateDataPtr;
-
+
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnAggregateFunction.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnLowCardinality.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
+}
+
+class IBlockOutputStream;
+
+/** Different data structures that can be used for aggregation
+ * For efficiency, the aggregation data itself is put into the pool.
+ * Data and pool ownership (states of aggregate functions)
+ * is acquired later - in `convertToBlocks` function, by the ColumnAggregateFunction object.
+ *
+ * Most data structures exist in two versions: normal and two-level (TwoLevel).
+ * A two-level hash table works a little slower with a small number of different keys,
+ * but with a large number of different keys scales better, because it allows
+ * parallelize some operations (merging, post-processing) in a natural way.
+ *
+ * To ensure efficient work over a wide range of conditions,
+ * first single-level hash tables are used,
+ * and when the number of different keys is large enough,
+ * they are converted to two-level ones.
+ *
+ * PS. There are many different approaches to the effective implementation of parallel and distributed aggregation,
+ * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons.
+ */
+
+using AggregatedDataWithoutKey = AggregateDataPtr;
+
using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize<UInt8, AggregateDataPtr>;
using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap<UInt16, AggregateDataPtr>;
-
-using AggregatedDataWithUInt32Key = HashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
-using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
-
-using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>;
-
-using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
-
-using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
+
+using AggregatedDataWithUInt32Key = HashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
+using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
+
+using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>;
+
+using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
+
+using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
using AggregatedDataWithKeys256 = HashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;
-
-using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
-using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
-
-using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>;
-
-using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>;
-
-using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
+
+using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
+using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
+
+using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>;
+
+using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>;
+
+using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;
-
-/** Variants with better hash function, using more than 32 bits for hash.
- * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion,
- * but we keep in memory and merge only sub-partition of them simultaneously.
- * TODO We need to switch for better hash function not only for external aggregation,
- * but also for huge aggregation results on machines with terabytes of RAM.
- */
-
-using AggregatedDataWithUInt64KeyHash64 = HashMap<UInt64, AggregateDataPtr, DefaultHash<UInt64>>;
-using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash<StringRef, AggregateDataPtr, StringRefHash64>;
-using AggregatedDataWithKeys128Hash64 = HashMap<UInt128, AggregateDataPtr, UInt128Hash>;
+
+/** Variants with better hash function, using more than 32 bits for hash.
+ * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion,
+ * but we keep in memory and merge only sub-partition of them simultaneously.
+ * TODO We need to switch for better hash function not only for external aggregation,
+ * but also for huge aggregation results on machines with terabytes of RAM.
+ */
+
+using AggregatedDataWithUInt64KeyHash64 = HashMap<UInt64, AggregateDataPtr, DefaultHash<UInt64>>;
+using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash<StringRef, AggregateDataPtr, StringRefHash64>;
+using AggregatedDataWithKeys128Hash64 = HashMap<UInt128, AggregateDataPtr, UInt128Hash>;
using AggregatedDataWithKeys256Hash64 = HashMap<UInt256, AggregateDataPtr, UInt256Hash>;
-
-template <typename Base>
-struct AggregationDataWithNullKey : public Base
-{
- using Base::Base;
-
- bool & hasNullKeyData() { return has_null_key_data; }
- AggregateDataPtr & getNullKeyData() { return null_key_data; }
- bool hasNullKeyData() const { return has_null_key_data; }
- const AggregateDataPtr & getNullKeyData() const { return null_key_data; }
- size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); }
- bool empty() const { return Base::empty() && !has_null_key_data; }
- void clear()
- {
- Base::clear();
- has_null_key_data = false;
- }
- void clearAndShrink()
- {
- Base::clearAndShrink();
- has_null_key_data = false;
- }
-
-private:
- bool has_null_key_data = false;
- AggregateDataPtr null_key_data = nullptr;
-};
-
-template <typename Base>
-struct AggregationDataWithNullKeyTwoLevel : public Base
-{
- using Base::impls;
-
+
+template <typename Base>
+struct AggregationDataWithNullKey : public Base
+{
+ using Base::Base;
+
+ bool & hasNullKeyData() { return has_null_key_data; }
+ AggregateDataPtr & getNullKeyData() { return null_key_data; }
+ bool hasNullKeyData() const { return has_null_key_data; }
+ const AggregateDataPtr & getNullKeyData() const { return null_key_data; }
+ size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); }
+ bool empty() const { return Base::empty() && !has_null_key_data; }
+ void clear()
+ {
+ Base::clear();
+ has_null_key_data = false;
+ }
+ void clearAndShrink()
+ {
+ Base::clearAndShrink();
+ has_null_key_data = false;
+ }
+
+private:
+ bool has_null_key_data = false;
+ AggregateDataPtr null_key_data = nullptr;
+};
+
+template <typename Base>
+struct AggregationDataWithNullKeyTwoLevel : public Base
+{
+ using Base::impls;
+
AggregationDataWithNullKeyTwoLevel() = default;
-
- template <typename Other>
- explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other)
- {
- impls[0].hasNullKeyData() = other.hasNullKeyData();
- impls[0].getNullKeyData() = other.getNullKeyData();
- }
-
- bool & hasNullKeyData() { return impls[0].hasNullKeyData(); }
- AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); }
- bool hasNullKeyData() const { return impls[0].hasNullKeyData(); }
- const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); }
-};
-
-template <typename ... Types>
-using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>;
-template <typename ... Types>
-using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>;
-
-using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
-using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
-
-using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>;
-using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>;
-
-using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
- TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
- TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
-
-using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
- TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>;
-
-using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
- TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
- TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
-
-
-/// For the case where there is one numeric key.
-/// FieldType is UInt8/16/32/64 for any type with corresponding bit width.
-template <typename FieldType, typename TData,
- bool consecutive_keys_optimization = true>
-struct AggregationMethodOneNumber
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
+
+ template <typename Other>
+ explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other)
+ {
+ impls[0].hasNullKeyData() = other.hasNullKeyData();
+ impls[0].getNullKeyData() = other.getNullKeyData();
+ }
+
+ bool & hasNullKeyData() { return impls[0].hasNullKeyData(); }
+ AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); }
+ bool hasNullKeyData() const { return impls[0].hasNullKeyData(); }
+ const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); }
+};
+
+template <typename ... Types>
+using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>;
+template <typename ... Types>
+using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>;
+
+using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
+using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
+
+using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>;
+using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>;
+
+using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
+ TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
+ TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
+
+using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
+ TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>;
+
+using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
+ TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
+ TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
+
+
+/// For the case where there is one numeric key.
+/// FieldType is UInt8/16/32/64 for any type with corresponding bit width.
+template <typename FieldType, typename TData,
+ bool consecutive_keys_optimization = true>
+struct AggregationMethodOneNumber
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+
+ Data data;
+
AggregationMethodOneNumber() = default;
-
- template <typename Other>
- AggregationMethodOneNumber(const Other & other) : data(other.data) {}
-
- /// To use one `Method` in different threads, use different `State`.
- using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type,
- Mapped, FieldType, consecutive_keys_optimization>;
-
- /// Use optimization for low cardinality.
- static const bool low_cardinality_optimization = false;
-
+
+ template <typename Other>
+ AggregationMethodOneNumber(const Other & other) : data(other.data) {}
+
+ /// To use one `Method` in different threads, use different `State`.
+ using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type,
+ Mapped, FieldType, consecutive_keys_optimization>;
+
+ /// Use optimization for low cardinality.
+ static const bool low_cardinality_optimization = false;
+
/// Shuffle key columns before `insertKeyIntoColumns` call if needed.
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
- // Insert the key from the hash table into columns.
+ // Insert the key from the hash table into columns.
static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/)
- {
+ {
const auto * key_holder = reinterpret_cast<const char *>(&key);
auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]);
- column->insertRawData<sizeof(FieldType)>(key_holder);
- }
-};
-
-
-/// For the case where there is one string key.
-template <typename TData>
-struct AggregationMethodString
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
+ column->insertRawData<sizeof(FieldType)>(key_holder);
+ }
+};
+
+
+/// For the case where there is one string key.
+template <typename TData>
+struct AggregationMethodString
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+
+ Data data;
+
AggregationMethodString() = default;
-
- template <typename Other>
- AggregationMethodString(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
-
- static const bool low_cardinality_optimization = false;
-
+
+ template <typename Other>
+ AggregationMethodString(const Other & other) : data(other.data) {}
+
+ using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
+
+ static const bool low_cardinality_optimization = false;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
+ {
static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-
-/// Same as above but without cache
-template <typename TData>
-struct AggregationMethodStringNoCache
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
+ }
+};
+
+
+/// Same as above but without cache
+template <typename TData>
+struct AggregationMethodStringNoCache
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+
+ Data data;
+
AggregationMethodStringNoCache() = default;
-
- template <typename Other>
- AggregationMethodStringNoCache(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
-
- static const bool low_cardinality_optimization = false;
-
+
+ template <typename Other>
+ AggregationMethodStringNoCache(const Other & other) : data(other.data) {}
+
+ using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
+
+ static const bool low_cardinality_optimization = false;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
+ {
static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-
-/// For the case where there is one fixed-length string key.
-template <typename TData>
-struct AggregationMethodFixedString
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
+ }
+};
+
+
+/// For the case where there is one fixed-length string key.
+template <typename TData>
+struct AggregationMethodFixedString
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+
+ Data data;
+
AggregationMethodFixedString() = default;
-
- template <typename Other>
- AggregationMethodFixedString(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>;
-
- static const bool low_cardinality_optimization = false;
-
+
+ template <typename Other>
+ AggregationMethodFixedString(const Other & other) : data(other.data) {}
+
+ using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>;
+
+ static const bool low_cardinality_optimization = false;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
+ {
static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-/// Same as above but without cache
-template <typename TData>
-struct AggregationMethodFixedStringNoCache
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
+ }
+};
+
+/// Same as above but without cache
+template <typename TData>
+struct AggregationMethodFixedStringNoCache
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+
+ Data data;
+
AggregationMethodFixedStringNoCache() = default;
-
- template <typename Other>
- AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
-
- static const bool low_cardinality_optimization = false;
-
+
+ template <typename Other>
+ AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {}
+
+ using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
+
+ static const bool low_cardinality_optimization = false;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
+ {
static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-
-/// Single low cardinality column.
-template <typename SingleColumnMethod>
-struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
-{
- using Base = SingleColumnMethod;
- using BaseState = typename Base::State;
-
- using Data = typename Base::Data;
- using Key = typename Base::Key;
- using Mapped = typename Base::Mapped;
-
- using Base::data;
-
- AggregationMethodSingleLowCardinalityColumn() = default;
-
- template <typename Other>
- explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {}
-
- using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn<BaseState, Mapped, true>;
-
- static const bool low_cardinality_optimization = true;
-
+ }
+};
+
+
+/// Single low cardinality column.
+template <typename SingleColumnMethod>
+struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
+{
+ using Base = SingleColumnMethod;
+ using BaseState = typename Base::State;
+
+ using Data = typename Base::Data;
+ using Key = typename Base::Key;
+ using Mapped = typename Base::Mapped;
+
+ using Base::data;
+
+ AggregationMethodSingleLowCardinalityColumn() = default;
+
+ template <typename Other>
+ explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {}
+
+ using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn<BaseState, Mapped, true>;
+
+ static const bool low_cardinality_optimization = true;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
- static void insertKeyIntoColumns(const Key & key,
+ static void insertKeyIntoColumns(const Key & key,
std::vector<IColumn *> & key_columns_low_cardinality, const Sizes & /*key_sizes*/)
- {
+ {
auto * col = assert_cast<ColumnLowCardinality *>(key_columns_low_cardinality[0]);
-
- if constexpr (std::is_same_v<Key, StringRef>)
- {
- col->insertData(key.data, key.size);
- }
- else
- {
- col->insertData(reinterpret_cast<const char *>(&key), sizeof(key));
- }
- }
-};
-
-
-/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits.
-template <typename TData, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
-struct AggregationMethodKeysFixed
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
- static constexpr bool has_nullable_keys = has_nullable_keys_;
- static constexpr bool has_low_cardinality = has_low_cardinality_;
-
- Data data;
-
+
+ if constexpr (std::is_same_v<Key, StringRef>)
+ {
+ col->insertData(key.data, key.size);
+ }
+ else
+ {
+ col->insertData(reinterpret_cast<const char *>(&key), sizeof(key));
+ }
+ }
+};
+
+
+/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits.
+template <typename TData, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
+struct AggregationMethodKeysFixed
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+ static constexpr bool has_nullable_keys = has_nullable_keys_;
+ static constexpr bool has_low_cardinality = has_low_cardinality_;
+
+ Data data;
+
AggregationMethodKeysFixed() = default;
-
- template <typename Other>
- AggregationMethodKeysFixed(const Other & other) : data(other.data) {}
-
+
+ template <typename Other>
+ AggregationMethodKeysFixed(const Other & other) : data(other.data) {}
+
using State = ColumnsHashing::HashMethodKeysFixed<
typename Data::value_type,
Key,
@@ -385,707 +385,707 @@ struct AggregationMethodKeysFixed
has_nullable_keys,
has_low_cardinality,
use_cache>;
-
- static const bool low_cardinality_optimization = false;
-
+
+ static const bool low_cardinality_optimization = false;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
- {
+ {
return State::shuffleKeyColumns(key_columns, key_sizes);
}
static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
{
- size_t keys_size = key_columns.size();
-
- static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
- /// In any hash key value, column values to be read start just after the bitmap, if it exists.
- size_t pos = bitmap_size;
-
- for (size_t i = 0; i < keys_size; ++i)
- {
- IColumn * observed_column;
- ColumnUInt8 * null_map;
-
- bool column_nullable = false;
- if constexpr (has_nullable_keys)
- column_nullable = isColumnNullable(*key_columns[i]);
-
- /// If we have a nullable column, get its nested column and its null map.
- if (column_nullable)
- {
- ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[i]);
- observed_column = &nullable_col.getNestedColumn();
- null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn());
- }
- else
- {
+ size_t keys_size = key_columns.size();
+
+ static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
+ /// In any hash key value, column values to be read start just after the bitmap, if it exists.
+ size_t pos = bitmap_size;
+
+ for (size_t i = 0; i < keys_size; ++i)
+ {
+ IColumn * observed_column;
+ ColumnUInt8 * null_map;
+
+ bool column_nullable = false;
+ if constexpr (has_nullable_keys)
+ column_nullable = isColumnNullable(*key_columns[i]);
+
+ /// If we have a nullable column, get its nested column and its null map.
+ if (column_nullable)
+ {
+ ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[i]);
+ observed_column = &nullable_col.getNestedColumn();
+ null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn());
+ }
+ else
+ {
observed_column = key_columns[i];
- null_map = nullptr;
- }
-
- bool is_null = false;
- if (column_nullable)
- {
- /// The current column is nullable. Check if the value of the
- /// corresponding key is nullable. Update the null map accordingly.
- size_t bucket = i / 8;
- size_t offset = i % 8;
- UInt8 val = (reinterpret_cast<const UInt8 *>(&key)[bucket] >> offset) & 1;
- null_map->insertValue(val);
- is_null = val == 1;
- }
-
- if (has_nullable_keys && is_null)
- observed_column->insertDefault();
- else
- {
- size_t size = key_sizes[i];
- observed_column->insertData(reinterpret_cast<const char *>(&key) + pos, size);
- pos += size;
- }
- }
- }
-};
-
-
-/** Aggregates by concatenating serialized key values.
- * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts.
- * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
- * Therefore, when aggregating by several strings, there is no ambiguity.
- */
-template <typename TData>
-struct AggregationMethodSerialized
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
+ null_map = nullptr;
+ }
+
+ bool is_null = false;
+ if (column_nullable)
+ {
+ /// The current column is nullable. Check if the value of the
+ /// corresponding key is nullable. Update the null map accordingly.
+ size_t bucket = i / 8;
+ size_t offset = i % 8;
+ UInt8 val = (reinterpret_cast<const UInt8 *>(&key)[bucket] >> offset) & 1;
+ null_map->insertValue(val);
+ is_null = val == 1;
+ }
+
+ if (has_nullable_keys && is_null)
+ observed_column->insertDefault();
+ else
+ {
+ size_t size = key_sizes[i];
+ observed_column->insertData(reinterpret_cast<const char *>(&key) + pos, size);
+ pos += size;
+ }
+ }
+ }
+};
+
+
+/** Aggregates by concatenating serialized key values.
+ * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts.
+ * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
+ * Therefore, when aggregating by several strings, there is no ambiguity.
+ */
+template <typename TData>
+struct AggregationMethodSerialized
+{
+ using Data = TData;
+ using Key = typename Data::key_type;
+ using Mapped = typename Data::mapped_type;
+
+ Data data;
+
AggregationMethodSerialized() = default;
-
- template <typename Other>
- AggregationMethodSerialized(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>;
-
- static const bool low_cardinality_optimization = false;
-
+
+ template <typename Other>
+ AggregationMethodSerialized(const Other & other) : data(other.data) {}
+
+ using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>;
+
+ static const bool low_cardinality_optimization = false;
+
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
+ {
const auto * pos = key.data;
- for (auto & column : key_columns)
- pos = column->deserializeAndInsertFromArena(pos);
- }
-};
-
-
-class Aggregator;
-
-using ColumnsHashing::HashMethodContext;
-using ColumnsHashing::HashMethodContextPtr;
-
-struct AggregatedDataVariants : private boost::noncopyable
-{
- /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way:
- * - when aggregating, states are created in the pool using IAggregateFunction::create (inside - `placement new` of arbitrary structure);
- * - they must then be destroyed using IAggregateFunction::destroy (inside - calling the destructor of arbitrary structure);
- * - if aggregation is complete, then, in the Aggregator::convertToBlocks function, pointers to the states of aggregate functions
- * are written to ColumnAggregateFunction; ColumnAggregateFunction "acquires ownership" of them, that is - calls `destroy` in its destructor.
- * - if during the aggregation, before call to Aggregator::convertToBlocks, an exception was thrown,
- * then the states of aggregate functions must still be destroyed,
- * otherwise, for complex states (eg, AggregateFunctionUniq), there will be memory leaks;
- * - in this case, to destroy states, the destructor calls Aggregator::destroyAggregateStates method,
- * but only if the variable aggregator (see below) is not nullptr;
- * - that is, until you transfer ownership of the aggregate function states in the ColumnAggregateFunction, set the variable `aggregator`,
- * so that when an exception occurs, the states are correctly destroyed.
- *
- * PS. This can be corrected by making a pool that knows about which states of aggregate functions and in which order are put in it, and knows how to destroy them.
- * But this can hardly be done simply because it is planned to put variable-length strings into the same pool.
- * In this case, the pool will not be able to know with what offsets objects are stored.
- */
+ for (auto & column : key_columns)
+ pos = column->deserializeAndInsertFromArena(pos);
+ }
+};
+
+
+class Aggregator;
+
+using ColumnsHashing::HashMethodContext;
+using ColumnsHashing::HashMethodContextPtr;
+
+struct AggregatedDataVariants : private boost::noncopyable
+{
+ /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way:
+ * - when aggregating, states are created in the pool using IAggregateFunction::create (inside - `placement new` of arbitrary structure);
+ * - they must then be destroyed using IAggregateFunction::destroy (inside - calling the destructor of arbitrary structure);
+ * - if aggregation is complete, then, in the Aggregator::convertToBlocks function, pointers to the states of aggregate functions
+ * are written to ColumnAggregateFunction; ColumnAggregateFunction "acquires ownership" of them, that is - calls `destroy` in its destructor.
+ * - if during the aggregation, before call to Aggregator::convertToBlocks, an exception was thrown,
+ * then the states of aggregate functions must still be destroyed,
+ * otherwise, for complex states (eg, AggregateFunctionUniq), there will be memory leaks;
+ * - in this case, to destroy states, the destructor calls Aggregator::destroyAggregateStates method,
+ * but only if the variable aggregator (see below) is not nullptr;
+ * - that is, until you transfer ownership of the aggregate function states in the ColumnAggregateFunction, set the variable `aggregator`,
+ * so that when an exception occurs, the states are correctly destroyed.
+ *
+ * PS. This can be corrected by making a pool that knows about which states of aggregate functions and in which order are put in it, and knows how to destroy them.
+ * But this can hardly be done simply because it is planned to put variable-length strings into the same pool.
+ * In this case, the pool will not be able to know with what offsets objects are stored.
+ */
const Aggregator * aggregator = nullptr;
-
- size_t keys_size{}; /// Number of keys. NOTE do we need this field?
- Sizes key_sizes; /// Dimensions of keys, if keys of fixed length
-
- /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction.
- Arenas aggregates_pools;
- Arena * aggregates_pool{}; /// The pool that is currently used for allocation.
-
- /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by.
- */
- AggregatedDataWithoutKey without_key = nullptr;
-
- // Disable consecutive key optimization for Uint8/16, because they use a FixedHashMap
- // and the lookup there is almost free, so we don't need to cache the last lookup result
- std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>> key8;
- std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>> key16;
-
- std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>> key32;
- std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>> key64;
- std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>> key_string;
- std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>> key_fixed_string;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt16Key, false, false, false>> keys16;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32Key>> keys32;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64Key>> keys64;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>> keys128;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>> keys256;
- std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>> serialized;
-
- std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level;
- std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level;
- std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_string_two_level;
- std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_fixed_string_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32KeyTwoLevel>> keys32_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64KeyTwoLevel>> keys64_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>> keys128_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>> keys256_two_level;
- std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level;
-
- std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyHash64>> key64_hash64;
- std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyHash64>> key_string_hash64;
- std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyHash64>> key_fixed_string_hash64;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128Hash64>> keys128_hash64;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256Hash64>> keys256_hash64;
- std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64;
-
- /// Support for nullable keys.
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
-
- /// Support for low cardinality.
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>> low_cardinality_key8;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>> low_cardinality_key16;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key32;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key64;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_string;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_fixed_string;
-
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key32_two_level;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key64_two_level;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_string_two_level;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_fixed_string_two_level;
-
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, false, true>> low_cardinality_keys128;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, false, true>> low_cardinality_keys256;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, false, true>> low_cardinality_keys128_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, false, true>> low_cardinality_keys256_two_level;
-
- /// In this and similar macros, the option without_key is not considered.
- #define APPLY_FOR_AGGREGATED_VARIANTS(M) \
- M(key8, false) \
- M(key16, false) \
- M(key32, false) \
- M(key64, false) \
- M(key_string, false) \
- M(key_fixed_string, false) \
- M(keys16, false) \
- M(keys32, false) \
- M(keys64, false) \
- M(keys128, false) \
- M(keys256, false) \
- M(serialized, false) \
- M(key32_two_level, true) \
- M(key64_two_level, true) \
- M(key_string_two_level, true) \
- M(key_fixed_string_two_level, true) \
- M(keys32_two_level, true) \
- M(keys64_two_level, true) \
- M(keys128_two_level, true) \
- M(keys256_two_level, true) \
- M(serialized_two_level, true) \
- M(key64_hash64, false) \
- M(key_string_hash64, false) \
- M(key_fixed_string_hash64, false) \
- M(keys128_hash64, false) \
- M(keys256_hash64, false) \
- M(serialized_hash64, false) \
- M(nullable_keys128, false) \
- M(nullable_keys256, false) \
- M(nullable_keys128_two_level, true) \
- M(nullable_keys256_two_level, true) \
- M(low_cardinality_key8, false) \
- M(low_cardinality_key16, false) \
- M(low_cardinality_key32, false) \
- M(low_cardinality_key64, false) \
- M(low_cardinality_keys128, false) \
- M(low_cardinality_keys256, false) \
- M(low_cardinality_key_string, false) \
- M(low_cardinality_key_fixed_string, false) \
- M(low_cardinality_key32_two_level, true) \
- M(low_cardinality_key64_two_level, true) \
- M(low_cardinality_keys128_two_level, true) \
- M(low_cardinality_keys256_two_level, true) \
- M(low_cardinality_key_string_two_level, true) \
- M(low_cardinality_key_fixed_string_two_level, true) \
-
- enum class Type
- {
- EMPTY = 0,
- without_key,
-
- #define M(NAME, IS_TWO_LEVEL) NAME,
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- };
- Type type = Type::EMPTY;
-
- AggregatedDataVariants() : aggregates_pools(1, std::make_shared<Arena>()), aggregates_pool(aggregates_pools.back().get()) {}
- bool empty() const { return type == Type::EMPTY; }
- void invalidate() { type = Type::EMPTY; }
-
- ~AggregatedDataVariants();
-
- void init(Type type_)
- {
- switch (type_)
- {
- case Type::EMPTY: break;
- case Type::without_key: break;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break;
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- type = type_;
- }
-
- /// Number of rows (different keys).
- size_t size() const
- {
- switch (type)
- {
- case Type::EMPTY: return 0;
- case Type::without_key: return 1;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return NAME->data.size() + (without_key != nullptr);
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- /// The size without taking into account the row in which data is written for the calculation of TOTALS.
- size_t sizeWithoutOverflowRow() const
- {
- switch (type)
- {
- case Type::EMPTY: return 0;
- case Type::without_key: return 1;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return NAME->data.size();
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- const char * getMethodName() const
- {
- switch (type)
- {
- case Type::EMPTY: return "EMPTY";
- case Type::without_key: return "without_key";
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return #NAME;
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- bool isTwoLevel() const
- {
- switch (type)
- {
- case Type::EMPTY: return false;
- case Type::without_key: return false;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return IS_TWO_LEVEL;
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
- M(key32) \
- M(key64) \
- M(key_string) \
- M(key_fixed_string) \
- M(keys32) \
- M(keys64) \
- M(keys128) \
- M(keys256) \
- M(serialized) \
- M(nullable_keys128) \
- M(nullable_keys256) \
- M(low_cardinality_key32) \
- M(low_cardinality_key64) \
- M(low_cardinality_keys128) \
- M(low_cardinality_keys256) \
- M(low_cardinality_key_string) \
- M(low_cardinality_key_fixed_string) \
-
- #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
- M(key8) \
- M(key16) \
- M(keys16) \
- M(key64_hash64) \
- M(key_string_hash64)\
- M(key_fixed_string_hash64) \
- M(keys128_hash64) \
- M(keys256_hash64) \
- M(serialized_hash64) \
- M(low_cardinality_key8) \
- M(low_cardinality_key16) \
-
- #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \
- APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
- APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
-
- bool isConvertibleToTwoLevel() const
- {
- switch (type)
- {
- #define M(NAME) \
- case Type::NAME: return true;
-
- APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
-
- #undef M
- default:
- return false;
- }
- }
-
- void convertToTwoLevel();
-
- #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \
- M(key32_two_level) \
- M(key64_two_level) \
- M(key_string_two_level) \
- M(key_fixed_string_two_level) \
- M(keys32_two_level) \
- M(keys64_two_level) \
- M(keys128_two_level) \
- M(keys256_two_level) \
- M(serialized_two_level) \
- M(nullable_keys128_two_level) \
- M(nullable_keys256_two_level) \
- M(low_cardinality_key32_two_level) \
- M(low_cardinality_key64_two_level) \
- M(low_cardinality_keys128_two_level) \
- M(low_cardinality_keys256_two_level) \
- M(low_cardinality_key_string_two_level) \
- M(low_cardinality_key_fixed_string_two_level) \
-
- #define APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) \
- M(low_cardinality_key8) \
- M(low_cardinality_key16) \
- M(low_cardinality_key32) \
- M(low_cardinality_key64) \
- M(low_cardinality_keys128) \
- M(low_cardinality_keys256) \
- M(low_cardinality_key_string) \
- M(low_cardinality_key_fixed_string) \
- M(low_cardinality_key32_two_level) \
- M(low_cardinality_key64_two_level) \
- M(low_cardinality_keys128_two_level) \
- M(low_cardinality_keys256_two_level) \
- M(low_cardinality_key_string_two_level) \
+
+ size_t keys_size{}; /// Number of keys. NOTE do we need this field?
+ Sizes key_sizes; /// Dimensions of keys, if keys of fixed length
+
+ /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction.
+ Arenas aggregates_pools;
+ Arena * aggregates_pool{}; /// The pool that is currently used for allocation.
+
+ /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by.
+ */
+ AggregatedDataWithoutKey without_key = nullptr;
+
+ // Disable consecutive key optimization for Uint8/16, because they use a FixedHashMap
+ // and the lookup there is almost free, so we don't need to cache the last lookup result
+ std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>> key8;
+ std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>> key16;
+
+ std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>> key32;
+ std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>> key64;
+ std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>> key_string;
+ std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>> key_fixed_string;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt16Key, false, false, false>> keys16;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32Key>> keys32;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64Key>> keys64;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>> keys128;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>> keys256;
+ std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>> serialized;
+
+ std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level;
+ std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level;
+ std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_string_two_level;
+ std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_fixed_string_two_level;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32KeyTwoLevel>> keys32_two_level;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64KeyTwoLevel>> keys64_two_level;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>> keys128_two_level;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>> keys256_two_level;
+ std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level;
+
+ std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyHash64>> key64_hash64;
+ std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyHash64>> key_string_hash64;
+ std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyHash64>> key_fixed_string_hash64;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128Hash64>> keys128_hash64;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256Hash64>> keys256_hash64;
+ std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64;
+
+ /// Support for nullable keys.
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
+
+ /// Support for low cardinality.
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>> low_cardinality_key8;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>> low_cardinality_key16;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key32;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key64;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_string;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_fixed_string;
+
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key32_two_level;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key64_two_level;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_string_two_level;
+ std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_fixed_string_two_level;
+
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, false, true>> low_cardinality_keys128;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, false, true>> low_cardinality_keys256;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, false, true>> low_cardinality_keys128_two_level;
+ std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, false, true>> low_cardinality_keys256_two_level;
+
+ /// In this and similar macros, the option without_key is not considered.
+ #define APPLY_FOR_AGGREGATED_VARIANTS(M) \
+ M(key8, false) \
+ M(key16, false) \
+ M(key32, false) \
+ M(key64, false) \
+ M(key_string, false) \
+ M(key_fixed_string, false) \
+ M(keys16, false) \
+ M(keys32, false) \
+ M(keys64, false) \
+ M(keys128, false) \
+ M(keys256, false) \
+ M(serialized, false) \
+ M(key32_two_level, true) \
+ M(key64_two_level, true) \
+ M(key_string_two_level, true) \
+ M(key_fixed_string_two_level, true) \
+ M(keys32_two_level, true) \
+ M(keys64_two_level, true) \
+ M(keys128_two_level, true) \
+ M(keys256_two_level, true) \
+ M(serialized_two_level, true) \
+ M(key64_hash64, false) \
+ M(key_string_hash64, false) \
+ M(key_fixed_string_hash64, false) \
+ M(keys128_hash64, false) \
+ M(keys256_hash64, false) \
+ M(serialized_hash64, false) \
+ M(nullable_keys128, false) \
+ M(nullable_keys256, false) \
+ M(nullable_keys128_two_level, true) \
+ M(nullable_keys256_two_level, true) \
+ M(low_cardinality_key8, false) \
+ M(low_cardinality_key16, false) \
+ M(low_cardinality_key32, false) \
+ M(low_cardinality_key64, false) \
+ M(low_cardinality_keys128, false) \
+ M(low_cardinality_keys256, false) \
+ M(low_cardinality_key_string, false) \
+ M(low_cardinality_key_fixed_string, false) \
+ M(low_cardinality_key32_two_level, true) \
+ M(low_cardinality_key64_two_level, true) \
+ M(low_cardinality_keys128_two_level, true) \
+ M(low_cardinality_keys256_two_level, true) \
+ M(low_cardinality_key_string_two_level, true) \
+ M(low_cardinality_key_fixed_string_two_level, true) \
+
+ enum class Type
+ {
+ EMPTY = 0,
+ without_key,
+
+ #define M(NAME, IS_TWO_LEVEL) NAME,
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+ };
+ Type type = Type::EMPTY;
+
+ AggregatedDataVariants() : aggregates_pools(1, std::make_shared<Arena>()), aggregates_pool(aggregates_pools.back().get()) {}
+ bool empty() const { return type == Type::EMPTY; }
+ void invalidate() { type = Type::EMPTY; }
+
+ ~AggregatedDataVariants();
+
+ void init(Type type_)
+ {
+ switch (type_)
+ {
+ case Type::EMPTY: break;
+ case Type::without_key: break;
+
+ #define M(NAME, IS_TWO_LEVEL) \
+ case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break;
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+ }
+
+ type = type_;
+ }
+
+ /// Number of rows (different keys).
+ size_t size() const
+ {
+ switch (type)
+ {
+ case Type::EMPTY: return 0;
+ case Type::without_key: return 1;
+
+ #define M(NAME, IS_TWO_LEVEL) \
+ case Type::NAME: return NAME->data.size() + (without_key != nullptr);
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+ }
+
+ __builtin_unreachable();
+ }
+
+ /// The size without taking into account the row in which data is written for the calculation of TOTALS.
+ size_t sizeWithoutOverflowRow() const
+ {
+ switch (type)
+ {
+ case Type::EMPTY: return 0;
+ case Type::without_key: return 1;
+
+ #define M(NAME, IS_TWO_LEVEL) \
+ case Type::NAME: return NAME->data.size();
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+ }
+
+ __builtin_unreachable();
+ }
+
+ const char * getMethodName() const
+ {
+ switch (type)
+ {
+ case Type::EMPTY: return "EMPTY";
+ case Type::without_key: return "without_key";
+
+ #define M(NAME, IS_TWO_LEVEL) \
+ case Type::NAME: return #NAME;
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+ }
+
+ __builtin_unreachable();
+ }
+
+ bool isTwoLevel() const
+ {
+ switch (type)
+ {
+ case Type::EMPTY: return false;
+ case Type::without_key: return false;
+
+ #define M(NAME, IS_TWO_LEVEL) \
+ case Type::NAME: return IS_TWO_LEVEL;
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+ }
+
+ __builtin_unreachable();
+ }
+
+ #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
+ M(key32) \
+ M(key64) \
+ M(key_string) \
+ M(key_fixed_string) \
+ M(keys32) \
+ M(keys64) \
+ M(keys128) \
+ M(keys256) \
+ M(serialized) \
+ M(nullable_keys128) \
+ M(nullable_keys256) \
+ M(low_cardinality_key32) \
+ M(low_cardinality_key64) \
+ M(low_cardinality_keys128) \
+ M(low_cardinality_keys256) \
+ M(low_cardinality_key_string) \
+ M(low_cardinality_key_fixed_string) \
+
+ #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
+ M(key8) \
+ M(key16) \
+ M(keys16) \
+ M(key64_hash64) \
+ M(key_string_hash64)\
+ M(key_fixed_string_hash64) \
+ M(keys128_hash64) \
+ M(keys256_hash64) \
+ M(serialized_hash64) \
+ M(low_cardinality_key8) \
+ M(low_cardinality_key16) \
+
+ #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \
+ APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
+ APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
+
+ bool isConvertibleToTwoLevel() const
+ {
+ switch (type)
+ {
+ #define M(NAME) \
+ case Type::NAME: return true;
+
+ APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
+
+ #undef M
+ default:
+ return false;
+ }
+ }
+
+ void convertToTwoLevel();
+
+ #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \
+ M(key32_two_level) \
+ M(key64_two_level) \
+ M(key_string_two_level) \
+ M(key_fixed_string_two_level) \
+ M(keys32_two_level) \
+ M(keys64_two_level) \
+ M(keys128_two_level) \
+ M(keys256_two_level) \
+ M(serialized_two_level) \
+ M(nullable_keys128_two_level) \
+ M(nullable_keys256_two_level) \
+ M(low_cardinality_key32_two_level) \
+ M(low_cardinality_key64_two_level) \
+ M(low_cardinality_keys128_two_level) \
+ M(low_cardinality_keys256_two_level) \
+ M(low_cardinality_key_string_two_level) \
+ M(low_cardinality_key_fixed_string_two_level) \
+
+ #define APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) \
+ M(low_cardinality_key8) \
+ M(low_cardinality_key16) \
+ M(low_cardinality_key32) \
+ M(low_cardinality_key64) \
+ M(low_cardinality_keys128) \
+ M(low_cardinality_keys256) \
+ M(low_cardinality_key_string) \
+ M(low_cardinality_key_fixed_string) \
+ M(low_cardinality_key32_two_level) \
+ M(low_cardinality_key64_two_level) \
+ M(low_cardinality_keys128_two_level) \
+ M(low_cardinality_keys256_two_level) \
+ M(low_cardinality_key_string_two_level) \
M(low_cardinality_key_fixed_string_two_level)
-
+
bool isLowCardinality() const
- {
- switch (type)
- {
- #define M(NAME) \
- case Type::NAME: return true;
-
- APPLY_FOR_LOW_CARDINALITY_VARIANTS(M)
- #undef M
- default:
- return false;
- }
- }
-
- static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings)
- {
- switch (type)
- {
- case Type::without_key: return nullptr;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: \
- { \
- using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \
- using T ## NAME = typename TPtr ## NAME ::element_type; \
- return T ## NAME ::State::createContext(settings); \
- }
-
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
-
- default:
- throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
- }
- }
-};
-
-using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
-using ManyAggregatedDataVariants = std::vector<AggregatedDataVariantsPtr>;
-using ManyAggregatedDataVariantsPtr = std::shared_ptr<ManyAggregatedDataVariants>;
-
+ {
+ switch (type)
+ {
+ #define M(NAME) \
+ case Type::NAME: return true;
+
+ APPLY_FOR_LOW_CARDINALITY_VARIANTS(M)
+ #undef M
+ default:
+ return false;
+ }
+ }
+
+ static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings)
+ {
+ switch (type)
+ {
+ case Type::without_key: return nullptr;
+
+ #define M(NAME, IS_TWO_LEVEL) \
+ case Type::NAME: \
+ { \
+ using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \
+ using T ## NAME = typename TPtr ## NAME ::element_type; \
+ return T ## NAME ::State::createContext(settings); \
+ }
+
+ APPLY_FOR_AGGREGATED_VARIANTS(M)
+ #undef M
+
+ default:
+ throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+ }
+ }
+};
+
+using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
+using ManyAggregatedDataVariants = std::vector<AggregatedDataVariantsPtr>;
+using ManyAggregatedDataVariantsPtr = std::shared_ptr<ManyAggregatedDataVariants>;
+
class CompiledAggregateFunctionsHolder;
-/** How are "total" values calculated with WITH TOTALS?
- * (For more details, see TotalsHavingTransform.)
- *
- * In the absence of group_by_overflow_mode = 'any', the data is aggregated as usual, but the states of the aggregate functions are not finalized.
- * Later, the aggregate function states for all rows (passed through HAVING) are merged into one - this will be TOTALS.
- *
- * If there is group_by_overflow_mode = 'any', the data is aggregated as usual, except for the keys that did not fit in max_rows_to_group_by.
- * For these keys, the data is aggregated into one additional row - see below under the names `overflow_row`, `overflows`...
- * Later, the aggregate function states for all rows (passed through HAVING) are merged into one,
- * also overflow_row is added or not added (depending on the totals_mode setting) also - this will be TOTALS.
- */
-
-
-/** Aggregates the source of the blocks.
- */
+/** How are "total" values calculated with WITH TOTALS?
+ * (For more details, see TotalsHavingTransform.)
+ *
+ * In the absence of group_by_overflow_mode = 'any', the data is aggregated as usual, but the states of the aggregate functions are not finalized.
+ * Later, the aggregate function states for all rows (passed through HAVING) are merged into one - this will be TOTALS.
+ *
+ * If there is group_by_overflow_mode = 'any', the data is aggregated as usual, except for the keys that did not fit in max_rows_to_group_by.
+ * For these keys, the data is aggregated into one additional row - see below under the names `overflow_row`, `overflows`...
+ * Later, the aggregate function states for all rows (passed through HAVING) are merged into one,
+ * also overflow_row is added or not added (depending on the totals_mode setting) also - this will be TOTALS.
+ */
+
+
+/** Aggregates the source of the blocks.
+ */
class Aggregator final
-{
-public:
- struct Params
- {
- /// Data structure of source blocks.
- Block src_header;
- /// Data structure of intermediate blocks before merge.
- Block intermediate_header;
-
- /// What to count.
- const ColumnNumbers keys;
- const AggregateDescriptions aggregates;
- const size_t keys_size;
- const size_t aggregates_size;
-
- /// The settings of approximate calculation of GROUP BY.
- const bool overflow_row; /// Do we need to put into AggregatedDataVariants::without_key aggregates for keys that are not in max_rows_to_group_by.
- const size_t max_rows_to_group_by;
- const OverflowMode group_by_overflow_mode;
-
- /// Two-level aggregation settings (used for a large number of keys).
- /** With how many keys or the size of the aggregation state in bytes,
- * two-level aggregation begins to be used. Enough to reach of at least one of the thresholds.
- * 0 - the corresponding threshold is not specified.
- */
- size_t group_by_two_level_threshold;
- size_t group_by_two_level_threshold_bytes;
-
- /// Settings to flush temporary data to the filesystem (external aggregation).
- const size_t max_bytes_before_external_group_by; /// 0 - do not use external aggregation.
-
- /// Return empty result when aggregating without keys on empty set.
- bool empty_result_for_aggregation_by_empty_set;
-
+{
+public:
+ struct Params
+ {
+ /// Data structure of source blocks.
+ Block src_header;
+ /// Data structure of intermediate blocks before merge.
+ Block intermediate_header;
+
+ /// What to count.
+ const ColumnNumbers keys;
+ const AggregateDescriptions aggregates;
+ const size_t keys_size;
+ const size_t aggregates_size;
+
+ /// The settings of approximate calculation of GROUP BY.
+ const bool overflow_row; /// Do we need to put into AggregatedDataVariants::without_key aggregates for keys that are not in max_rows_to_group_by.
+ const size_t max_rows_to_group_by;
+ const OverflowMode group_by_overflow_mode;
+
+ /// Two-level aggregation settings (used for a large number of keys).
+ /** With how many keys or the size of the aggregation state in bytes,
+ * two-level aggregation begins to be used. Enough to reach of at least one of the thresholds.
+ * 0 - the corresponding threshold is not specified.
+ */
+ size_t group_by_two_level_threshold;
+ size_t group_by_two_level_threshold_bytes;
+
+ /// Settings to flush temporary data to the filesystem (external aggregation).
+ const size_t max_bytes_before_external_group_by; /// 0 - do not use external aggregation.
+
+ /// Return empty result when aggregating without keys on empty set.
+ bool empty_result_for_aggregation_by_empty_set;
+
VolumePtr tmp_volume;
-
- /// Settings is used to determine cache size. No threads are created.
- size_t max_threads;
-
- const size_t min_free_disk_space;
+
+ /// Settings is used to determine cache size. No threads are created.
+ size_t max_threads;
+
+ const size_t min_free_disk_space;
bool compile_aggregate_expressions;
size_t min_count_to_compile_aggregate_expression;
- Params(
- const Block & src_header_,
- const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
- bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
- size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
- size_t max_bytes_before_external_group_by_,
- bool empty_result_for_aggregation_by_empty_set_,
+ Params(
+ const Block & src_header_,
+ const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
+ bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
+ size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
+ size_t max_bytes_before_external_group_by_,
+ bool empty_result_for_aggregation_by_empty_set_,
VolumePtr tmp_volume_, size_t max_threads_,
size_t min_free_disk_space_,
bool compile_aggregate_expressions_,
size_t min_count_to_compile_aggregate_expression_,
const Block & intermediate_header_ = {})
- : src_header(src_header_),
+ : src_header(src_header_),
intermediate_header(intermediate_header_),
- keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
- overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
- group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
- max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
- empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
- tmp_volume(tmp_volume_), max_threads(max_threads_),
+ keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
+ overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
+ group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
+ max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
+ empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
+ tmp_volume(tmp_volume_), max_threads(max_threads_),
min_free_disk_space(min_free_disk_space_),
compile_aggregate_expressions(compile_aggregate_expressions_),
min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
- {
- }
-
- /// Only parameters that matter during merge.
- Params(const Block & intermediate_header_,
- const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_)
+ {
+ }
+
+ /// Only parameters that matter during merge.
+ Params(const Block & intermediate_header_,
+ const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_)
: Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0)
- {
- intermediate_header = intermediate_header_;
- }
-
- static Block getHeader(
- const Block & src_header,
- const Block & intermediate_header,
- const ColumnNumbers & keys,
- const AggregateDescriptions & aggregates,
- bool final);
-
- Block getHeader(bool final) const
- {
- return getHeader(src_header, intermediate_header, keys, aggregates, final);
- }
-
- /// Returns keys and aggregated for EXPLAIN query
- void explain(WriteBuffer & out, size_t indent) const;
+ {
+ intermediate_header = intermediate_header_;
+ }
+
+ static Block getHeader(
+ const Block & src_header,
+ const Block & intermediate_header,
+ const ColumnNumbers & keys,
+ const AggregateDescriptions & aggregates,
+ bool final);
+
+ Block getHeader(bool final) const
+ {
+ return getHeader(src_header, intermediate_header, keys, aggregates, final);
+ }
+
+ /// Returns keys and aggregated for EXPLAIN query
+ void explain(WriteBuffer & out, size_t indent) const;
void explain(JSONBuilder::JSONMap & map) const;
- };
-
+ };
+
explicit Aggregator(const Params & params_);
-
- using AggregateColumns = std::vector<ColumnRawPtrs>;
- using AggregateColumnsData = std::vector<ColumnAggregateFunction::Container *>;
- using AggregateColumnsConstData = std::vector<const ColumnAggregateFunction::Container *>;
+
+ using AggregateColumns = std::vector<ColumnRawPtrs>;
+ using AggregateColumnsData = std::vector<ColumnAggregateFunction::Container *>;
+ using AggregateColumnsConstData = std::vector<const ColumnAggregateFunction::Container *>;
using AggregateFunctionsPlainPtrs = std::vector<const IAggregateFunction *>;
-
- /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break').
- bool executeOnBlock(const Block & block, AggregatedDataVariants & result,
- ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
+
+ /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break').
+ bool executeOnBlock(const Block & block, AggregatedDataVariants & result,
+ ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
bool & no_more_keys) const;
-
- bool executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result,
- ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
+
+ bool executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result,
+ ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
bool & no_more_keys) const;
-
+
/// Used for aggregate projection.
bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const;
- /** Convert the aggregation data structure into a block.
- * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block.
- *
- * If final = false, then ColumnAggregateFunction is created as the aggregation columns with the state of the calculations,
- * which can then be combined with other states (for distributed query processing).
- * If final = true, then columns with ready values are created as aggregate columns.
- */
- BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const;
-
- ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const;
-
- using BucketToBlocks = std::map<Int32, BlocksList>;
- /// Merge partially aggregated blocks separated to buckets into one data structure.
- void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads);
-
- /// Merge several partially aggregated blocks into one.
- /// Precondition: for all blocks block.info.is_overflows flag must be the same.
- /// (either all blocks are from overflow data or none blocks are).
- /// The resulting block has the same value of is_overflows flag.
- Block mergeBlocks(BlocksList & blocks, bool final);
-
- /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used.
- * This is needed to simplify merging of that data with other results, that are already two-level.
- */
+ /** Convert the aggregation data structure into a block.
+ * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block.
+ *
+ * If final = false, then ColumnAggregateFunction is created as the aggregation columns with the state of the calculations,
+ * which can then be combined with other states (for distributed query processing).
+ * If final = true, then columns with ready values are created as aggregate columns.
+ */
+ BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const;
+
+ ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const;
+
+ using BucketToBlocks = std::map<Int32, BlocksList>;
+ /// Merge partially aggregated blocks separated to buckets into one data structure.
+ void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads);
+
+ /// Merge several partially aggregated blocks into one.
+ /// Precondition: for all blocks block.info.is_overflows flag must be the same.
+ /// (either all blocks are from overflow data or none blocks are).
+ /// The resulting block has the same value of is_overflows flag.
+ Block mergeBlocks(BlocksList & blocks, bool final);
+
+ /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used.
+ * This is needed to simplify merging of that data with other results, that are already two-level.
+ */
std::vector<Block> convertBlockToTwoLevel(const Block & block) const;
-
- /// For external aggregation.
+
+ /// For external aggregation.
void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) const;
void writeToTemporaryFile(AggregatedDataVariants & data_variants) const;
-
- bool hasTemporaryFiles() const { return !temporary_files.empty(); }
-
- struct TemporaryFiles
- {
- std::vector<std::unique_ptr<Poco::TemporaryFile>> files;
- size_t sum_size_uncompressed = 0;
- size_t sum_size_compressed = 0;
- mutable std::mutex mutex;
-
- bool empty() const
- {
- std::lock_guard lock(mutex);
- return files.empty();
- }
- };
-
- const TemporaryFiles & getTemporaryFiles() const { return temporary_files; }
-
- /// Get data structure of the result.
- Block getHeader(bool final) const;
-
+
+ bool hasTemporaryFiles() const { return !temporary_files.empty(); }
+
+ struct TemporaryFiles
+ {
+ std::vector<std::unique_ptr<Poco::TemporaryFile>> files;
+ size_t sum_size_uncompressed = 0;
+ size_t sum_size_compressed = 0;
+ mutable std::mutex mutex;
+
+ bool empty() const
+ {
+ std::lock_guard lock(mutex);
+ return files.empty();
+ }
+ };
+
+ const TemporaryFiles & getTemporaryFiles() const { return temporary_files; }
+
+ /// Get data structure of the result.
+ Block getHeader(bool final) const;
+
private:
- friend struct AggregatedDataVariants;
- friend class ConvertingAggregatedToChunksTransform;
- friend class ConvertingAggregatedToChunksSource;
- friend class AggregatingInOrderTransform;
-
- Params params;
-
- AggregatedDataVariants::Type method_chosen;
- Sizes key_sizes;
-
- HashMethodContextPtr aggregation_state_cache;
-
- AggregateFunctionsPlainPtrs aggregate_functions;
-
- /** This array serves two purposes.
- *
+ friend struct AggregatedDataVariants;
+ friend class ConvertingAggregatedToChunksTransform;
+ friend class ConvertingAggregatedToChunksSource;
+ friend class AggregatingInOrderTransform;
+
+ Params params;
+
+ AggregatedDataVariants::Type method_chosen;
+ Sizes key_sizes;
+
+ HashMethodContextPtr aggregation_state_cache;
+
+ AggregateFunctionsPlainPtrs aggregate_functions;
+
+ /** This array serves two purposes.
+ *
* Function arguments are collected side by side, and they do not need to be collected from different places. Also the array is made zero-terminated.
- * The inner loop (for the case without_key) is almost twice as compact; performance gain of about 30%.
- */
- struct AggregateFunctionInstruction
- {
+ * The inner loop (for the case without_key) is almost twice as compact; performance gain of about 30%.
+ */
+ struct AggregateFunctionInstruction
+ {
const IAggregateFunction * that{};
size_t state_offset{};
const IColumn ** arguments{};
const IAggregateFunction * batch_that{};
const IColumn ** batch_arguments{};
const UInt64 * offsets{};
- };
-
- using AggregateFunctionInstructions = std::vector<AggregateFunctionInstruction>;
- using NestedColumnsHolder = std::vector<std::vector<const IColumn *>>;
-
- Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions.
- size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions.
-
- // add info to track alignment requirement
- // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn)
- size_t align_aggregate_states = 1;
-
- bool all_aggregates_has_trivial_destructor = false;
-
- /// How many RAM were used to process the query before processing the first block.
- Int64 memory_usage_before_aggregation = 0;
-
- Poco::Logger * log = &Poco::Logger::get("Aggregator");
-
- /// For external aggregation.
+ };
+
+ using AggregateFunctionInstructions = std::vector<AggregateFunctionInstruction>;
+ using NestedColumnsHolder = std::vector<std::vector<const IColumn *>>;
+
+ Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions.
+ size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions.
+
+ // add info to track alignment requirement
+ // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn)
+ size_t align_aggregate_states = 1;
+
+ bool all_aggregates_has_trivial_destructor = false;
+
+ /// How many RAM were used to process the query before processing the first block.
+ Int64 memory_usage_before_aggregation = 0;
+
+ Poco::Logger * log = &Poco::Logger::get("Aggregator");
+
+ /// For external aggregation.
mutable TemporaryFiles temporary_files;
-
+
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledAggregateFunctionsHolder> compiled_aggregate_functions_holder;
#endif
@@ -1096,240 +1096,240 @@ private:
*/
void compileAggregateFunctionsIfNeeded();
- /** Select the aggregation method based on the number and types of keys. */
- AggregatedDataVariants::Type chooseAggregationMethod();
-
- /** Create states of aggregate functions for one key.
- */
+ /** Select the aggregation method based on the number and types of keys. */
+ AggregatedDataVariants::Type chooseAggregationMethod();
+
+ /** Create states of aggregate functions for one key.
+ */
template <bool skip_compiled_aggregate_functions = false>
- void createAggregateStates(AggregateDataPtr & aggregate_data) const;
-
- /** Call `destroy` methods for states of aggregate functions.
- * Used in the exception handler for aggregation, since RAII in this case is not applicable.
- */
+ void createAggregateStates(AggregateDataPtr & aggregate_data) const;
+
+ /** Call `destroy` methods for states of aggregate functions.
+ * Used in the exception handler for aggregation, since RAII in this case is not applicable.
+ */
void destroyAllAggregateStates(AggregatedDataVariants & result) const;
-
-
- /// Process one data block, aggregate the data into a hash table.
- template <typename Method>
- void executeImpl(
- Method & method,
- Arena * aggregates_pool,
- size_t rows,
- ColumnRawPtrs & key_columns,
- AggregateFunctionInstruction * aggregate_instructions,
- bool no_more_keys,
- AggregateDataPtr overflow_row) const;
-
- /// Specialization for a particular value no_more_keys.
+
+
+ /// Process one data block, aggregate the data into a hash table.
+ template <typename Method>
+ void executeImpl(
+ Method & method,
+ Arena * aggregates_pool,
+ size_t rows,
+ ColumnRawPtrs & key_columns,
+ AggregateFunctionInstruction * aggregate_instructions,
+ bool no_more_keys,
+ AggregateDataPtr overflow_row) const;
+
+ /// Specialization for a particular value no_more_keys.
template <bool no_more_keys, bool use_compiled_functions, typename Method>
void executeImplBatch(
- Method & method,
- typename Method::State & state,
- Arena * aggregates_pool,
- size_t rows,
- AggregateFunctionInstruction * aggregate_instructions,
- AggregateDataPtr overflow_row) const;
-
- /// For case when there are no keys (all aggregate into one row).
+ Method & method,
+ typename Method::State & state,
+ Arena * aggregates_pool,
+ size_t rows,
+ AggregateFunctionInstruction * aggregate_instructions,
+ AggregateDataPtr overflow_row) const;
+
+ /// For case when there are no keys (all aggregate into one row).
template <bool use_compiled_functions>
void executeWithoutKeyImpl(
- AggregatedDataWithoutKey & res,
- size_t rows,
- AggregateFunctionInstruction * aggregate_instructions,
+ AggregatedDataWithoutKey & res,
+ size_t rows,
+ AggregateFunctionInstruction * aggregate_instructions,
Arena * arena) const;
-
- static void executeOnIntervalWithoutKeyImpl(
- AggregatedDataWithoutKey & res,
- size_t row_begin,
- size_t row_end,
- AggregateFunctionInstruction * aggregate_instructions,
- Arena * arena);
-
- template <typename Method>
- void writeToTemporaryFileImpl(
- AggregatedDataVariants & data_variants,
- Method & method,
+
+ static void executeOnIntervalWithoutKeyImpl(
+ AggregatedDataWithoutKey & res,
+ size_t row_begin,
+ size_t row_end,
+ AggregateFunctionInstruction * aggregate_instructions,
+ Arena * arena);
+
+ template <typename Method>
+ void writeToTemporaryFileImpl(
+ AggregatedDataVariants & data_variants,
+ Method & method,
IBlockOutputStream & out) const;
-
- /// Merge NULL key data from hash table `src` into `dst`.
- template <typename Method, typename Table>
- void mergeDataNullKey(
- Table & table_dst,
- Table & table_src,
- Arena * arena) const;
-
- /// Merge data from hash table `src` into `dst`.
+
+ /// Merge NULL key data from hash table `src` into `dst`.
+ template <typename Method, typename Table>
+ void mergeDataNullKey(
+ Table & table_dst,
+ Table & table_src,
+ Arena * arena) const;
+
+ /// Merge data from hash table `src` into `dst`.
template <typename Method, bool use_compiled_functions, typename Table>
- void mergeDataImpl(
- Table & table_dst,
- Table & table_src,
- Arena * arena) const;
-
- /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
- template <typename Method, typename Table>
- void mergeDataNoMoreKeysImpl(
- Table & table_dst,
- AggregatedDataWithoutKey & overflows,
- Table & table_src,
- Arena * arena) const;
-
- /// Same, but ignores the rest of the keys.
- template <typename Method, typename Table>
- void mergeDataOnlyExistingKeysImpl(
- Table & table_dst,
- Table & table_src,
- Arena * arena) const;
-
- void mergeWithoutKeyDataImpl(
- ManyAggregatedDataVariants & non_empty_data) const;
-
- template <typename Method>
- void mergeSingleLevelDataImpl(
- ManyAggregatedDataVariants & non_empty_data) const;
-
- template <typename Method, typename Table>
- void convertToBlockImpl(
- Method & method,
- Table & data,
- MutableColumns & key_columns,
- AggregateColumnsData & aggregate_columns,
- MutableColumns & final_aggregate_columns,
- Arena * arena,
- bool final) const;
-
- template <typename Mapped>
- void insertAggregatesIntoColumns(
- Mapped & mapped,
- MutableColumns & final_aggregate_columns,
- Arena * arena) const;
-
+ void mergeDataImpl(
+ Table & table_dst,
+ Table & table_src,
+ Arena * arena) const;
+
+ /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
+ template <typename Method, typename Table>
+ void mergeDataNoMoreKeysImpl(
+ Table & table_dst,
+ AggregatedDataWithoutKey & overflows,
+ Table & table_src,
+ Arena * arena) const;
+
+ /// Same, but ignores the rest of the keys.
+ template <typename Method, typename Table>
+ void mergeDataOnlyExistingKeysImpl(
+ Table & table_dst,
+ Table & table_src,
+ Arena * arena) const;
+
+ void mergeWithoutKeyDataImpl(
+ ManyAggregatedDataVariants & non_empty_data) const;
+
+ template <typename Method>
+ void mergeSingleLevelDataImpl(
+ ManyAggregatedDataVariants & non_empty_data) const;
+
+ template <typename Method, typename Table>
+ void convertToBlockImpl(
+ Method & method,
+ Table & data,
+ MutableColumns & key_columns,
+ AggregateColumnsData & aggregate_columns,
+ MutableColumns & final_aggregate_columns,
+ Arena * arena,
+ bool final) const;
+
+ template <typename Mapped>
+ void insertAggregatesIntoColumns(
+ Mapped & mapped,
+ MutableColumns & final_aggregate_columns,
+ Arena * arena) const;
+
template <typename Method, bool use_compiled_functions, typename Table>
- void convertToBlockImplFinal(
- Method & method,
- Table & data,
+ void convertToBlockImplFinal(
+ Method & method,
+ Table & data,
std::vector<IColumn *> key_columns,
- MutableColumns & final_aggregate_columns,
- Arena * arena) const;
-
- template <typename Method, typename Table>
- void convertToBlockImplNotFinal(
- Method & method,
- Table & data,
+ MutableColumns & final_aggregate_columns,
+ Arena * arena) const;
+
+ template <typename Method, typename Table>
+ void convertToBlockImplNotFinal(
+ Method & method,
+ Table & data,
std::vector<IColumn *> key_columns,
- AggregateColumnsData & aggregate_columns) const;
-
- template <typename Filler>
- Block prepareBlockAndFill(
- AggregatedDataVariants & data_variants,
- bool final,
- size_t rows,
- Filler && filler) const;
-
- template <typename Method>
- Block convertOneBucketToBlock(
- AggregatedDataVariants & data_variants,
- Method & method,
+ AggregateColumnsData & aggregate_columns) const;
+
+ template <typename Filler>
+ Block prepareBlockAndFill(
+ AggregatedDataVariants & data_variants,
+ bool final,
+ size_t rows,
+ Filler && filler) const;
+
+ template <typename Method>
+ Block convertOneBucketToBlock(
+ AggregatedDataVariants & data_variants,
+ Method & method,
Arena * arena,
- bool final,
- size_t bucket) const;
-
- Block mergeAndConvertOneBucketToBlock(
- ManyAggregatedDataVariants & variants,
- Arena * arena,
- bool final,
- size_t bucket,
- std::atomic<bool> * is_cancelled = nullptr) const;
-
- Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const;
- Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const;
- BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const;
-
- template <typename Method>
- BlocksList prepareBlocksAndFillTwoLevelImpl(
- AggregatedDataVariants & data_variants,
- Method & method,
- bool final,
- ThreadPool * thread_pool) const;
-
- template <bool no_more_keys, typename Method, typename Table>
- void mergeStreamsImplCase(
- Block & block,
- Arena * aggregates_pool,
- Method & method,
- Table & data,
- AggregateDataPtr overflow_row) const;
-
- template <typename Method, typename Table>
- void mergeStreamsImpl(
- Block & block,
- Arena * aggregates_pool,
- Method & method,
- Table & data,
- AggregateDataPtr overflow_row,
- bool no_more_keys) const;
-
- void mergeWithoutKeyStreamsImpl(
- Block & block,
- AggregatedDataVariants & result) const;
-
- template <typename Method>
- void mergeBucketImpl(
- ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> * is_cancelled = nullptr) const;
-
- template <typename Method>
- void convertBlockToTwoLevelImpl(
- Method & method,
- Arena * pool,
- ColumnRawPtrs & key_columns,
- const Block & source,
- std::vector<Block> & destinations) const;
-
- template <typename Method, typename Table>
- void destroyImpl(Table & table) const;
-
- void destroyWithoutKey(
- AggregatedDataVariants & result) const;
-
-
- /** Checks constraints on the maximum number of keys for aggregation.
- * If it is exceeded, then, depending on the group_by_overflow_mode, either
- * - throws an exception;
- * - returns false, which means that execution must be aborted;
- * - sets the variable no_more_keys to true.
- */
- bool checkLimits(size_t result_size, bool & no_more_keys) const;
-
- void prepareAggregateInstructions(
- Columns columns,
- AggregateColumns & aggregate_columns,
- Columns & materialized_columns,
- AggregateFunctionInstructions & instructions,
+ bool final,
+ size_t bucket) const;
+
+ Block mergeAndConvertOneBucketToBlock(
+ ManyAggregatedDataVariants & variants,
+ Arena * arena,
+ bool final,
+ size_t bucket,
+ std::atomic<bool> * is_cancelled = nullptr) const;
+
+ Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const;
+ Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const;
+ BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const;
+
+ template <typename Method>
+ BlocksList prepareBlocksAndFillTwoLevelImpl(
+ AggregatedDataVariants & data_variants,
+ Method & method,
+ bool final,
+ ThreadPool * thread_pool) const;
+
+ template <bool no_more_keys, typename Method, typename Table>
+ void mergeStreamsImplCase(
+ Block & block,
+ Arena * aggregates_pool,
+ Method & method,
+ Table & data,
+ AggregateDataPtr overflow_row) const;
+
+ template <typename Method, typename Table>
+ void mergeStreamsImpl(
+ Block & block,
+ Arena * aggregates_pool,
+ Method & method,
+ Table & data,
+ AggregateDataPtr overflow_row,
+ bool no_more_keys) const;
+
+ void mergeWithoutKeyStreamsImpl(
+ Block & block,
+ AggregatedDataVariants & result) const;
+
+ template <typename Method>
+ void mergeBucketImpl(
+ ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> * is_cancelled = nullptr) const;
+
+ template <typename Method>
+ void convertBlockToTwoLevelImpl(
+ Method & method,
+ Arena * pool,
+ ColumnRawPtrs & key_columns,
+ const Block & source,
+ std::vector<Block> & destinations) const;
+
+ template <typename Method, typename Table>
+ void destroyImpl(Table & table) const;
+
+ void destroyWithoutKey(
+ AggregatedDataVariants & result) const;
+
+
+ /** Checks constraints on the maximum number of keys for aggregation.
+ * If it is exceeded, then, depending on the group_by_overflow_mode, either
+ * - throws an exception;
+ * - returns false, which means that execution must be aborted;
+ * - sets the variable no_more_keys to true.
+ */
+ bool checkLimits(size_t result_size, bool & no_more_keys) const;
+
+ void prepareAggregateInstructions(
+ Columns columns,
+ AggregateColumns & aggregate_columns,
+ Columns & materialized_columns,
+ AggregateFunctionInstructions & instructions,
NestedColumnsHolder & nested_columns_holder) const;
-
+
void addSingleKeyToAggregateColumns(
const AggregatedDataVariants & data_variants,
MutableColumns & aggregate_columns) const;
-
+
void addArenasToAggregateColumns(
const AggregatedDataVariants & data_variants,
MutableColumns & aggregate_columns) const;
- void createStatesAndFillKeyColumnsWithSingleKey(
- AggregatedDataVariants & data_variants,
- Columns & key_columns, size_t key_row,
+ void createStatesAndFillKeyColumnsWithSingleKey(
+ AggregatedDataVariants & data_variants,
+ Columns & key_columns, size_t key_row,
MutableColumns & final_key_columns) const;
-};
-
-
-/** Get the aggregation variant by its type. */
-template <typename Method> Method & getDataVariant(AggregatedDataVariants & variants);
-
-#define M(NAME, IS_TWO_LEVEL) \
- template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant<decltype(AggregatedDataVariants::NAME)::element_type>(AggregatedDataVariants & variants) { return *variants.NAME; }
-
-APPLY_FOR_AGGREGATED_VARIANTS(M)
-
-#undef M
-
-}
+};
+
+
+/** Get the aggregation variant by its type. */
+template <typename Method> Method & getDataVariant(AggregatedDataVariants & variants);
+
+#define M(NAME, IS_TWO_LEVEL) \
+ template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant<decltype(AggregatedDataVariants::NAME)::element_type>(AggregatedDataVariants & variants) { return *variants.NAME; }
+
+APPLY_FOR_AGGREGATED_VARIANTS(M)
+
+#undef M
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp
index 3591303893..f1329573c8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ClientInfo.cpp
@@ -1,71 +1,71 @@
-#include <Interpreters/ClientInfo.h>
-#include <IO/ReadBuffer.h>
-#include <IO/WriteBuffer.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteHelpers.h>
-#include <Core/Defines.h>
-#include <common/getFQDNOrHostName.h>
-#include <unistd.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-
-void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) const
-{
- if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO)
- throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR);
-
- writeBinary(UInt8(query_kind), out);
- if (empty())
- return;
-
- writeBinary(initial_user, out);
- writeBinary(initial_query_id, out);
- writeBinary(initial_address.toString(), out);
-
+#include <Interpreters/ClientInfo.h>
+#include <IO/ReadBuffer.h>
+#include <IO/WriteBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Core/Defines.h>
+#include <common/getFQDNOrHostName.h>
+#include <unistd.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+
+void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) const
+{
+ if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO)
+ throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR);
+
+ writeBinary(UInt8(query_kind), out);
+ if (empty())
+ return;
+
+ writeBinary(initial_user, out);
+ writeBinary(initial_query_id, out);
+ writeBinary(initial_address.toString(), out);
+
if (server_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME)
writeBinary(initial_query_start_time_microseconds, out);
- writeBinary(UInt8(interface), out);
-
- if (interface == Interface::TCP)
- {
- writeBinary(os_user, out);
- writeBinary(client_hostname, out);
- writeBinary(client_name, out);
- writeVarUInt(client_version_major, out);
- writeVarUInt(client_version_minor, out);
+ writeBinary(UInt8(interface), out);
+
+ if (interface == Interface::TCP)
+ {
+ writeBinary(os_user, out);
+ writeBinary(client_hostname, out);
+ writeBinary(client_name, out);
+ writeVarUInt(client_version_major, out);
+ writeVarUInt(client_version_minor, out);
writeVarUInt(client_tcp_protocol_version, out);
- }
- else if (interface == Interface::HTTP)
- {
- writeBinary(UInt8(http_method), out);
- writeBinary(http_user_agent, out);
+ }
+ else if (interface == Interface::HTTP)
+ {
+ writeBinary(UInt8(http_method), out);
+ writeBinary(http_user_agent, out);
if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO)
writeBinary(forwarded_for, out);
if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_REFERER_IN_CLIENT_INFO)
writeBinary(http_referer, out);
- }
-
- if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO)
- writeBinary(quota_key, out);
-
+ }
+
+ if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO)
+ writeBinary(quota_key, out);
+
if (server_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH)
writeVarUInt(distributed_depth, out);
- if (interface == Interface::TCP)
- {
- if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH)
- writeVarUInt(client_version_patch, out);
- }
+ if (interface == Interface::TCP)
+ {
+ if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH)
+ writeVarUInt(client_version_patch, out);
+ }
if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY)
{
@@ -86,74 +86,74 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision)
writeBinary(uint8_t(0), out);
}
}
-}
-
-
-void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision)
-{
- if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO)
- throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR);
-
- UInt8 read_query_kind = 0;
- readBinary(read_query_kind, in);
- query_kind = QueryKind(read_query_kind);
- if (empty())
- return;
-
- readBinary(initial_user, in);
- readBinary(initial_query_id, in);
-
- String initial_address_string;
- readBinary(initial_address_string, in);
- initial_address = Poco::Net::SocketAddress(initial_address_string);
-
+}
+
+
+void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision)
+{
+ if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO)
+ throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR);
+
+ UInt8 read_query_kind = 0;
+ readBinary(read_query_kind, in);
+ query_kind = QueryKind(read_query_kind);
+ if (empty())
+ return;
+
+ readBinary(initial_user, in);
+ readBinary(initial_query_id, in);
+
+ String initial_address_string;
+ readBinary(initial_address_string, in);
+ initial_address = Poco::Net::SocketAddress(initial_address_string);
+
if (client_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME)
{
readBinary(initial_query_start_time_microseconds, in);
initial_query_start_time = initial_query_start_time_microseconds / 1000000;
}
- UInt8 read_interface = 0;
- readBinary(read_interface, in);
- interface = Interface(read_interface);
-
- if (interface == Interface::TCP)
- {
- readBinary(os_user, in);
- readBinary(client_hostname, in);
- readBinary(client_name, in);
- readVarUInt(client_version_major, in);
- readVarUInt(client_version_minor, in);
+ UInt8 read_interface = 0;
+ readBinary(read_interface, in);
+ interface = Interface(read_interface);
+
+ if (interface == Interface::TCP)
+ {
+ readBinary(os_user, in);
+ readBinary(client_hostname, in);
+ readBinary(client_name, in);
+ readVarUInt(client_version_major, in);
+ readVarUInt(client_version_minor, in);
readVarUInt(client_tcp_protocol_version, in);
- }
- else if (interface == Interface::HTTP)
- {
- UInt8 read_http_method = 0;
- readBinary(read_http_method, in);
- http_method = HTTPMethod(read_http_method);
-
- readBinary(http_user_agent, in);
+ }
+ else if (interface == Interface::HTTP)
+ {
+ UInt8 read_http_method = 0;
+ readBinary(read_http_method, in);
+ http_method = HTTPMethod(read_http_method);
+
+ readBinary(http_user_agent, in);
if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO)
readBinary(forwarded_for, in);
if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_REFERER_IN_CLIENT_INFO)
readBinary(http_referer, in);
- }
-
- if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO)
- readBinary(quota_key, in);
-
+ }
+
+ if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO)
+ readBinary(quota_key, in);
+
if (client_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH)
readVarUInt(distributed_depth, in);
- if (interface == Interface::TCP)
- {
- if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH)
- readVarUInt(client_version_patch, in);
- else
+ if (interface == Interface::TCP)
+ {
+ if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH)
+ readVarUInt(client_version_patch, in);
+ else
client_version_patch = client_tcp_protocol_version;
- }
+ }
if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY)
{
@@ -167,32 +167,32 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision)
readBinary(client_trace_context.trace_flags, in);
}
}
-}
-
-
-void ClientInfo::setInitialQuery()
-{
- query_kind = QueryKind::INITIAL_QUERY;
- fillOSUserHostNameAndVersionInfo();
+}
+
+
+void ClientInfo::setInitialQuery()
+{
+ query_kind = QueryKind::INITIAL_QUERY;
+ fillOSUserHostNameAndVersionInfo();
client_name = "ClickHouse " + client_name;
-}
-
-
-void ClientInfo::fillOSUserHostNameAndVersionInfo()
-{
- os_user.resize(256, '\0');
- if (0 == getlogin_r(os_user.data(), os_user.size() - 1))
- os_user.resize(strlen(os_user.c_str()));
- else
- os_user.clear(); /// Don't mind if we cannot determine user login.
-
- client_hostname = getFQDNOrHostName();
-
- client_version_major = DBMS_VERSION_MAJOR;
- client_version_minor = DBMS_VERSION_MINOR;
- client_version_patch = DBMS_VERSION_PATCH;
+}
+
+
+void ClientInfo::fillOSUserHostNameAndVersionInfo()
+{
+ os_user.resize(256, '\0');
+ if (0 == getlogin_r(os_user.data(), os_user.size() - 1))
+ os_user.resize(strlen(os_user.c_str()));
+ else
+ os_user.clear(); /// Don't mind if we cannot determine user login.
+
+ client_hostname = getFQDNOrHostName();
+
+ client_version_major = DBMS_VERSION_MAJOR;
+ client_version_minor = DBMS_VERSION_MINOR;
+ client_version_patch = DBMS_VERSION_PATCH;
client_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION;
-}
-
-
-}
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp
index e5959273f0..7259c8a456 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp
@@ -1,87 +1,87 @@
-#include <Interpreters/Cluster.h>
-#include <common/SimpleCache.h>
-#include <Common/DNSResolver.h>
-#include <Common/escapeForFileName.h>
-#include <Common/isLocalAddress.h>
-#include <Common/parseAddress.h>
+#include <Interpreters/Cluster.h>
+#include <common/SimpleCache.h>
+#include <Common/DNSResolver.h>
+#include <Common/escapeForFileName.h>
+#include <Common/isLocalAddress.h>
+#include <Common/parseAddress.h>
#include <Common/Config/AbstractConfigurationComparison.h>
#include <Core/Settings.h>
-#include <IO/WriteHelpers.h>
-#include <IO/ReadHelpers.h>
-#include <Poco/Util/AbstractConfiguration.h>
-#include <Poco/Util/Application.h>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+#include <Poco/Util/AbstractConfiguration.h>
+#include <Poco/Util/Application.h>
#include <common/range.h>
#include <boost/range/algorithm_ext/erase.hpp>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int UNKNOWN_ELEMENT_IN_CONFIG;
- extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
- extern const int LOGICAL_ERROR;
- extern const int SHARD_HAS_NO_CONNECTIONS;
- extern const int SYNTAX_ERROR;
-}
-
-namespace
-{
-
-/// Default shard weight.
-constexpr UInt32 default_weight = 1;
-
-inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::SocketAddress & resolved_address, UInt16 clickhouse_port)
-{
- /// If there is replica, for which:
- /// - its port is the same that the server is listening;
- /// - its host is resolved to set of addresses, one of which is the same as one of addresses of network interfaces of the server machine*;
- /// then we must go to this shard without any inter-process communication.
- ///
- /// * - this criteria is somewhat approximate.
- ///
- /// Also, replica is considered non-local, if it has default database set
- /// (only reason is to avoid query rewrite).
-
- return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port);
-}
-
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int UNKNOWN_ELEMENT_IN_CONFIG;
+ extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
+ extern const int LOGICAL_ERROR;
+ extern const int SHARD_HAS_NO_CONNECTIONS;
+ extern const int SYNTAX_ERROR;
+}
+
+namespace
+{
+
+/// Default shard weight.
+constexpr UInt32 default_weight = 1;
+
+inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::SocketAddress & resolved_address, UInt16 clickhouse_port)
+{
+ /// If there is replica, for which:
+ /// - its port is the same that the server is listening;
+ /// - its host is resolved to set of addresses, one of which is the same as one of addresses of network interfaces of the server machine*;
+ /// then we must go to this shard without any inter-process communication.
+ ///
+ /// * - this criteria is somewhat approximate.
+ ///
+ /// Also, replica is considered non-local, if it has default database set
+ /// (only reason is to avoid query rewrite).
+
+ return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port);
+}
+
void concatInsertPath(std::string & insert_path, const std::string & dir_name)
{
if (insert_path.empty())
insert_path = dir_name;
else
insert_path += "," + dir_name;
+}
+
}
-}
-
-/// Implementation of Cluster::Address class
-
-std::optional<Poco::Net::SocketAddress> Cluster::Address::getResolvedAddress() const
-{
- try
- {
- return DNSResolver::instance().resolveAddress(host_name, port);
- }
- catch (...)
- {
- /// Failure in DNS resolution in cluster initialization is Ok.
- tryLogCurrentException("Cluster");
- return {};
- }
-}
-
-
-bool Cluster::Address::isLocal(UInt16 clickhouse_port) const
-{
- if (auto resolved = getResolvedAddress())
- return isLocalImpl(*this, *resolved, clickhouse_port);
- return false;
-}
-
-
-Cluster::Address::Address(
+/// Implementation of Cluster::Address class
+
+std::optional<Poco::Net::SocketAddress> Cluster::Address::getResolvedAddress() const
+{
+ try
+ {
+ return DNSResolver::instance().resolveAddress(host_name, port);
+ }
+ catch (...)
+ {
+ /// Failure in DNS resolution in cluster initialization is Ok.
+ tryLogCurrentException("Cluster");
+ return {};
+ }
+}
+
+
+bool Cluster::Address::isLocal(UInt16 clickhouse_port) const
+{
+ if (auto resolved = getResolvedAddress())
+ return isLocalImpl(*this, *resolved, clickhouse_port);
+ return false;
+}
+
+
+Cluster::Address::Address(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const String & cluster_,
@@ -92,27 +92,27 @@ Cluster::Address::Address(
, cluster_secret(cluster_secret_)
, shard_index(shard_index_)
, replica_index(replica_index_)
-{
- host_name = config.getString(config_prefix + ".host");
- port = static_cast<UInt16>(config.getInt(config_prefix + ".port"));
- if (config.has(config_prefix + ".user"))
- user_specified = true;
-
- user = config.getString(config_prefix + ".user", "default");
- password = config.getString(config_prefix + ".password", "");
- default_database = config.getString(config_prefix + ".default_database", "");
- secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
- priority = config.getInt(config_prefix + ".priority", 1);
- const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port";
- is_local = isLocal(config.getInt(port_type, 0));
+{
+ host_name = config.getString(config_prefix + ".host");
+ port = static_cast<UInt16>(config.getInt(config_prefix + ".port"));
+ if (config.has(config_prefix + ".user"))
+ user_specified = true;
+
+ user = config.getString(config_prefix + ".user", "default");
+ password = config.getString(config_prefix + ".password", "");
+ default_database = config.getString(config_prefix + ".default_database", "");
+ secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
+ priority = config.getInt(config_prefix + ".priority", 1);
+ const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port";
+ is_local = isLocal(config.getInt(port_type, 0));
/// By default compression is disabled if address looks like localhost.
/// NOTE: it's still enabled when interacting with servers on different port, but we don't want to complicate the logic.
compression = config.getBool(config_prefix + ".compression", !is_local)
? Protocol::Compression::Enable : Protocol::Compression::Disable;
-}
-
-
+}
+
+
Cluster::Address::Address(
const String & host_port_,
const String & user_,
@@ -124,7 +124,7 @@ Cluster::Address::Address(
UInt32 shard_index_,
UInt32 replica_index_)
: user(user_), password(password_)
-{
+{
bool can_be_local = true;
std::pair<std::string, UInt16> parsed_host_port;
if (!treat_local_port_as_remote)
@@ -147,158 +147,158 @@ Cluster::Address::Address(
parsed_host_port = parseAddress(host_port_, clickhouse_port);
}
}
- host_name = parsed_host_port.first;
- port = parsed_host_port.second;
- secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
- priority = priority_;
+ host_name = parsed_host_port.first;
+ port = parsed_host_port.second;
+ secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
+ priority = priority_;
is_local = can_be_local && isLocal(clickhouse_port);
shard_index = shard_index_;
replica_index = replica_index_;
-}
-
-
-String Cluster::Address::toString() const
-{
- return toString(host_name, port);
-}
-
-String Cluster::Address::toString(const String & host_name, UInt16 port)
-{
- return escapeForFileName(host_name) + ':' + DB::toString(port);
-}
-
-String Cluster::Address::readableString() const
-{
- String res;
-
- /// If it looks like IPv6 address add braces to avoid ambiguity in ipv6_host:port notation
- if (host_name.find_first_of(':') != std::string::npos && !host_name.empty() && host_name.back() != ']')
- res += '[' + host_name + ']';
- else
- res += host_name;
-
- res += ':' + DB::toString(port);
- return res;
-}
-
-std::pair<String, UInt16> Cluster::Address::fromString(const String & host_port_string)
-{
- auto pos = host_port_string.find_last_of(':');
- if (pos == std::string::npos)
- throw Exception("Incorrect <host>:<port> format " + host_port_string, ErrorCodes::SYNTAX_ERROR);
-
- return {unescapeForFileName(host_port_string.substr(0, pos)), parse<UInt16>(host_port_string.substr(pos + 1))};
-}
-
-
-String Cluster::Address::toFullString(bool use_compact_format) const
-{
- if (use_compact_format)
- {
- if (shard_index == 0 || replica_index == 0)
- // shard_num/replica_num like in system.clusters table
- throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR);
-
+}
+
+
+String Cluster::Address::toString() const
+{
+ return toString(host_name, port);
+}
+
+String Cluster::Address::toString(const String & host_name, UInt16 port)
+{
+ return escapeForFileName(host_name) + ':' + DB::toString(port);
+}
+
+String Cluster::Address::readableString() const
+{
+ String res;
+
+ /// If it looks like IPv6 address add braces to avoid ambiguity in ipv6_host:port notation
+ if (host_name.find_first_of(':') != std::string::npos && !host_name.empty() && host_name.back() != ']')
+ res += '[' + host_name + ']';
+ else
+ res += host_name;
+
+ res += ':' + DB::toString(port);
+ return res;
+}
+
+std::pair<String, UInt16> Cluster::Address::fromString(const String & host_port_string)
+{
+ auto pos = host_port_string.find_last_of(':');
+ if (pos == std::string::npos)
+ throw Exception("Incorrect <host>:<port> format " + host_port_string, ErrorCodes::SYNTAX_ERROR);
+
+ return {unescapeForFileName(host_port_string.substr(0, pos)), parse<UInt16>(host_port_string.substr(pos + 1))};
+}
+
+
+String Cluster::Address::toFullString(bool use_compact_format) const
+{
+ if (use_compact_format)
+ {
+ if (shard_index == 0 || replica_index == 0)
+ // shard_num/replica_num like in system.clusters table
+ throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR);
+
return fmt::format("shard{}_replica{}", shard_index, replica_index);
- }
- else
- {
- return
- escapeForFileName(user)
- + (password.empty() ? "" : (':' + escapeForFileName(password))) + '@'
- + escapeForFileName(host_name) + ':' + std::to_string(port)
- + (default_database.empty() ? "" : ('#' + escapeForFileName(default_database)))
- + ((secure == Protocol::Secure::Enable) ? "+secure" : "");
- }
-}
-
-Cluster::Address Cluster::Address::fromFullString(const String & full_string)
-{
- const char * address_begin = full_string.data();
- const char * address_end = address_begin + full_string.size();
-
- const char * user_pw_end = strchr(full_string.data(), '@');
-
+ }
+ else
+ {
+ return
+ escapeForFileName(user)
+ + (password.empty() ? "" : (':' + escapeForFileName(password))) + '@'
+ + escapeForFileName(host_name) + ':' + std::to_string(port)
+ + (default_database.empty() ? "" : ('#' + escapeForFileName(default_database)))
+ + ((secure == Protocol::Secure::Enable) ? "+secure" : "");
+ }
+}
+
+Cluster::Address Cluster::Address::fromFullString(const String & full_string)
+{
+ const char * address_begin = full_string.data();
+ const char * address_end = address_begin + full_string.size();
+
+ const char * user_pw_end = strchr(full_string.data(), '@');
+
/// parsing with the new shard{shard_index}[_replica{replica_index}] format
if (!user_pw_end && full_string.starts_with("shard"))
- {
- const char * underscore = strchr(full_string.data(), '_');
-
- Address address;
- address.shard_index = parse<UInt32>(address_begin + strlen("shard"));
- address.replica_index = underscore ? parse<UInt32>(underscore + strlen("_replica")) : 0;
-
- return address;
- }
- else
- {
- /// parsing with the old user[:password]@host:port#default_database format
- /// This format is appeared to be inconvenient for the following reasons:
- /// - credentials are exposed in file name;
- /// - the file name can be too long.
-
- Protocol::Secure secure = Protocol::Secure::Disable;
- const char * secure_tag = "+secure";
+ {
+ const char * underscore = strchr(full_string.data(), '_');
+
+ Address address;
+ address.shard_index = parse<UInt32>(address_begin + strlen("shard"));
+ address.replica_index = underscore ? parse<UInt32>(underscore + strlen("_replica")) : 0;
+
+ return address;
+ }
+ else
+ {
+ /// parsing with the old user[:password]@host:port#default_database format
+ /// This format is appeared to be inconvenient for the following reasons:
+ /// - credentials are exposed in file name;
+ /// - the file name can be too long.
+
+ Protocol::Secure secure = Protocol::Secure::Disable;
+ const char * secure_tag = "+secure";
if (full_string.ends_with(secure_tag))
- {
- address_end -= strlen(secure_tag);
- secure = Protocol::Secure::Enable;
- }
-
- const char * colon = strchr(full_string.data(), ':');
- if (!user_pw_end || !colon)
- throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR);
-
- const bool has_pw = colon < user_pw_end;
- const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon;
- if (!host_end)
- throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR);
-
- const char * has_db = strchr(full_string.data(), '#');
- const char * port_end = has_db ? has_db : address_end;
-
- Address address;
- address.secure = secure;
- address.port = parse<UInt16>(host_end + 1, port_end - (host_end + 1));
- address.host_name = unescapeForFileName(std::string(user_pw_end + 1, host_end));
- address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end));
- address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string();
- address.default_database = has_db ? unescapeForFileName(std::string(has_db + 1, address_end)) : std::string();
- // address.priority ignored
- return address;
- }
-}
-
-
-/// Implementation of Clusters class
-
+ {
+ address_end -= strlen(secure_tag);
+ secure = Protocol::Secure::Enable;
+ }
+
+ const char * colon = strchr(full_string.data(), ':');
+ if (!user_pw_end || !colon)
+ throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR);
+
+ const bool has_pw = colon < user_pw_end;
+ const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon;
+ if (!host_end)
+ throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR);
+
+ const char * has_db = strchr(full_string.data(), '#');
+ const char * port_end = has_db ? has_db : address_end;
+
+ Address address;
+ address.secure = secure;
+ address.port = parse<UInt16>(host_end + 1, port_end - (host_end + 1));
+ address.host_name = unescapeForFileName(std::string(user_pw_end + 1, host_end));
+ address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end));
+ address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string();
+ address.default_database = has_db ? unescapeForFileName(std::string(has_db + 1, address_end)) : std::string();
+ // address.priority ignored
+ return address;
+ }
+}
+
+
+/// Implementation of Clusters class
+
Clusters::Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix)
-{
+{
updateClusters(config, settings, config_prefix);
-}
-
-
-ClusterPtr Clusters::getCluster(const std::string & cluster_name) const
-{
- std::lock_guard lock(mutex);
-
- auto it = impl.find(cluster_name);
- return (it != impl.end()) ? it->second : nullptr;
-}
-
-
-void Clusters::setCluster(const String & cluster_name, const std::shared_ptr<Cluster> & cluster)
-{
- std::lock_guard lock(mutex);
- impl[cluster_name] = cluster;
-}
-
-
+}
+
+
+ClusterPtr Clusters::getCluster(const std::string & cluster_name) const
+{
+ std::lock_guard lock(mutex);
+
+ auto it = impl.find(cluster_name);
+ return (it != impl.end()) ? it->second : nullptr;
+}
+
+
+void Clusters::setCluster(const String & cluster_name, const std::shared_ptr<Cluster> & cluster)
+{
+ std::lock_guard lock(mutex);
+ impl[cluster_name] = cluster;
+}
+
+
void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config)
-{
+{
Poco::Util::AbstractConfiguration::Keys new_config_keys;
new_config.keys(config_prefix, new_config_keys);
-
+
/// If old config is set, we will update only clusters with updated config.
/// In this case, we first need to find clusters that were deleted from config.
Poco::Util::AbstractConfiguration::Keys deleted_keys;
@@ -314,11 +314,11 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf
old_config_keys.begin(), old_config_keys.end(), new_config_keys.begin(), new_config_keys.end(), std::back_inserter(deleted_keys));
}
- std::lock_guard lock(mutex);
-
+ std::lock_guard lock(mutex);
+
/// If old config is set, remove deleted clusters from impl, otherwise just clear it.
if (old_config)
- {
+ {
for (const auto & key : deleted_keys)
impl.erase(key);
}
@@ -327,181 +327,181 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf
for (const auto & key : new_config_keys)
{
- if (key.find('.') != String::npos)
- throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR);
-
+ if (key.find('.') != String::npos)
+ throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR);
+
/// If old config is set and cluster config wasn't changed, don't update this cluster.
if (!old_config || !isSameConfiguration(new_config, *old_config, config_prefix + "." + key))
impl[key] = std::make_shared<Cluster>(new_config, settings, config_prefix, key);
- }
-}
-
-Clusters::Impl Clusters::getContainer() const
-{
- std::lock_guard lock(mutex);
- /// The following line copies container of shared_ptrs to return value under lock
- return impl;
-}
-
-
-/// Implementation of `Cluster` class
-
+ }
+}
+
+Clusters::Impl Clusters::getContainer() const
+{
+ std::lock_guard lock(mutex);
+ /// The following line copies container of shared_ptrs to return value under lock
+ return impl;
+}
+
+
+/// Implementation of `Cluster` class
+
Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
const Settings & settings,
const String & config_prefix_,
const String & cluster_name) : name(cluster_name)
-{
+{
auto config_prefix = config_prefix_ + "." + cluster_name;
- Poco::Util::AbstractConfiguration::Keys config_keys;
+ Poco::Util::AbstractConfiguration::Keys config_keys;
config.keys(config_prefix, config_keys);
-
+
config_prefix += ".";
secret = config.getString(config_prefix + "secret", "");
boost::range::remove_erase(config_keys, "secret");
- if (config_keys.empty())
+ if (config_keys.empty())
throw Exception("No cluster elements (shard, node) specified in config at path " + config_prefix, ErrorCodes::SHARD_HAS_NO_CONNECTIONS);
-
- UInt32 current_shard_num = 1;
- for (const auto & key : config_keys)
- {
+
+ UInt32 current_shard_num = 1;
+ for (const auto & key : config_keys)
+ {
if (key.starts_with("node"))
- {
- /// Shard without replicas.
-
- Addresses addresses;
-
- const auto & prefix = config_prefix + key;
- const auto weight = config.getInt(prefix + ".weight", default_weight);
-
+ {
+ /// Shard without replicas.
+
+ Addresses addresses;
+
+ const auto & prefix = config_prefix + key;
+ const auto weight = config.getInt(prefix + ".weight", default_weight);
+
addresses.emplace_back(config, prefix, cluster_name, secret, current_shard_num, 1);
- const auto & address = addresses.back();
-
- ShardInfo info;
- info.shard_num = current_shard_num;
- info.weight = weight;
-
- if (address.is_local)
- info.local_addresses.push_back(address);
-
+ const auto & address = addresses.back();
+
+ ShardInfo info;
+ info.shard_num = current_shard_num;
+ info.weight = weight;
+
+ if (address.is_local)
+ info.local_addresses.push_back(address);
+
auto pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- address.host_name, address.port,
- address.default_database, address.user, address.password,
+ settings.distributed_connections_pool_size,
+ address.host_name, address.port,
+ address.default_database, address.user, address.password,
address.cluster, address.cluster_secret,
- "server", address.compression,
- address.secure, address.priority);
-
- info.pool = std::make_shared<ConnectionPoolWithFailover>(
- ConnectionPoolPtrs{pool}, settings.load_balancing);
- info.per_replica_pools = {std::move(pool)};
-
- if (weight)
- slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
-
- shards_info.emplace_back(std::move(info));
- addresses_with_failover.emplace_back(std::move(addresses));
- }
+ "server", address.compression,
+ address.secure, address.priority);
+
+ info.pool = std::make_shared<ConnectionPoolWithFailover>(
+ ConnectionPoolPtrs{pool}, settings.load_balancing);
+ info.per_replica_pools = {std::move(pool)};
+
+ if (weight)
+ slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
+
+ shards_info.emplace_back(std::move(info));
+ addresses_with_failover.emplace_back(std::move(addresses));
+ }
else if (key.starts_with("shard"))
- {
- /// Shard with replicas.
-
- Poco::Util::AbstractConfiguration::Keys replica_keys;
- config.keys(config_prefix + key, replica_keys);
-
- addresses_with_failover.emplace_back();
- Addresses & replica_addresses = addresses_with_failover.back();
- UInt32 current_replica_num = 1;
-
- const auto & partial_prefix = config_prefix + key + ".";
- const auto weight = config.getUInt(partial_prefix + ".weight", default_weight);
-
- bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
-
+ {
+ /// Shard with replicas.
+
+ Poco::Util::AbstractConfiguration::Keys replica_keys;
+ config.keys(config_prefix + key, replica_keys);
+
+ addresses_with_failover.emplace_back();
+ Addresses & replica_addresses = addresses_with_failover.back();
+ UInt32 current_replica_num = 1;
+
+ const auto & partial_prefix = config_prefix + key + ".";
+ const auto weight = config.getUInt(partial_prefix + ".weight", default_weight);
+
+ bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
+
ShardInfoInsertPathForInternalReplication insert_paths;
/// "_all_replicas" is a marker that will be replaced with all replicas
/// (for creating connections in the Distributed engine)
insert_paths.compact = fmt::format("shard{}_all_replicas", current_shard_num);
-
- for (const auto & replica_key : replica_keys)
- {
+
+ for (const auto & replica_key : replica_keys)
+ {
if (replica_key.starts_with("weight") ||replica_key.starts_with("internal_replication"))
- continue;
-
+ continue;
+
if (replica_key.starts_with("replica"))
- {
+ {
replica_addresses.emplace_back(config,
partial_prefix + replica_key,
cluster_name,
secret,
current_shard_num,
current_replica_num);
- ++current_replica_num;
-
- if (internal_replication)
- {
+ ++current_replica_num;
+
+ if (internal_replication)
+ {
auto dir_name = replica_addresses.back().toFullString(/* use_compact_format= */ false);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
- }
- }
- else
- throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
- }
-
- Addresses shard_local_addresses;
-
- ConnectionPoolPtrs all_replicas_pools;
- all_replicas_pools.reserve(replica_addresses.size());
-
- for (const auto & replica : replica_addresses)
- {
+ }
+ }
+ else
+ throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
+ }
+
+ Addresses shard_local_addresses;
+
+ ConnectionPoolPtrs all_replicas_pools;
+ all_replicas_pools.reserve(replica_addresses.size());
+
+ for (const auto & replica : replica_addresses)
+ {
auto replica_pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- replica.host_name, replica.port,
- replica.default_database, replica.user, replica.password,
+ settings.distributed_connections_pool_size,
+ replica.host_name, replica.port,
+ replica.default_database, replica.user, replica.password,
replica.cluster, replica.cluster_secret,
- "server", replica.compression,
- replica.secure, replica.priority);
-
- all_replicas_pools.emplace_back(replica_pool);
- if (replica.is_local)
- shard_local_addresses.push_back(replica);
- }
-
- ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
- all_replicas_pools, settings.load_balancing,
- settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
-
- if (weight)
- slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
-
- shards_info.push_back({
+ "server", replica.compression,
+ replica.secure, replica.priority);
+
+ all_replicas_pools.emplace_back(replica_pool);
+ if (replica.is_local)
+ shard_local_addresses.push_back(replica);
+ }
+
+ ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
+ all_replicas_pools, settings.load_balancing,
+ settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
+
+ if (weight)
+ slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
+
+ shards_info.push_back({
std::move(insert_paths),
- current_shard_num,
- weight,
- std::move(shard_local_addresses),
- std::move(shard_pool),
- std::move(all_replicas_pools),
- internal_replication
- });
- }
- else
- throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
-
- ++current_shard_num;
- }
-
- if (addresses_with_failover.empty())
- throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
- initMisc();
-}
-
-
+ current_shard_num,
+ weight,
+ std::move(shard_local_addresses),
+ std::move(shard_pool),
+ std::move(all_replicas_pools),
+ internal_replication
+ });
+ }
+ else
+ throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
+
+ ++current_shard_num;
+ }
+
+ if (addresses_with_failover.empty())
+ throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
+
+ initMisc();
+}
+
+
Cluster::Cluster(
const Settings & settings,
const std::vector<std::vector<String>> & names,
@@ -512,13 +512,13 @@ Cluster::Cluster(
bool treat_local_port_as_remote,
bool secure,
Int64 priority)
-{
- UInt32 current_shard_num = 1;
-
- for (const auto & shard : names)
- {
- Addresses current;
- for (const auto & replica : shard)
+{
+ UInt32 current_shard_num = 1;
+
+ for (const auto & shard : names)
+ {
+ Addresses current;
+ for (const auto & replica : shard)
current.emplace_back(
replica,
username,
@@ -529,163 +529,163 @@ Cluster::Cluster(
priority,
current_shard_num,
current.size() + 1);
-
- addresses_with_failover.emplace_back(current);
-
- Addresses shard_local_addresses;
- ConnectionPoolPtrs all_replicas;
- all_replicas.reserve(current.size());
-
- for (const auto & replica : current)
- {
+
+ addresses_with_failover.emplace_back(current);
+
+ Addresses shard_local_addresses;
+ ConnectionPoolPtrs all_replicas;
+ all_replicas.reserve(current.size());
+
+ for (const auto & replica : current)
+ {
auto replica_pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- replica.host_name, replica.port,
- replica.default_database, replica.user, replica.password,
+ settings.distributed_connections_pool_size,
+ replica.host_name, replica.port,
+ replica.default_database, replica.user, replica.password,
replica.cluster, replica.cluster_secret,
- "server", replica.compression, replica.secure, replica.priority);
- all_replicas.emplace_back(replica_pool);
- if (replica.is_local && !treat_local_as_remote)
- shard_local_addresses.push_back(replica);
- }
-
- ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
- all_replicas, settings.load_balancing,
- settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
-
- slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size());
- shards_info.push_back({
+ "server", replica.compression, replica.secure, replica.priority);
+ all_replicas.emplace_back(replica_pool);
+ if (replica.is_local && !treat_local_as_remote)
+ shard_local_addresses.push_back(replica);
+ }
+
+ ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
+ all_replicas, settings.load_balancing,
+ settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
+
+ slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size());
+ shards_info.push_back({
{}, // insert_path_for_internal_replication
- current_shard_num,
- default_weight,
- std::move(shard_local_addresses),
- std::move(shard_pool),
- std::move(all_replicas),
- false // has_internal_replication
- });
- ++current_shard_num;
- }
-
- initMisc();
-}
-
-
+ current_shard_num,
+ default_weight,
+ std::move(shard_local_addresses),
+ std::move(shard_pool),
+ std::move(all_replicas),
+ false // has_internal_replication
+ });
+ ++current_shard_num;
+ }
+
+ initMisc();
+}
+
+
Poco::Timespan Cluster::saturate(Poco::Timespan v, Poco::Timespan limit)
-{
- if (limit.totalMicroseconds() == 0)
- return v;
- else
- return (v > limit) ? limit : v;
-}
-
-
-void Cluster::initMisc()
-{
- for (const auto & shard_info : shards_info)
- {
- if (!shard_info.isLocal() && !shard_info.hasRemoteConnections())
- throw Exception("Found shard without any specified connection",
- ErrorCodes::SHARD_HAS_NO_CONNECTIONS);
- }
-
- for (const auto & shard_info : shards_info)
- {
- if (shard_info.isLocal())
- ++local_shard_count;
- else
- ++remote_shard_count;
- }
-
- for (auto & shard_info : shards_info)
- {
- if (!shard_info.isLocal())
- {
- any_remote_shard_info = &shard_info;
- break;
- }
- }
-}
-
-std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const
-{
- return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)};
-}
-
-std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
-{
- return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, {index}) };
-}
-
-std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector<size_t> & indices) const
-{
- return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) };
-}
-
-Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings)
-{
- if (from.addresses_with_failover.empty())
- throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
-
+{
+ if (limit.totalMicroseconds() == 0)
+ return v;
+ else
+ return (v > limit) ? limit : v;
+}
+
+
+void Cluster::initMisc()
+{
+ for (const auto & shard_info : shards_info)
+ {
+ if (!shard_info.isLocal() && !shard_info.hasRemoteConnections())
+ throw Exception("Found shard without any specified connection",
+ ErrorCodes::SHARD_HAS_NO_CONNECTIONS);
+ }
+
+ for (const auto & shard_info : shards_info)
+ {
+ if (shard_info.isLocal())
+ ++local_shard_count;
+ else
+ ++remote_shard_count;
+ }
+
+ for (auto & shard_info : shards_info)
+ {
+ if (!shard_info.isLocal())
+ {
+ any_remote_shard_info = &shard_info;
+ break;
+ }
+ }
+}
+
+std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const
+{
+ return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)};
+}
+
+std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
+{
+ return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, {index}) };
+}
+
+std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector<size_t> & indices) const
+{
+ return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) };
+}
+
+Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings)
+{
+ if (from.addresses_with_failover.empty())
+ throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
+
UInt32 shard_num = 0;
- std::set<std::pair<String, int>> unique_hosts;
+ std::set<std::pair<String, int>> unique_hosts;
for (size_t shard_index : collections::range(0, from.shards_info.size()))
- {
- const auto & replicas = from.addresses_with_failover[shard_index];
- for (const auto & address : replicas)
- {
- if (!unique_hosts.emplace(address.host_name, address.port).second)
- continue; /// Duplicate host, skip.
-
- ShardInfo info;
+ {
+ const auto & replicas = from.addresses_with_failover[shard_index];
+ for (const auto & address : replicas)
+ {
+ if (!unique_hosts.emplace(address.host_name, address.port).second)
+ continue; /// Duplicate host, skip.
+
+ ShardInfo info;
info.shard_num = ++shard_num;
- if (address.is_local)
- info.local_addresses.push_back(address);
-
+ if (address.is_local)
+ info.local_addresses.push_back(address);
+
auto pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- address.host_name,
- address.port,
- address.default_database,
- address.user,
- address.password,
+ settings.distributed_connections_pool_size,
+ address.host_name,
+ address.port,
+ address.default_database,
+ address.user,
+ address.password,
address.cluster,
address.cluster_secret,
- "server",
- address.compression,
- address.secure,
- address.priority);
-
- info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing);
- info.per_replica_pools = {std::move(pool)};
-
- addresses_with_failover.emplace_back(Addresses{address});
- shards_info.emplace_back(std::move(info));
- }
- }
-
- initMisc();
-}
-
-
-Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices)
-{
- for (size_t index : indices)
- {
- shards_info.emplace_back(from.shards_info.at(index));
-
- if (!from.addresses_with_failover.empty())
- addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
- }
-
- initMisc();
-}
-
+ "server",
+ address.compression,
+ address.secure,
+ address.priority);
+
+ info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing);
+ info.per_replica_pools = {std::move(pool)};
+
+ addresses_with_failover.emplace_back(Addresses{address});
+ shards_info.emplace_back(std::move(info));
+ }
+ }
+
+ initMisc();
+}
+
+
+Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices)
+{
+ for (size_t index : indices)
+ {
+ shards_info.emplace_back(from.shards_info.at(index));
+
+ if (!from.addresses_with_failover.empty())
+ addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
+ }
+
+ initMisc();
+}
+
const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const
-{
- if (!has_internal_replication)
- throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR);
-
+{
+ if (!has_internal_replication)
+ throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR);
+
const auto & paths = insert_path_for_internal_replication;
if (!use_compact_format)
{
@@ -697,12 +697,12 @@ const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool pr
}
return path;
}
- else
+ else
{
return paths.compact;
}
-}
-
+}
+
bool Cluster::maybeCrossReplication() const
{
/// Cluster can be used for cross-replication if some replicas have different default database names,
@@ -718,6 +718,6 @@ bool Cluster::maybeCrossReplication() const
return true;
return false;
-}
+}
}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h
index a77eb3983d..e2312932b4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h
@@ -1,10 +1,10 @@
-#pragma once
-
-#include <Client/ConnectionPool.h>
-#include <Client/ConnectionPoolWithFailover.h>
-
-#include <Poco/Net/SocketAddress.h>
+#pragma once
+
+#include <Client/ConnectionPool.h>
+#include <Client/ConnectionPoolWithFailover.h>
+#include <Poco/Net/SocketAddress.h>
+
#include <map>
namespace Poco
@@ -15,33 +15,33 @@ namespace Poco
}
}
-namespace DB
-{
+namespace DB
+{
struct Settings;
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-/// Cluster contains connection pools to each node
-/// With the local nodes, the connection is not established, but the request is executed directly.
-/// Therefore we store only the number of local nodes
-/// In the config, the cluster includes nodes <node> or <shard>
-class Cluster
-{
-public:
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+/// Cluster contains connection pools to each node
+/// With the local nodes, the connection is not established, but the request is executed directly.
+/// Therefore we store only the number of local nodes
+/// In the config, the cluster includes nodes <node> or <shard>
+class Cluster
+{
+public:
Cluster(const Poco::Util::AbstractConfiguration & config,
const Settings & settings,
const String & config_prefix_,
const String & cluster_name);
-
- /// Construct a cluster by the names of shards and replicas.
- /// Local are treated as well as remote ones if treat_local_as_remote is true.
+
+ /// Construct a cluster by the names of shards and replicas.
+ /// Local are treated as well as remote ones if treat_local_as_remote is true.
/// Local are also treated as remote if treat_local_port_as_remote is set and the local address includes a port
- /// 'clickhouse_port' - port that this server instance listen for queries.
- /// This parameter is needed only to check that some address is local (points to ourself).
+ /// 'clickhouse_port' - port that this server instance listen for queries.
+ /// This parameter is needed only to check that some address is local (points to ourself).
///
/// Used for remote() function.
Cluster(
@@ -54,111 +54,111 @@ public:
bool treat_local_port_as_remote,
bool secure = false,
Int64 priority = 1);
-
- Cluster(const Cluster &)= delete;
- Cluster & operator=(const Cluster &) = delete;
-
- /// is used to set a limit on the size of the timeout
+
+ Cluster(const Cluster &)= delete;
+ Cluster & operator=(const Cluster &) = delete;
+
+ /// is used to set a limit on the size of the timeout
static Poco::Timespan saturate(Poco::Timespan v, Poco::Timespan limit);
-
-public:
+
+public:
using SlotToShard = std::vector<UInt64>;
- struct Address
- {
- /** In configuration file,
- * addresses are located either in <node> elements:
- * <node>
- * <host>example01-01-1</host>
- * <port>9000</port>
- * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
- * </node>
- * ...
- * or in <shard> and inside in <replica> elements:
- * <shard>
- * <replica>
- * <host>example01-01-1</host>
- * <port>9000</port>
- * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
- * </replica>
- * </shard>
- */
-
- String host_name;
- UInt16 port;
- String user;
- String password;
+ struct Address
+ {
+ /** In configuration file,
+ * addresses are located either in <node> elements:
+ * <node>
+ * <host>example01-01-1</host>
+ * <port>9000</port>
+ * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
+ * </node>
+ * ...
+ * or in <shard> and inside in <replica> elements:
+ * <shard>
+ * <replica>
+ * <host>example01-01-1</host>
+ * <port>9000</port>
+ * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
+ * </replica>
+ * </shard>
+ */
+
+ String host_name;
+ UInt16 port;
+ String user;
+ String password;
/// For inter-server authorization
String cluster;
String cluster_secret;
- UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1.
- UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas.
-
- /// This database is selected when no database is specified for Distributed table
- String default_database;
- /// The locality is determined at the initialization, and is not changed even if DNS is changed
- bool is_local = false;
- bool user_specified = false;
-
- Protocol::Compression compression = Protocol::Compression::Enable;
- Protocol::Secure secure = Protocol::Secure::Disable;
-
- Int64 priority = 1;
-
- Address() = default;
-
- Address(
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
+ UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1.
+ UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas.
+
+ /// This database is selected when no database is specified for Distributed table
+ String default_database;
+ /// The locality is determined at the initialization, and is not changed even if DNS is changed
+ bool is_local = false;
+ bool user_specified = false;
+
+ Protocol::Compression compression = Protocol::Compression::Enable;
+ Protocol::Secure secure = Protocol::Secure::Disable;
+
+ Int64 priority = 1;
+
+ Address() = default;
+
+ Address(
+ const Poco::Util::AbstractConfiguration & config,
+ const String & config_prefix,
const String & cluster_,
const String & cluster_secret_,
- UInt32 shard_index_ = 0,
- UInt32 replica_index_ = 0);
-
- Address(
- const String & host_port_,
- const String & user_,
- const String & password_,
- UInt16 clickhouse_port,
+ UInt32 shard_index_ = 0,
+ UInt32 replica_index_ = 0);
+
+ Address(
+ const String & host_port_,
+ const String & user_,
+ const String & password_,
+ UInt16 clickhouse_port,
bool treat_local_port_as_remote,
- bool secure_ = false,
+ bool secure_ = false,
Int64 priority_ = 1,
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
-
- /// Returns 'escaped_host_name:port'
- String toString() const;
-
- /// Returns 'host_name:port'
- String readableString() const;
-
- static String toString(const String & host_name, UInt16 port);
-
- static std::pair<String, UInt16> fromString(const String & host_port_string);
-
- /// Returns escaped shard{shard_index}_replica{replica_index} or escaped
- /// user:password@resolved_host_address:resolved_host_port#default_database
- /// depending on use_compact_format flag
- String toFullString(bool use_compact_format) const;
-
- /// Returns address with only shard index and replica index or full address without shard index and replica index
- static Address fromFullString(const String & address_full_string);
-
- /// Returns resolved address if it does resolve.
- std::optional<Poco::Net::SocketAddress> getResolvedAddress() const;
-
- auto tuple() const { return std::tie(host_name, port, secure, user, password, default_database); }
- bool operator==(const Address & other) const { return tuple() == other.tuple(); }
-
- private:
- bool isLocal(UInt16 clickhouse_port) const;
- };
-
- using Addresses = std::vector<Address>;
- using AddressesWithFailover = std::vector<Addresses>;
-
+
+ /// Returns 'escaped_host_name:port'
+ String toString() const;
+
+ /// Returns 'host_name:port'
+ String readableString() const;
+
+ static String toString(const String & host_name, UInt16 port);
+
+ static std::pair<String, UInt16> fromString(const String & host_port_string);
+
+ /// Returns escaped shard{shard_index}_replica{replica_index} or escaped
+ /// user:password@resolved_host_address:resolved_host_port#default_database
+ /// depending on use_compact_format flag
+ String toFullString(bool use_compact_format) const;
+
+ /// Returns address with only shard index and replica index or full address without shard index and replica index
+ static Address fromFullString(const String & address_full_string);
+
+ /// Returns resolved address if it does resolve.
+ std::optional<Poco::Net::SocketAddress> getResolvedAddress() const;
+
+ auto tuple() const { return std::tie(host_name, port, secure, user, password, default_database); }
+ bool operator==(const Address & other) const { return tuple() == other.tuple(); }
+
+ private:
+ bool isLocal(UInt16 clickhouse_port) const;
+ };
+
+ using Addresses = std::vector<Address>;
+ using AddressesWithFailover = std::vector<Addresses>;
+
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
///
/// Contains different path for permutations of:
@@ -178,129 +178,129 @@ public:
std::string compact;
};
- struct ShardInfo
- {
- public:
- bool isLocal() const { return !local_addresses.empty(); }
- bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); }
- size_t getLocalNodeCount() const { return local_addresses.size(); }
- bool hasInternalReplication() const { return has_internal_replication; }
- /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
+ struct ShardInfo
+ {
+ public:
+ bool isLocal() const { return !local_addresses.empty(); }
+ bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); }
+ size_t getLocalNodeCount() const { return local_addresses.size(); }
+ bool hasInternalReplication() const { return has_internal_replication; }
+ /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const;
-
- public:
+
+ public:
ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication;
- /// Number of the shard, the indexation begins with 1
- UInt32 shard_num = 0;
- UInt32 weight = 1;
- Addresses local_addresses;
- /// nullptr if there are no remote addresses
- ConnectionPoolWithFailoverPtr pool;
- /// Connection pool for each replica, contains nullptr for local replicas
- ConnectionPoolPtrs per_replica_pools;
- bool has_internal_replication = false;
- };
-
- using ShardsInfo = std::vector<ShardInfo>;
-
- String getHashOfAddresses() const { return hash_of_addresses; }
- const ShardsInfo & getShardsInfo() const { return shards_info; }
- const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; }
-
- const ShardInfo & getAnyShardInfo() const
- {
- if (shards_info.empty())
- throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
- return shards_info.front();
- }
-
- /// The number of remote shards.
- size_t getRemoteShardCount() const { return remote_shard_count; }
-
- /// The number of clickhouse nodes located locally
- /// we access the local nodes directly.
- size_t getLocalShardCount() const { return local_shard_count; }
-
- /// The number of all shards.
- size_t getShardCount() const { return shards_info.size(); }
-
+ /// Number of the shard, the indexation begins with 1
+ UInt32 shard_num = 0;
+ UInt32 weight = 1;
+ Addresses local_addresses;
+ /// nullptr if there are no remote addresses
+ ConnectionPoolWithFailoverPtr pool;
+ /// Connection pool for each replica, contains nullptr for local replicas
+ ConnectionPoolPtrs per_replica_pools;
+ bool has_internal_replication = false;
+ };
+
+ using ShardsInfo = std::vector<ShardInfo>;
+
+ String getHashOfAddresses() const { return hash_of_addresses; }
+ const ShardsInfo & getShardsInfo() const { return shards_info; }
+ const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; }
+
+ const ShardInfo & getAnyShardInfo() const
+ {
+ if (shards_info.empty())
+ throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
+ return shards_info.front();
+ }
+
+ /// The number of remote shards.
+ size_t getRemoteShardCount() const { return remote_shard_count; }
+
+ /// The number of clickhouse nodes located locally
+ /// we access the local nodes directly.
+ size_t getLocalShardCount() const { return local_shard_count; }
+
+ /// The number of all shards.
+ size_t getShardCount() const { return shards_info.size(); }
+
const String & getSecret() const { return secret; }
- /// Get a subcluster consisting of one shard - index by count (from 0) of the shard of this cluster.
- std::unique_ptr<Cluster> getClusterWithSingleShard(size_t index) const;
-
- /// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster.
- std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
-
- /// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
- std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const;
-
+ /// Get a subcluster consisting of one shard - index by count (from 0) of the shard of this cluster.
+ std::unique_ptr<Cluster> getClusterWithSingleShard(size_t index) const;
+
+ /// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster.
+ std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
+
+ /// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
+ std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const;
+
/// Returns false if cluster configuration doesn't allow to use it for cross-replication.
/// NOTE: true does not mean, that it's actually a cross-replication cluster.
bool maybeCrossReplication() const;
-private:
- SlotToShard slot_to_shard;
-
-public:
- const SlotToShard & getSlotToShard() const { return slot_to_shard; }
-
-private:
- void initMisc();
-
- /// For getClusterWithMultipleShards implementation.
- struct SubclusterTag {};
- Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices);
-
- /// For getClusterWithReplicasAsShards implementation
- struct ReplicasAsShardsTag {};
- Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings);
-
+private:
+ SlotToShard slot_to_shard;
+
+public:
+ const SlotToShard & getSlotToShard() const { return slot_to_shard; }
+
+private:
+ void initMisc();
+
+ /// For getClusterWithMultipleShards implementation.
+ struct SubclusterTag {};
+ Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices);
+
+ /// For getClusterWithReplicasAsShards implementation
+ struct ReplicasAsShardsTag {};
+ Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings);
+
/// Inter-server secret
String secret;
- String hash_of_addresses;
- /// Description of the cluster shards.
- ShardsInfo shards_info;
- /// Any remote shard.
- ShardInfo * any_remote_shard_info = nullptr;
-
- /// Non-empty is either addresses or addresses_with_failover.
- /// The size and order of the elements in the corresponding array corresponds to shards_info.
-
- /// An array of shards. For each shard, an array of replica addresses (servers that are considered identical).
- AddressesWithFailover addresses_with_failover;
-
- size_t remote_shard_count = 0;
- size_t local_shard_count = 0;
+ String hash_of_addresses;
+ /// Description of the cluster shards.
+ ShardsInfo shards_info;
+ /// Any remote shard.
+ ShardInfo * any_remote_shard_info = nullptr;
+
+ /// Non-empty is either addresses or addresses_with_failover.
+ /// The size and order of the elements in the corresponding array corresponds to shards_info.
+
+ /// An array of shards. For each shard, an array of replica addresses (servers that are considered identical).
+ AddressesWithFailover addresses_with_failover;
+
+ size_t remote_shard_count = 0;
+ size_t local_shard_count = 0;
String name;
-};
-
-using ClusterPtr = std::shared_ptr<Cluster>;
-
-
-class Clusters
-{
-public:
+};
+
+using ClusterPtr = std::shared_ptr<Cluster>;
+
+
+class Clusters
+{
+public:
Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix = "remote_servers");
-
- Clusters(const Clusters &) = delete;
- Clusters & operator=(const Clusters &) = delete;
-
- ClusterPtr getCluster(const std::string & cluster_name) const;
- void setCluster(const String & cluster_name, const ClusterPtr & cluster);
-
+
+ Clusters(const Clusters &) = delete;
+ Clusters & operator=(const Clusters &) = delete;
+
+ ClusterPtr getCluster(const std::string & cluster_name) const;
+ void setCluster(const String & cluster_name, const ClusterPtr & cluster);
+
void updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config = nullptr);
-
-public:
- using Impl = std::map<String, ClusterPtr>;
-
- Impl getContainer() const;
-
-protected:
- Impl impl;
- mutable std::mutex mutex;
-};
-
-}
+
+public:
+ using Impl = std::map<String, ClusterPtr>;
+
+ Impl getContainer() const;
+
+protected:
+ Impl impl;
+ mutable std::mutex mutex;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h
index 2d0041bd96..49cd7bafc3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h
@@ -1,37 +1,37 @@
-#pragma once
-
+#pragma once
+
#include <Columns/FilterDescription.h>
-#include <DataStreams/IBlockStream_fwd.h>
-#include <Interpreters/AggregateDescription.h>
+#include <DataStreams/IBlockStream_fwd.h>
+#include <Interpreters/AggregateDescription.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/SubqueryForSet.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/WindowDescription.h>
#include <Interpreters/join_common.h>
-#include <Parsers/IAST_fwd.h>
-#include <Storages/IStorage_fwd.h>
-#include <Storages/SelectQueryInfo.h>
-
-namespace DB
-{
-
-class Block;
+#include <Parsers/IAST_fwd.h>
+#include <Storages/IStorage_fwd.h>
+#include <Storages/SelectQueryInfo.h>
+
+namespace DB
+{
+
+class Block;
struct Settings;
-
-struct ExpressionActionsChain;
-class ExpressionActions;
-using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
-using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
-
-struct ASTTableJoin;
-class IJoin;
-using JoinPtr = std::shared_ptr<IJoin>;
-
-class ASTFunction;
-class ASTExpressionList;
-class ASTSelectQuery;
-struct ASTTablesInSelectQueryElement;
-
+
+struct ExpressionActionsChain;
+class ExpressionActions;
+using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
+
+struct ASTTableJoin;
+class IJoin;
+using JoinPtr = std::shared_ptr<IJoin>;
+
+class ASTFunction;
+class ASTExpressionList;
+class ASTSelectQuery;
+struct ASTTablesInSelectQueryElement;
+
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
@@ -41,95 +41,95 @@ using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
-/// Create columns in block or return false if not possible
-bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false);
-
-/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately.
-struct ExpressionAnalyzerData
-{
+/// Create columns in block or return false if not possible
+bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false);
+
+/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately.
+struct ExpressionAnalyzerData
+{
~ExpressionAnalyzerData();
- SubqueriesForSets subqueries_for_sets;
- PreparedSets prepared_sets;
-
+ SubqueriesForSets subqueries_for_sets;
+ PreparedSets prepared_sets;
+
std::unique_ptr<QueryPlan> joined_plan;
/// Columns after ARRAY JOIN. If there is no ARRAY JOIN, it's source_columns.
NamesAndTypesList columns_after_array_join;
/// Columns after Columns after ARRAY JOIN and JOIN. If there is no JOIN, it's columns_after_array_join.
NamesAndTypesList columns_after_join;
- /// Columns after ARRAY JOIN, JOIN, and/or aggregation.
- NamesAndTypesList aggregated_columns;
+ /// Columns after ARRAY JOIN, JOIN, and/or aggregation.
+ NamesAndTypesList aggregated_columns;
/// Columns after window functions.
NamesAndTypesList columns_after_window;
-
- bool has_aggregation = false;
- NamesAndTypesList aggregation_keys;
+
+ bool has_aggregation = false;
+ NamesAndTypesList aggregation_keys;
bool has_const_aggregation_keys = false;
- AggregateDescriptions aggregate_descriptions;
-
+ AggregateDescriptions aggregate_descriptions;
+
WindowDescriptions window_descriptions;
NamesAndTypesList window_columns;
- bool has_global_subqueries = false;
-
- /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
- TemporaryTablesMapping external_tables;
-};
-
-
-/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
- *
- * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
- */
+ bool has_global_subqueries = false;
+
+ /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
+ TemporaryTablesMapping external_tables;
+};
+
+
+/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
+ *
+ * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
+ */
class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable, protected WithContext
-{
-private:
- /// Extracts settings to enlight which are used (and avoid copy of others).
- struct ExtractedSettings
- {
- const bool use_index_for_in_with_subqueries;
- const SizeLimits size_limits_for_set;
+{
+private:
+ /// Extracts settings to enlight which are used (and avoid copy of others).
+ struct ExtractedSettings
+ {
+ const bool use_index_for_in_with_subqueries;
+ const SizeLimits size_limits_for_set;
const UInt64 distributed_group_by_no_merge;
-
+
ExtractedSettings(const Settings & settings_);
- };
-
-public:
- /// Ctor for non-select queries. Generally its usage is:
- /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions();
+ };
+
+public:
+ /// Ctor for non-select queries. Generally its usage is:
+ /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions();
ExpressionAnalyzer(const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_)
: ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false, {}, {})
{
}
-
+
~ExpressionAnalyzer();
- void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);
-
- /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
- /// If add_aliases, only the calculated values in the desired order and add aliases.
- /// If also project_result, than only aliases remain in the output block.
- /// Otherwise, only temporary columns will be deleted from the block.
+ void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);
+
+ /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
+ /// If add_aliases, only the calculated values in the desired order and add aliases.
+ /// If also project_result, than only aliases remain in the output block.
+ /// Otherwise, only temporary columns will be deleted from the block.
ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true);
ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no);
-
- /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
- /// Does not execute subqueries.
+
+ /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
+ /// Does not execute subqueries.
ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {});
-
- /** Sets that require a subquery to be create.
- * Only the sets needed to perform actions returned from already executed `append*` or `getActions`.
- * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions`
- * and create all the returned sets before performing the actions.
- */
+
+ /** Sets that require a subquery to be create.
+ * Only the sets needed to perform actions returned from already executed `append*` or `getActions`.
+ * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions`
+ * and create all the returned sets before performing the actions.
+ */
SubqueriesForSets & getSubqueriesForSets() { return subqueries_for_sets; }
-
+
PreparedSets & getPreparedSets() { return prepared_sets; }
- /// Get intermediates for tests
- const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
-
+ /// Get intermediates for tests
+ const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
+
/// A list of windows for window functions.
const WindowDescriptions & windowDescriptions() const { return window_descriptions; }
@@ -148,76 +148,76 @@ public:
*/
SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name);
-protected:
- ExpressionAnalyzer(
- const ASTPtr & query_,
+protected:
+ ExpressionAnalyzer(
+ const ASTPtr & query_,
const TreeRewriterResultPtr & syntax_analyzer_result_,
ContextPtr context_,
- size_t subquery_depth_,
+ size_t subquery_depth_,
bool do_global_,
SubqueriesForSets subqueries_for_sets_,
PreparedSets prepared_sets_);
-
- ASTPtr query;
- const ExtractedSettings settings;
- size_t subquery_depth;
-
+
+ ASTPtr query;
+ const ExtractedSettings settings;
+ size_t subquery_depth;
+
TreeRewriterResultPtr syntax;
-
- const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
- const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
- const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
- const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
- /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
- void initGlobalSubqueriesAndExternalTables(bool do_global);
-
+
+ const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
+ const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
+ const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
+ const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
+ /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
+ void initGlobalSubqueriesAndExternalTables(bool do_global);
+
ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;
-
+
void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
-
- /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
- * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
- * prepared sets would not be applicable for MergeTree index optimization.
- */
+
+ /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
+ * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
+ * prepared sets would not be applicable for MergeTree index optimization.
+ */
void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
-
+
void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
- /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
- * Create a set of columns aggregated_columns resulting after the aggregation, if any,
- * or after all the actions that are normally performed before aggregation.
- * Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
- */
- void analyzeAggregation();
+ /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
+ * Create a set of columns aggregated_columns resulting after the aggregation, if any,
+ * or after all the actions that are normally performed before aggregation.
+ * Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
+ */
+ void analyzeAggregation();
bool makeAggregateDescriptions(ActionsDAGPtr & actions);
-
- const ASTSelectQuery * getSelectQuery() const;
-
+
+ const ASTSelectQuery * getSelectQuery() const;
+
bool isRemoteStorage() const { return syntax->is_remote_storage; }
-};
-
-class SelectQueryExpressionAnalyzer;
-
-/// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery
-struct ExpressionAnalysisResult
-{
+};
+
+class SelectQueryExpressionAnalyzer;
+
+/// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery
+struct ExpressionAnalysisResult
+{
std::string dump() const;
- /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
- bool first_stage = false;
- /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
- bool second_stage = false;
-
- bool need_aggregate = false;
- bool has_order_by = false;
+ /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
+ bool first_stage = false;
+ /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
+ bool second_stage = false;
+
+ bool need_aggregate = false;
+ bool has_order_by = false;
bool has_window = false;
-
+
String where_column_name;
- bool remove_where_filter = false;
- bool optimize_read_in_order = false;
- bool optimize_aggregation_in_order = false;
+ bool remove_where_filter = false;
+ bool optimize_read_in_order = false;
+ bool optimize_aggregation_in_order = false;
bool join_has_delayed_stream = false;
-
+
ActionsDAGPtr before_array_join;
ArrayJoinActionPtr array_join;
ActionsDAGPtr before_join;
@@ -230,63 +230,63 @@ struct ExpressionAnalysisResult
ActionsDAGPtr before_order_by;
ActionsDAGPtr before_limit_by;
ActionsDAGPtr final_projection;
-
+
/// Columns from the SELECT list, before renaming them to aliases. Used to
/// perform SELECT DISTINCT.
- Names selected_columns;
-
+ Names selected_columns;
+
/// Columns to read from storage if any.
Names required_columns;
- /// Columns will be removed after prewhere actions execution.
+ /// Columns will be removed after prewhere actions execution.
NameSet columns_to_remove_after_prewhere;
-
+
PrewhereInfoPtr prewhere_info;
FilterDAGInfoPtr filter_info;
- ConstantFilterDescription prewhere_constant_filter_description;
- ConstantFilterDescription where_constant_filter_description;
- /// Actions by every element of ORDER BY
- ManyExpressionActions order_by_elements_actions;
- ManyExpressionActions group_by_elements_actions;
-
- ExpressionAnalysisResult() = default;
-
- ExpressionAnalysisResult(
- SelectQueryExpressionAnalyzer & query_analyzer,
+ ConstantFilterDescription prewhere_constant_filter_description;
+ ConstantFilterDescription where_constant_filter_description;
+ /// Actions by every element of ORDER BY
+ ManyExpressionActions order_by_elements_actions;
+ ManyExpressionActions group_by_elements_actions;
+
+ ExpressionAnalysisResult() = default;
+
+ ExpressionAnalysisResult(
+ SelectQueryExpressionAnalyzer & query_analyzer,
const StorageMetadataPtr & metadata_snapshot,
- bool first_stage,
- bool second_stage,
- bool only_types,
+ bool first_stage,
+ bool second_stage,
+ bool only_types,
const FilterDAGInfoPtr & filter_info,
- const Block & source_header);
-
- /// Filter for row-level security.
- bool hasFilter() const { return filter_info.get(); }
-
- bool hasJoin() const { return join.get(); }
- bool hasPrewhere() const { return prewhere_info.get(); }
- bool hasWhere() const { return before_where.get(); }
- bool hasHaving() const { return before_having.get(); }
- bool hasLimitBy() const { return before_limit_by.get(); }
-
- void removeExtraColumns() const;
- void checkActions() const;
+ const Block & source_header);
+
+ /// Filter for row-level security.
+ bool hasFilter() const { return filter_info.get(); }
+
+ bool hasJoin() const { return join.get(); }
+ bool hasPrewhere() const { return prewhere_info.get(); }
+ bool hasWhere() const { return before_where.get(); }
+ bool hasHaving() const { return before_having.get(); }
+ bool hasLimitBy() const { return before_limit_by.get(); }
+
+ void removeExtraColumns() const;
+ void checkActions() const;
void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query);
-};
-
-/// SelectQuery specific ExpressionAnalyzer part.
-class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
-{
-public:
- friend struct ExpressionAnalysisResult;
-
- SelectQueryExpressionAnalyzer(
- const ASTPtr & query_,
+};
+
+/// SelectQuery specific ExpressionAnalyzer part.
+class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
+{
+public:
+ friend struct ExpressionAnalysisResult;
+
+ SelectQueryExpressionAnalyzer(
+ const ASTPtr & query_,
const TreeRewriterResultPtr & syntax_analyzer_result_,
ContextPtr context_,
const StorageMetadataPtr & metadata_snapshot_,
- const NameSet & required_result_columns_ = {},
- bool do_global_ = false,
+ const NameSet & required_result_columns_ = {},
+ bool do_global_ = false,
const SelectQueryOptions & options_ = {},
SubqueriesForSets subqueries_for_sets_ = {},
PreparedSets prepared_sets_ = {})
@@ -301,79 +301,79 @@ public:
, metadata_snapshot(metadata_snapshot_)
, required_result_columns(required_result_columns_)
, query_options(options_)
- {
- }
-
- /// Does the expression have aggregate functions or a GROUP BY or HAVING section.
- bool hasAggregation() const { return has_aggregation; }
+ {
+ }
+
+ /// Does the expression have aggregate functions or a GROUP BY or HAVING section.
+ bool hasAggregation() const { return has_aggregation; }
bool hasWindow() const { return !syntax->window_function_asts.empty(); }
- bool hasGlobalSubqueries() { return has_global_subqueries; }
- bool hasTableJoin() const { return syntax->ast_join; }
-
- const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
+ bool hasGlobalSubqueries() { return has_global_subqueries; }
+ bool hasTableJoin() const { return syntax->ast_join; }
+
+ const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
bool hasConstAggregationKeys() const { return has_const_aggregation_keys; }
- const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }
-
- const PreparedSets & getPreparedSets() const { return prepared_sets; }
+ const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }
+
+ const PreparedSets & getPreparedSets() const { return prepared_sets; }
std::unique_ptr<QueryPlan> getJoinedPlan();
-
- /// Tables that will need to be sent to remote servers for distributed query processing.
- const TemporaryTablesMapping & getExternalTables() const { return external_tables; }
-
+
+ /// Tables that will need to be sent to remote servers for distributed query processing.
+ const TemporaryTablesMapping & getExternalTables() const { return external_tables; }
+
ActionsDAGPtr simpleSelectActions();
-
- /// These appends are public only for tests
- void appendSelect(ExpressionActionsChain & chain, bool only_types);
- /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
+
+ /// These appends are public only for tests
+ void appendSelect(ExpressionActionsChain & chain, bool only_types);
+ /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const;
-
+
/// Create Set-s that we make from IN section to use index on them.
void makeSetsForIndex(const ASTPtr & node);
-private:
+private:
StorageMetadataPtr metadata_snapshot;
- /// If non-empty, ignore all expressions not from this list.
- NameSet required_result_columns;
- SelectQueryOptions query_options;
-
+ /// If non-empty, ignore all expressions not from this list.
+ NameSet required_result_columns;
+ SelectQueryOptions query_options;
+
JoinPtr makeTableJoin(
const ASTTablesInSelectQueryElement & join_element,
const ColumnsWithTypeAndName & left_sample_columns);
-
- const ASTSelectQuery * getAggregatingQuery() const;
-
- /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query.
- *
- * Example usage:
- * ExpressionActionsChain chain;
- * analyzer.appendWhere(chain);
- * chain.addStep();
- * analyzer.appendSelect(chain);
- * analyzer.appendOrderBy(chain);
- * chain.finalize();
- *
- * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way
- * shouldn't be executed, they are only needed to get a list of columns with their types.
- */
-
- /// Before aggregation:
+
+ const ASTSelectQuery * getAggregatingQuery() const;
+
+ /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query.
+ *
+ * Example usage:
+ * ExpressionActionsChain chain;
+ * analyzer.appendWhere(chain);
+ * chain.addStep();
+ * analyzer.appendSelect(chain);
+ * analyzer.appendOrderBy(chain);
+ * chain.finalize();
+ *
+ * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way
+ * shouldn't be executed, they are only needed to get a list of columns with their types.
+ */
+
+ /// Before aggregation:
ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types);
- bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types);
+ bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types);
JoinPtr appendJoin(ExpressionActionsChain & chain);
- /// remove_filter is set in ExpressionActionsChain::finalize();
- /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
+ /// remove_filter is set in ExpressionActionsChain::finalize();
+ /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
- bool appendWhere(ExpressionActionsChain & chain, bool only_types);
- bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
- void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
+ bool appendWhere(ExpressionActionsChain & chain, bool only_types);
+ bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
+ void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
void appendWindowFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
-
- /// After aggregation:
- bool appendHaving(ExpressionActionsChain & chain, bool only_types);
- /// appendSelect
+
+ /// After aggregation:
+ bool appendHaving(ExpressionActionsChain & chain, bool only_types);
+ /// appendSelect
ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &);
- bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
- /// appendProjectResult
-};
-
-}
+ bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
+ /// appendProjectResult
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h
index 2215402e1d..bbc8b67d4f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h
@@ -1,51 +1,51 @@
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include <Core/Names.h>
-#include <Columns/IColumn.h>
-
-namespace DB
-{
-
-class Block;
-struct ExtraBlock;
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <Core/Names.h>
+#include <Columns/IColumn.h>
+
+namespace DB
+{
+
+class Block;
+struct ExtraBlock;
using ExtraBlockPtr = std::shared_ptr<ExtraBlock>;
-
+
class TableJoin;
class NotJoinedBlocks;
-class IJoin
-{
-public:
- virtual ~IJoin() = default;
-
+class IJoin
+{
+public:
+ virtual ~IJoin() = default;
+
virtual const TableJoin & getTableJoin() const = 0;
- /// Add block of data from right hand of JOIN.
- /// @returns false, if some limit was exceeded and you should not insert more data.
- virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0;
-
- /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock).
- /// Could be called from different threads in parallel.
- virtual void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) = 0;
-
+ /// Add block of data from right hand of JOIN.
+ /// @returns false, if some limit was exceeded and you should not insert more data.
+ virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0;
+
+ /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock).
+ /// Could be called from different threads in parallel.
+ virtual void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) = 0;
+
/// Set/Get totals for right table
- virtual void setTotals(const Block & block) = 0;
+ virtual void setTotals(const Block & block) = 0;
virtual const Block & getTotals() const = 0;
-
- virtual size_t getTotalRowCount() const = 0;
- virtual size_t getTotalByteCount() const = 0;
+
+ virtual size_t getTotalRowCount() const = 0;
+ virtual size_t getTotalByteCount() const = 0;
virtual bool alwaysReturnsEmptySet() const = 0;
-
+
/// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock.
/// Different query plan is used for such joins.
virtual bool isFilled() const { return false; }
virtual std::shared_ptr<NotJoinedBlocks> getNonJoinedBlocks(const Block &, UInt64) const = 0;
-};
-
-using JoinPtr = std::shared_ptr<IJoin>;
-
-}
+};
+
+using JoinPtr = std::shared_ptr<IJoin>;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp
index a61b0d6dd9..16b59e6622 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.cpp
@@ -1,69 +1,69 @@
-#include "InternalTextLogsQueue.h"
-#include <DataTypes/DataTypeDateTime.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeEnum.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <common/logger_useful.h>
-
-#include <Poco/Message.h>
-
-
-namespace DB
-{
-
-InternalTextLogsQueue::InternalTextLogsQueue()
- : ConcurrentBoundedQueue<MutableColumns>(std::numeric_limits<int>::max()),
- max_priority(Poco::Message::Priority::PRIO_INFORMATION) {}
-
-
-Block InternalTextLogsQueue::getSampleBlock()
-{
- return Block {
- {std::make_shared<DataTypeDateTime>(), "event_time"},
- {std::make_shared<DataTypeUInt32>(), "event_time_microseconds"},
- {std::make_shared<DataTypeString>(), "host_name"},
- {std::make_shared<DataTypeString>(), "query_id"},
- {std::make_shared<DataTypeUInt64>(), "thread_id"},
- {std::make_shared<DataTypeInt8>(), "priority"},
- {std::make_shared<DataTypeString>(), "source"},
- {std::make_shared<DataTypeString>(), "text"}
- };
-}
-
-MutableColumns InternalTextLogsQueue::getSampleColumns()
-{
- static Block sample_block = getSampleBlock();
- return sample_block.cloneEmptyColumns();
-}
-
-void InternalTextLogsQueue::pushBlock(Block && log_block)
-{
- static Block sample_block = getSampleBlock();
-
- if (blocksHaveEqualStructure(sample_block, log_block))
- emplace(log_block.mutateColumns());
- else
- LOG_WARNING(&Poco::Logger::get("InternalTextLogsQueue"), "Log block have different structure");
-}
-
-const char * InternalTextLogsQueue::getPriorityName(int priority)
-{
- /// See Poco::Message::Priority
-
- static constexpr const char * const PRIORITIES[] =
- {
- "Unknown",
- "Fatal",
- "Critical",
- "Error",
- "Warning",
- "Notice",
- "Information",
- "Debug",
- "Trace"
- };
-
- return (priority >= 1 && priority <= 8) ? PRIORITIES[priority] : PRIORITIES[0];
-}
-
-}
+#include "InternalTextLogsQueue.h"
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <common/logger_useful.h>
+
+#include <Poco/Message.h>
+
+
+namespace DB
+{
+
+InternalTextLogsQueue::InternalTextLogsQueue()
+ : ConcurrentBoundedQueue<MutableColumns>(std::numeric_limits<int>::max()),
+ max_priority(Poco::Message::Priority::PRIO_INFORMATION) {}
+
+
+Block InternalTextLogsQueue::getSampleBlock()
+{
+ return Block {
+ {std::make_shared<DataTypeDateTime>(), "event_time"},
+ {std::make_shared<DataTypeUInt32>(), "event_time_microseconds"},
+ {std::make_shared<DataTypeString>(), "host_name"},
+ {std::make_shared<DataTypeString>(), "query_id"},
+ {std::make_shared<DataTypeUInt64>(), "thread_id"},
+ {std::make_shared<DataTypeInt8>(), "priority"},
+ {std::make_shared<DataTypeString>(), "source"},
+ {std::make_shared<DataTypeString>(), "text"}
+ };
+}
+
+MutableColumns InternalTextLogsQueue::getSampleColumns()
+{
+ static Block sample_block = getSampleBlock();
+ return sample_block.cloneEmptyColumns();
+}
+
+void InternalTextLogsQueue::pushBlock(Block && log_block)
+{
+ static Block sample_block = getSampleBlock();
+
+ if (blocksHaveEqualStructure(sample_block, log_block))
+ emplace(log_block.mutateColumns());
+ else
+ LOG_WARNING(&Poco::Logger::get("InternalTextLogsQueue"), "Log block have different structure");
+}
+
+const char * InternalTextLogsQueue::getPriorityName(int priority)
+{
+ /// See Poco::Message::Priority
+
+ static constexpr const char * const PRIORITIES[] =
+ {
+ "Unknown",
+ "Fatal",
+ "Critical",
+ "Error",
+ "Warning",
+ "Notice",
+ "Information",
+ "Debug",
+ "Trace"
+ };
+
+ return (priority >= 1 && priority <= 8) ? PRIORITIES[priority] : PRIORITIES[0];
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h
index 28841598d3..af3f0c624f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/InternalTextLogsQueue.h
@@ -1,31 +1,31 @@
-#pragma once
-#include <Common/ConcurrentBoundedQueue.h>
-#include <Core/Block.h>
-
-
-namespace DB
-{
-
-class InternalTextLogsQueue : public ConcurrentBoundedQueue<MutableColumns>
-{
-public:
- /// You should not push logs in the queue if their priority greater max_priority
- int max_priority;
-
- InternalTextLogsQueue();
-
- static Block getSampleBlock();
- static MutableColumns getSampleColumns();
-
- /// Is used to pass block from remote server to the client
- void pushBlock(Block && log_block);
-
- /// Converts priority from Poco::Message::Priority to a string
- static const char * getPriorityName(int priority);
-};
-
-using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>;
-
-}
-
-
+#pragma once
+#include <Common/ConcurrentBoundedQueue.h>
+#include <Core/Block.h>
+
+
+namespace DB
+{
+
+class InternalTextLogsQueue : public ConcurrentBoundedQueue<MutableColumns>
+{
+public:
+ /// You should not push logs in the queue if their priority greater max_priority
+ int max_priority;
+
+ InternalTextLogsQueue();
+
+ static Block getSampleBlock();
+ static MutableColumns getSampleColumns();
+
+ /// Is used to pass block from remote server to the client
+ void pushBlock(Block && log_block);
+
+ /// Converts priority from Poco::Message::Priority to a string
+ static const char * getPriorityName(int priority);
+};
+
+using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>;
+
+}
+
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h
index f486752e19..e6043473d4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h
@@ -1,71 +1,71 @@
-#pragma once
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <DataTypes/IDataType.h>
+#include <memory>
+#include <unordered_map>
+#include <DataTypes/DataTypeLowCardinality.h>
+
-#include <Parsers/IAST.h>
-#include <DataTypes/IDataType.h>
-#include <memory>
-#include <unordered_map>
-#include <DataTypes/DataTypeLowCardinality.h>
-
-
-namespace DB
-{
-
-struct PreparedSetKey
-{
- /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired
- /// data types of set elements (two different Sets can be required for two tuples with the same contents
- /// if left hand sides of the IN operators have different types).
- static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_)
- {
+namespace DB
+{
+
+struct PreparedSetKey
+{
+ /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired
+ /// data types of set elements (two different Sets can be required for two tuples with the same contents
+ /// if left hand sides of the IN operators have different types).
+ static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_)
+ {
/// Remove LowCardinality types from type list because Set doesn't support LowCardinality keys now,
- /// just converts LowCardinality to ordinary types.
- for (auto & type : types_)
- type = recursiveRemoveLowCardinality(type);
-
- PreparedSetKey key;
- key.ast_hash = ast.getTreeHash();
- key.types = std::move(types_);
- return key;
- }
-
- /// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting
- /// set is fully determined by the subquery.
- static PreparedSetKey forSubquery(const IAST & ast)
- {
- PreparedSetKey key;
- key.ast_hash = ast.getTreeHash();
- return key;
- }
-
- IAST::Hash ast_hash;
- DataTypes types; /// Empty for subqueries.
-
- bool operator==(const PreparedSetKey & other) const
- {
- if (ast_hash != other.ast_hash)
- return false;
-
- if (types.size() != other.types.size())
- return false;
-
- for (size_t i = 0; i < types.size(); ++i)
- {
- if (!types[i]->equals(*other.types[i]))
- return false;
- }
-
- return true;
- }
-
- struct Hash
- {
- UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; }
- };
-};
-
-class Set;
-using SetPtr = std::shared_ptr<Set>;
-
-using PreparedSets = std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash>;
-
-}
+ /// just converts LowCardinality to ordinary types.
+ for (auto & type : types_)
+ type = recursiveRemoveLowCardinality(type);
+
+ PreparedSetKey key;
+ key.ast_hash = ast.getTreeHash();
+ key.types = std::move(types_);
+ return key;
+ }
+
+ /// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting
+ /// set is fully determined by the subquery.
+ static PreparedSetKey forSubquery(const IAST & ast)
+ {
+ PreparedSetKey key;
+ key.ast_hash = ast.getTreeHash();
+ return key;
+ }
+
+ IAST::Hash ast_hash;
+ DataTypes types; /// Empty for subqueries.
+
+ bool operator==(const PreparedSetKey & other) const
+ {
+ if (ast_hash != other.ast_hash)
+ return false;
+
+ if (types.size() != other.types.size())
+ return false;
+
+ for (size_t i = 0; i < types.size(); ++i)
+ {
+ if (!types[i]->equals(*other.types[i]))
+ return false;
+ }
+
+ return true;
+ }
+
+ struct Hash
+ {
+ UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; }
+ };
+};
+
+class Set;
+using SetPtr = std::shared_ptr<Set>;
+
+using PreparedSets = std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash>;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp
index 2e8f986ca6..fe9b6806d8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.cpp
@@ -1,43 +1,43 @@
-#include "ProfileEventsExt.h"
-#include <Common/typeid_cast.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnArray.h>
+#include "ProfileEventsExt.h"
+#include <Common/typeid_cast.h>
+#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeArray.h>
-
-namespace ProfileEvents
-{
-
-/// Put implementation here to avoid extra linking dependencies for clickhouse_common_io
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeArray.h>
+
+namespace ProfileEvents
+{
+
+/// Put implementation here to avoid extra linking dependencies for clickhouse_common_io
void dumpToMapColumn(const Counters & counters, DB::IColumn * column, bool nonzero_only)
-{
+{
auto * column_map = column ? &typeid_cast<DB::ColumnMap &>(*column) : nullptr;
if (!column_map)
return;
-
+
auto & offsets = column_map->getNestedColumn().getOffsets();
auto & tuple_column = column_map->getNestedData();
auto & key_column = tuple_column.getColumn(0);
auto & value_column = tuple_column.getColumn(1);
- size_t size = 0;
- for (Event event = 0; event < Counters::num_counters; ++event)
- {
- UInt64 value = counters[event].load(std::memory_order_relaxed);
-
- if (nonzero_only && 0 == value)
- continue;
-
+ size_t size = 0;
+ for (Event event = 0; event < Counters::num_counters; ++event)
+ {
+ UInt64 value = counters[event].load(std::memory_order_relaxed);
+
+ if (nonzero_only && 0 == value)
+ continue;
+
const char * desc = ProfileEvents::getName(event);
key_column.insertData(desc, strlen(desc));
value_column.insert(value);
size++;
- }
-
+ }
+
offsets.push_back(offsets.back() + size);
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h
index 7d513f0cd0..56f5c81bc4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ProfileEventsExt.h
@@ -1,12 +1,12 @@
-#pragma once
-#include <Common/ProfileEvents.h>
-#include <Columns/IColumn.h>
-
-
-namespace ProfileEvents
-{
-
+#pragma once
+#include <Common/ProfileEvents.h>
+#include <Columns/IColumn.h>
+
+
+namespace ProfileEvents
+{
+
/// Dumps profile events to columns Map(String, UInt64)
void dumpToMapColumn(const Counters & counters, DB::IColumn * column, bool nonzero_only = true);
-
-}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp
index 2cbb963444..fc3226743f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.cpp
@@ -2,43 +2,43 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
-#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
-#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/ProfileEventsExt.h>
-#include <Interpreters/QueryLog.h>
-#include <Poco/Net/IPAddress.h>
+#include <Interpreters/QueryLog.h>
+#include <Poco/Net/IPAddress.h>
#include <Common/ClickHouseRevision.h>
#include <Common/IPv6ToBinary.h>
#include <Common/ProfileEvents.h>
#include <Common/typeid_cast.h>
-
-
-namespace DB
-{
-
+
+
+namespace DB
+{
+
NamesAndTypesList QueryLogElement::getNamesAndTypes()
-{
- auto query_status_datatype = std::make_shared<DataTypeEnum8>(
- DataTypeEnum8::Values
- {
- {"QueryStart", static_cast<Int8>(QUERY_START)},
- {"QueryFinish", static_cast<Int8>(QUERY_FINISH)},
- {"ExceptionBeforeStart", static_cast<Int8>(EXCEPTION_BEFORE_START)},
- {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)}
- });
-
- return
- {
+{
+ auto query_status_datatype = std::make_shared<DataTypeEnum8>(
+ DataTypeEnum8::Values
+ {
+ {"QueryStart", static_cast<Int8>(QUERY_START)},
+ {"QueryFinish", static_cast<Int8>(QUERY_FINISH)},
+ {"ExceptionBeforeStart", static_cast<Int8>(EXCEPTION_BEFORE_START)},
+ {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)}
+ });
+
+ return
+ {
{"type", std::move(query_status_datatype)},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
@@ -46,7 +46,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"query_start_time", std::make_shared<DataTypeDateTime>()},
{"query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"query_duration_ms", std::make_shared<DataTypeUInt64>()},
-
+
{"read_rows", std::make_shared<DataTypeUInt64>()},
{"read_bytes", std::make_shared<DataTypeUInt64>()},
{"written_rows", std::make_shared<DataTypeUInt64>()},
@@ -54,7 +54,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"result_rows", std::make_shared<DataTypeUInt64>()},
{"result_bytes", std::make_shared<DataTypeUInt64>()},
{"memory_usage", std::make_shared<DataTypeUInt64>()},
-
+
{"current_database", std::make_shared<DataTypeString>()},
{"query", std::make_shared<DataTypeString>()},
{"formatted_query", std::make_shared<DataTypeString>()},
@@ -73,7 +73,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"exception_code", std::make_shared<DataTypeInt32>()},
{"exception", std::make_shared<DataTypeString>()},
{"stack_trace", std::make_shared<DataTypeString>()},
-
+
{"is_initial_query", std::make_shared<DataTypeUInt8>()},
{"user", std::make_shared<DataTypeString>()},
{"query_id", std::make_shared<DataTypeString>()},
@@ -98,9 +98,9 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"http_referer", std::make_shared<DataTypeString>()},
{"forwarded_for", std::make_shared<DataTypeString>()},
{"quota_key", std::make_shared<DataTypeString>()},
-
+
{"revision", std::make_shared<DataTypeUInt32>()},
-
+
{"log_comment", std::make_shared<DataTypeString>()},
{"thread_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
@@ -116,10 +116,10 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"used_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_storages", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_table_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}
- };
-
-}
+ };
+}
+
NamesAndAliases QueryLogElement::getNamesAndAliases()
{
return
@@ -130,30 +130,30 @@ NamesAndAliases QueryLogElement::getNamesAndAliases()
{"Settings.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapValues(Settings)"}
};
}
-
-void QueryLogElement::appendToBlock(MutableColumns & columns) const
-{
- size_t i = 0;
-
- columns[i++]->insert(type);
+
+void QueryLogElement::appendToBlock(MutableColumns & columns) const
+{
+ size_t i = 0;
+
+ columns[i++]->insert(type);
columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType());
- columns[i++]->insert(event_time);
+ columns[i++]->insert(event_time);
columns[i++]->insert(event_time_microseconds);
- columns[i++]->insert(query_start_time);
+ columns[i++]->insert(query_start_time);
columns[i++]->insert(query_start_time_microseconds);
- columns[i++]->insert(query_duration_ms);
-
- columns[i++]->insert(read_rows);
- columns[i++]->insert(read_bytes);
- columns[i++]->insert(written_rows);
- columns[i++]->insert(written_bytes);
- columns[i++]->insert(result_rows);
- columns[i++]->insert(result_bytes);
-
- columns[i++]->insert(memory_usage);
-
+ columns[i++]->insert(query_duration_ms);
+
+ columns[i++]->insert(read_rows);
+ columns[i++]->insert(read_bytes);
+ columns[i++]->insert(written_rows);
+ columns[i++]->insert(written_bytes);
+ columns[i++]->insert(result_rows);
+ columns[i++]->insert(result_bytes);
+
+ columns[i++]->insert(memory_usage);
+
columns[i++]->insertData(current_database.data(), current_database.size());
- columns[i++]->insertData(query.data(), query.size());
+ columns[i++]->insertData(query.data(), query.size());
columns[i++]->insertData(formatted_query.data(), formatted_query.size());
columns[i++]->insert(normalized_query_hash);
columns[i++]->insertData(query_kind.data(), query_kind.size());
@@ -184,43 +184,43 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
fill_column(query_views, column_views);
}
- columns[i++]->insert(exception_code);
- columns[i++]->insertData(exception.data(), exception.size());
- columns[i++]->insertData(stack_trace.data(), stack_trace.size());
-
- appendClientInfo(client_info, columns, i);
-
+ columns[i++]->insert(exception_code);
+ columns[i++]->insertData(exception.data(), exception.size());
+ columns[i++]->insertData(stack_trace.data(), stack_trace.size());
+
+ appendClientInfo(client_info, columns, i);
+
columns[i++]->insert(ClickHouseRevision::getVersionRevision());
-
+
columns[i++]->insertData(log_comment.data(), log_comment.size());
- {
- Array threads_array;
- threads_array.reserve(thread_ids.size());
- for (const UInt64 thread_id : thread_ids)
- threads_array.emplace_back(thread_id);
- columns[i++]->insert(threads_array);
- }
-
- if (profile_counters)
- {
+ {
+ Array threads_array;
+ threads_array.reserve(thread_ids.size());
+ for (const UInt64 thread_id : thread_ids)
+ threads_array.emplace_back(thread_id);
+ columns[i++]->insert(threads_array);
+ }
+
+ if (profile_counters)
+ {
auto * column = columns[i++].get();
ProfileEvents::dumpToMapColumn(*profile_counters, column, true);
- }
- else
- {
- columns[i++]->insertDefault();
- }
-
- if (query_settings)
- {
+ }
+ else
+ {
+ columns[i++]->insertDefault();
+ }
+
+ if (query_settings)
+ {
auto * column = columns[i++].get();
query_settings->dumpToMapColumn(column, true);
- }
- else
- {
- columns[i++]->insertDefault();
- }
+ }
+ else
+ {
+ columns[i++]->insertDefault();
+ }
{
auto & column_aggregate_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
@@ -255,39 +255,39 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
fill_column(used_storages, column_storage_factory_objects);
fill_column(used_table_functions, column_table_function_factory_objects);
}
-}
-
-void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i)
-{
- columns[i++]->insert(client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY);
-
- columns[i++]->insert(client_info.current_user);
- columns[i++]->insert(client_info.current_query_id);
- columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16);
- columns[i++]->insert(client_info.current_address.port());
-
- columns[i++]->insert(client_info.initial_user);
- columns[i++]->insert(client_info.initial_query_id);
- columns[i++]->insertData(IPv6ToBinary(client_info.initial_address.host()).data(), 16);
- columns[i++]->insert(client_info.initial_address.port());
+}
+
+void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i)
+{
+ columns[i++]->insert(client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY);
+
+ columns[i++]->insert(client_info.current_user);
+ columns[i++]->insert(client_info.current_query_id);
+ columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16);
+ columns[i++]->insert(client_info.current_address.port());
+
+ columns[i++]->insert(client_info.initial_user);
+ columns[i++]->insert(client_info.initial_query_id);
+ columns[i++]->insertData(IPv6ToBinary(client_info.initial_address.host()).data(), 16);
+ columns[i++]->insert(client_info.initial_address.port());
columns[i++]->insert(client_info.initial_query_start_time);
columns[i++]->insert(client_info.initial_query_start_time_microseconds);
-
- columns[i++]->insert(UInt64(client_info.interface));
-
- columns[i++]->insert(client_info.os_user);
- columns[i++]->insert(client_info.client_hostname);
- columns[i++]->insert(client_info.client_name);
+
+ columns[i++]->insert(UInt64(client_info.interface));
+
+ columns[i++]->insert(client_info.os_user);
+ columns[i++]->insert(client_info.client_hostname);
+ columns[i++]->insert(client_info.client_name);
columns[i++]->insert(client_info.client_tcp_protocol_version);
- columns[i++]->insert(client_info.client_version_major);
- columns[i++]->insert(client_info.client_version_minor);
- columns[i++]->insert(client_info.client_version_patch);
-
- columns[i++]->insert(UInt64(client_info.http_method));
- columns[i++]->insert(client_info.http_user_agent);
+ columns[i++]->insert(client_info.client_version_major);
+ columns[i++]->insert(client_info.client_version_minor);
+ columns[i++]->insert(client_info.client_version_patch);
+
+ columns[i++]->insert(UInt64(client_info.http_method));
+ columns[i++]->insert(client_info.http_user_agent);
columns[i++]->insert(client_info.http_referer);
columns[i++]->insert(client_info.forwarded_for);
-
- columns[i++]->insert(client_info.quota_key);
-}
-}
+
+ columns[i++]->insert(client_info.quota_key);
+}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h
index 2713febe1b..93c8388f9d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryLog.h
@@ -1,59 +1,59 @@
-#pragma once
-
+#pragma once
+
#include <Core/NamesAndAliases.h>
-#include <Interpreters/SystemLog.h>
-#include <Interpreters/ClientInfo.h>
-
-namespace ProfileEvents
-{
- class Counters;
-}
-
-
-namespace DB
-{
-
-
-/** Allows to log information about queries execution:
- * - info about start of query execution;
- * - performance metrics (are set at the end of query execution);
- * - info about errors of query execution.
- */
-
-/// A struct which will be inserted as row into query_log table
-struct QueryLogElement
-{
- using Type = QueryLogElementType;
-
- Type type = QUERY_START;
-
- /// Depending on the type of query and type of stage, not all the fields may be filled.
-
- time_t event_time{};
+#include <Interpreters/SystemLog.h>
+#include <Interpreters/ClientInfo.h>
+
+namespace ProfileEvents
+{
+ class Counters;
+}
+
+
+namespace DB
+{
+
+
+/** Allows to log information about queries execution:
+ * - info about start of query execution;
+ * - performance metrics (are set at the end of query execution);
+ * - info about errors of query execution.
+ */
+
+/// A struct which will be inserted as row into query_log table
+struct QueryLogElement
+{
+ using Type = QueryLogElementType;
+
+ Type type = QUERY_START;
+
+ /// Depending on the type of query and type of stage, not all the fields may be filled.
+
+ time_t event_time{};
Decimal64 event_time_microseconds{};
- time_t query_start_time{};
+ time_t query_start_time{};
Decimal64 query_start_time_microseconds{};
- UInt64 query_duration_ms{};
-
- /// The data fetched from DB to execute the query
- UInt64 read_rows{};
- UInt64 read_bytes{};
-
- /// The data written to DB
- UInt64 written_rows{};
- UInt64 written_bytes{};
-
- /// The data sent to the client
- UInt64 result_rows{};
- UInt64 result_bytes{};
-
- UInt64 memory_usage{};
-
+ UInt64 query_duration_ms{};
+
+ /// The data fetched from DB to execute the query
+ UInt64 read_rows{};
+ UInt64 read_bytes{};
+
+ /// The data written to DB
+ UInt64 written_rows{};
+ UInt64 written_bytes{};
+
+ /// The data sent to the client
+ UInt64 result_rows{};
+ UInt64 result_bytes{};
+
+ UInt64 memory_usage{};
+
String current_database;
- String query;
+ String query;
String formatted_query;
UInt64 normalized_query_hash{};
-
+
String query_kind;
std::set<String> query_databases;
std::set<String> query_tables;
@@ -71,32 +71,32 @@ struct QueryLogElement
std::unordered_set<String> used_storages;
std::unordered_set<String> used_table_functions;
- Int32 exception_code{}; // because ErrorCodes are int
- String exception;
- String stack_trace;
-
- ClientInfo client_info;
-
+ Int32 exception_code{}; // because ErrorCodes are int
+ String exception;
+ String stack_trace;
+
+ ClientInfo client_info;
+
String log_comment;
- std::vector<UInt64> thread_ids;
- std::shared_ptr<ProfileEvents::Counters> profile_counters;
- std::shared_ptr<Settings> query_settings;
-
- static std::string name() { return "QueryLog"; }
-
+ std::vector<UInt64> thread_ids;
+ std::shared_ptr<ProfileEvents::Counters> profile_counters;
+ std::shared_ptr<Settings> query_settings;
+
+ static std::string name() { return "QueryLog"; }
+
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases();
- void appendToBlock(MutableColumns & columns) const;
-
- static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i);
-};
-
-
-/// Instead of typedef - to allow forward declaration.
-class QueryLog : public SystemLog<QueryLogElement>
-{
- using SystemLog<QueryLogElement>::SystemLog;
-};
-
-}
+ void appendToBlock(MutableColumns & columns) const;
+
+ static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i);
+};
+
+
+/// Instead of typedef - to allow forward declaration.
+class QueryLog : public SystemLog<QueryLogElement>
+{
+ using SystemLog<QueryLogElement>::SystemLog;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp
index 7ca3c10045..0c9a6ab316 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.cpp
@@ -1,27 +1,27 @@
-#include "QueryThreadLog.h"
+#include "QueryThreadLog.h"
#include <array>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
-#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeMap.h>
-#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/ProfileEventsExt.h>
-#include <Interpreters/QueryLog.h>
+#include <Interpreters/QueryLog.h>
#include <Poco/Net/IPAddress.h>
-#include <Common/ClickHouseRevision.h>
-
-
-namespace DB
-{
+#include <Common/ClickHouseRevision.h>
+
+
+namespace DB
+{
NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
-{
+{
return {
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
@@ -29,21 +29,21 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
{"query_start_time", std::make_shared<DataTypeDateTime>()},
{"query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"query_duration_ms", std::make_shared<DataTypeUInt64>()},
-
+
{"read_rows", std::make_shared<DataTypeUInt64>()},
{"read_bytes", std::make_shared<DataTypeUInt64>()},
{"written_rows", std::make_shared<DataTypeUInt64>()},
{"written_bytes", std::make_shared<DataTypeUInt64>()},
{"memory_usage", std::make_shared<DataTypeInt64>()},
{"peak_memory_usage", std::make_shared<DataTypeInt64>()},
-
+
{"thread_name", std::make_shared<DataTypeString>()},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"master_thread_id", std::make_shared<DataTypeUInt64>()},
{"current_database", std::make_shared<DataTypeString>()},
{"query", std::make_shared<DataTypeString>()},
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
-
+
{"is_initial_query", std::make_shared<DataTypeUInt8>()},
{"user", std::make_shared<DataTypeString>()},
{"query_id", std::make_shared<DataTypeString>()},
@@ -68,13 +68,13 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
{"http_referer", std::make_shared<DataTypeString>()},
{"forwarded_for", std::make_shared<DataTypeString>()},
{"quota_key", std::make_shared<DataTypeString>()},
-
+
{"revision", std::make_shared<DataTypeUInt32>()},
-
+
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
- };
-}
-
+ };
+}
+
NamesAndAliases QueryThreadLogElement::getNamesAndAliases()
{
return
@@ -84,46 +84,46 @@ NamesAndAliases QueryThreadLogElement::getNamesAndAliases()
};
}
-void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const
-{
- size_t i = 0;
-
+void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const
+{
+ size_t i = 0;
+
columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType());
- columns[i++]->insert(event_time);
+ columns[i++]->insert(event_time);
columns[i++]->insert(event_time_microseconds);
- columns[i++]->insert(query_start_time);
+ columns[i++]->insert(query_start_time);
columns[i++]->insert(query_start_time_microseconds);
- columns[i++]->insert(query_duration_ms);
-
- columns[i++]->insert(read_rows);
- columns[i++]->insert(read_bytes);
- columns[i++]->insert(written_rows);
- columns[i++]->insert(written_bytes);
-
- columns[i++]->insert(memory_usage);
- columns[i++]->insert(peak_memory_usage);
-
- columns[i++]->insertData(thread_name.data(), thread_name.size());
- columns[i++]->insert(thread_id);
- columns[i++]->insert(master_thread_id);
-
+ columns[i++]->insert(query_duration_ms);
+
+ columns[i++]->insert(read_rows);
+ columns[i++]->insert(read_bytes);
+ columns[i++]->insert(written_rows);
+ columns[i++]->insert(written_bytes);
+
+ columns[i++]->insert(memory_usage);
+ columns[i++]->insert(peak_memory_usage);
+
+ columns[i++]->insertData(thread_name.data(), thread_name.size());
+ columns[i++]->insert(thread_id);
+ columns[i++]->insert(master_thread_id);
+
columns[i++]->insertData(current_database.data(), current_database.size());
- columns[i++]->insertData(query.data(), query.size());
+ columns[i++]->insertData(query.data(), query.size());
columns[i++]->insert(normalized_query_hash);
-
- QueryLogElement::appendClientInfo(client_info, columns, i);
-
+
+ QueryLogElement::appendClientInfo(client_info, columns, i);
+
columns[i++]->insert(ClickHouseRevision::getVersionRevision());
-
- if (profile_counters)
- {
+
+ if (profile_counters)
+ {
auto * column = columns[i++].get();
ProfileEvents::dumpToMapColumn(*profile_counters, column, true);
- }
- else
- {
- columns[i++]->insertDefault();
- }
-}
-
-}
+ }
+ else
+ {
+ columns[i++]->insertDefault();
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h
index 57e93edbaf..a2585d7814 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryThreadLog.h
@@ -1,66 +1,66 @@
-#pragma once
-
-#include <Interpreters/SystemLog.h>
-#include <Interpreters/ClientInfo.h>
-
-
-namespace ProfileEvents
-{
- class Counters;
-}
-
-
-namespace DB
-{
-
-struct QueryThreadLogElement
-{
- time_t event_time{};
+#pragma once
+
+#include <Interpreters/SystemLog.h>
+#include <Interpreters/ClientInfo.h>
+
+
+namespace ProfileEvents
+{
+ class Counters;
+}
+
+
+namespace DB
+{
+
+struct QueryThreadLogElement
+{
+ time_t event_time{};
Decimal64 event_time_microseconds{};
- /// When query was attached to current thread
- time_t query_start_time{};
+ /// When query was attached to current thread
+ time_t query_start_time{};
/// same as above but adds microsecond precision
Decimal64 query_start_time_microseconds{};
- /// Real time spent by the thread to execute the query
- UInt64 query_duration_ms{};
-
- /// The data fetched from DB in current thread to execute the query
- UInt64 read_rows{};
- UInt64 read_bytes{};
-
- /// The data written to DB
- UInt64 written_rows{};
- UInt64 written_bytes{};
-
- Int64 memory_usage{};
- Int64 peak_memory_usage{};
-
- String thread_name;
- UInt64 thread_id{};
- UInt64 master_thread_id{};
-
+ /// Real time spent by the thread to execute the query
+ UInt64 query_duration_ms{};
+
+ /// The data fetched from DB in current thread to execute the query
+ UInt64 read_rows{};
+ UInt64 read_bytes{};
+
+ /// The data written to DB
+ UInt64 written_rows{};
+ UInt64 written_bytes{};
+
+ Int64 memory_usage{};
+ Int64 peak_memory_usage{};
+
+ String thread_name;
+ UInt64 thread_id{};
+ UInt64 master_thread_id{};
+
String current_database;
- String query;
+ String query;
UInt64 normalized_query_hash{};
- ClientInfo client_info;
-
- std::shared_ptr<ProfileEvents::Counters> profile_counters;
-
- static std::string name() { return "QueryThreadLog"; }
-
+ ClientInfo client_info;
+
+ std::shared_ptr<ProfileEvents::Counters> profile_counters;
+
+ static std::string name() { return "QueryThreadLog"; }
+
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases();
- void appendToBlock(MutableColumns & columns) const;
-};
-
-
-class QueryThreadLog : public SystemLog<QueryThreadLogElement>
-{
- using SystemLog<QueryThreadLogElement>::SystemLog;
-};
-
-
-}
-
-
+ void appendToBlock(MutableColumns & columns) const;
+};
+
+
+class QueryThreadLog : public SystemLog<QueryThreadLogElement>
+{
+ using SystemLog<QueryThreadLogElement>::SystemLog;
+};
+
+
+}
+
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h
index 709ecdc239..a5173c872d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h
@@ -1,38 +1,38 @@
-#pragma once
-
-#include <Core/QueryProcessingStage.h>
+#pragma once
+
+#include <Core/QueryProcessingStage.h>
#include <optional>
-
-namespace DB
-{
-
-/**
- * to_stage
- * - the stage to which the query is to be executed. By default - till to the end.
- * You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing.
- *
- * subquery_depth
- * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
- * for INSERT SELECT, a value 1 is passed instead of 0.
- *
- * only_analyze
- * - the object was created only for query analysis.
- *
- * is_subquery
- * - there could be some specific for subqueries. Ex. there's no need to pass duplicated columns in results, cause of indirect results.
+
+namespace DB
+{
+
+/**
+ * to_stage
+ * - the stage to which the query is to be executed. By default - until the end.
+ * The query can also be executed only up to the intermediate aggregation state; such states are combined from different servers for distributed query processing.
+ *
+ * subquery_depth
+ * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
+ * for INSERT SELECT, a value 1 is passed instead of 0.
+ *
+ * only_analyze
+ * - the object was created only for query analysis.
+ *
+ * is_subquery
+ * - there could be some specifics for subqueries. E.g. there's no need to pass duplicated columns in results, because of indirect results.
*
* is_internal
* - the object was created only for internal queries.
- */
-struct SelectQueryOptions
-{
- QueryProcessingStage::Enum to_stage;
- size_t subquery_depth;
- bool only_analyze = false;
- bool modify_inplace = false;
- bool remove_duplicates = false;
- bool ignore_quota = false;
- bool ignore_limits = false;
+ */
+struct SelectQueryOptions
+{
+ QueryProcessingStage::Enum to_stage;
+ size_t subquery_depth;
+ bool only_analyze = false;
+ bool modify_inplace = false;
+ bool remove_duplicates = false;
+ bool ignore_quota = false;
+ bool ignore_limits = false;
/// This flag is needed to analyze query ignoring table projections.
/// It is needed because we build another one InterpreterSelectQuery while analyzing projections.
/// It helps to avoid infinite recursion.
@@ -45,7 +45,7 @@ struct SelectQueryOptions
bool is_internal = false;
bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select
bool with_all_cols = false; /// asterisk include materialized and aliased columns
-
+
/// These two fields are used to evaluate shardNum() and shardCount() function when
/// prefer_localhost_replica == 1 and local instance is selected. They are needed because local
/// instance might have multiple shards and scalars can only hold one value.
@@ -58,49 +58,49 @@ struct SelectQueryOptions
bool is_subquery_ = false)
: to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_)
{}
-
- SelectQueryOptions copy() const { return *this; }
-
- SelectQueryOptions subquery() const
- {
- SelectQueryOptions out = *this;
- out.to_stage = QueryProcessingStage::Complete;
- ++out.subquery_depth;
+
+ SelectQueryOptions copy() const { return *this; }
+
+ SelectQueryOptions subquery() const
+ {
+ SelectQueryOptions out = *this;
+ out.to_stage = QueryProcessingStage::Complete;
+ ++out.subquery_depth;
out.is_subquery = true;
- return out;
- }
-
- SelectQueryOptions & analyze(bool dry_run = true)
- {
- only_analyze = dry_run;
- return *this;
- }
-
- SelectQueryOptions & modify(bool value = true)
- {
- modify_inplace = value;
- return *this;
- }
-
- SelectQueryOptions & noModify() { return modify(false); }
-
- SelectQueryOptions & removeDuplicates(bool value = true)
- {
- remove_duplicates = value;
- return *this;
- }
-
- SelectQueryOptions & noSubquery()
- {
- subquery_depth = 0;
- return *this;
- }
-
- SelectQueryOptions & ignoreLimits(bool value = true)
- {
- ignore_limits = value;
- return *this;
- }
+ return out;
+ }
+
+ SelectQueryOptions & analyze(bool dry_run = true)
+ {
+ only_analyze = dry_run;
+ return *this;
+ }
+
+ SelectQueryOptions & modify(bool value = true)
+ {
+ modify_inplace = value;
+ return *this;
+ }
+
+ SelectQueryOptions & noModify() { return modify(false); }
+
+ SelectQueryOptions & removeDuplicates(bool value = true)
+ {
+ remove_duplicates = value;
+ return *this;
+ }
+
+ SelectQueryOptions & noSubquery()
+ {
+ subquery_depth = 0;
+ return *this;
+ }
+
+ SelectQueryOptions & ignoreLimits(bool value = true)
+ {
+ ignore_limits = value;
+ return *this;
+ }
SelectQueryOptions & ignoreProjections(bool value = true)
{
@@ -138,6 +138,6 @@ struct SelectQueryOptions
shard_count = shard_count_;
return *this;
}
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp
index bfb969cab2..0b3d63100a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp
@@ -1,84 +1,84 @@
-#include <Interpreters/StorageID.h>
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Common/quoteString.h>
-#include <IO/WriteHelpers.h>
+#include <Interpreters/StorageID.h>
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Common/quoteString.h>
+#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
-#include <Interpreters/DatabaseAndTableWithAlias.h>
+#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Poco/Util/AbstractConfiguration.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int UNKNOWN_DATABASE;
-}
-
-StorageID::StorageID(const ASTQueryWithTableAndOutput & query)
-{
- database_name = query.database;
- table_name = query.table;
- uuid = query.uuid;
- assertNotEmpty();
-}
-
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int UNKNOWN_DATABASE;
+}
+
+StorageID::StorageID(const ASTQueryWithTableAndOutput & query)
+{
+ database_name = query.database;
+ table_name = query.table;
+ uuid = query.uuid;
+ assertNotEmpty();
+}
+
StorageID::StorageID(const ASTTableIdentifier & table_identifier_node)
-{
- DatabaseAndTableWithAlias database_table(table_identifier_node);
- database_name = database_table.database;
- table_name = database_table.table;
- uuid = database_table.uuid;
- assertNotEmpty();
-}
-
-StorageID::StorageID(const ASTPtr & node)
-{
+{
+ DatabaseAndTableWithAlias database_table(table_identifier_node);
+ database_name = database_table.database;
+ table_name = database_table.table;
+ uuid = database_table.uuid;
+ assertNotEmpty();
+}
+
+StorageID::StorageID(const ASTPtr & node)
+{
if (const auto * identifier = node->as<ASTTableIdentifier>())
- *this = StorageID(*identifier);
- else if (const auto * simple_query = dynamic_cast<const ASTQueryWithTableAndOutput *>(node.get()))
- *this = StorageID(*simple_query);
- else
- throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR);
-}
-
-String StorageID::getTableName() const
-{
- assertNotEmpty();
- return table_name;
-}
-
-String StorageID::getDatabaseName() const
-{
- assertNotEmpty();
- if (database_name.empty())
- throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE);
- return database_name;
-}
-
-String StorageID::getNameForLogs() const
-{
- assertNotEmpty();
- return (database_name.empty() ? "" : backQuoteIfNeed(database_name) + ".") + backQuoteIfNeed(table_name)
+ *this = StorageID(*identifier);
+ else if (const auto * simple_query = dynamic_cast<const ASTQueryWithTableAndOutput *>(node.get()))
+ *this = StorageID(*simple_query);
+ else
+ throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR);
+}
+
+String StorageID::getTableName() const
+{
+ assertNotEmpty();
+ return table_name;
+}
+
+String StorageID::getDatabaseName() const
+{
+ assertNotEmpty();
+ if (database_name.empty())
+ throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE);
+ return database_name;
+}
+
+String StorageID::getNameForLogs() const
+{
+ assertNotEmpty();
+ return (database_name.empty() ? "" : backQuoteIfNeed(database_name) + ".") + backQuoteIfNeed(table_name)
+ (hasUUID() ? " (" + toString(uuid) + ")" : "");
-}
-
-bool StorageID::operator<(const StorageID & rhs) const
-{
- assertNotEmpty();
- /// It's needed for ViewDependencies
- if (!hasUUID() && !rhs.hasUUID())
- /// If both IDs don't have UUID, compare them like pair of strings
- return std::tie(database_name, table_name) < std::tie(rhs.database_name, rhs.table_name);
- else if (hasUUID() && rhs.hasUUID())
- /// If both IDs have UUID, compare UUIDs and ignore database and table name
- return uuid < rhs.uuid;
- else
- /// All IDs without UUID are less, then all IDs with UUID
- return !hasUUID();
-}
-
+}
+
+bool StorageID::operator<(const StorageID & rhs) const
+{
+ assertNotEmpty();
+ /// It's needed for ViewDependencies
+ if (!hasUUID() && !rhs.hasUUID())
+ /// If both IDs don't have UUID, compare them like pair of strings
+ return std::tie(database_name, table_name) < std::tie(rhs.database_name, rhs.table_name);
+ else if (hasUUID() && rhs.hasUUID())
+ /// If both IDs have UUID, compare UUIDs and ignore database and table name
+ return uuid < rhs.uuid;
+ else
+ /// All IDs without UUID are less than all IDs with UUID
+ return !hasUUID();
+}
+
bool StorageID::operator==(const StorageID & rhs) const
{
assertNotEmpty();
@@ -88,15 +88,15 @@ bool StorageID::operator==(const StorageID & rhs) const
return std::tie(database_name, table_name) == std::tie(rhs.database_name, rhs.table_name);
}
-String StorageID::getFullTableName() const
-{
- return backQuoteIfNeed(getDatabaseName()) + "." + backQuoteIfNeed(table_name);
-}
-
+String StorageID::getFullTableName() const
+{
+ return backQuoteIfNeed(getDatabaseName()) + "." + backQuoteIfNeed(table_name);
+}
+
String StorageID::getFullNameNotQuoted() const
{
return getDatabaseName() + "." + table_name;
-}
+}
StorageID StorageID::fromDictionaryConfig(const Poco::Util::AbstractConfiguration & config,
const String & config_prefix)
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h
index 974f5bd3e5..57ac1d94fb 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h
@@ -1,37 +1,37 @@
-#pragma once
-
-#include <Core/Block.h>
-#include <Storages/IStorage_fwd.h>
-
-
-namespace DB
-{
-
+#pragma once
+
+#include <Core/Block.h>
+#include <Storages/IStorage_fwd.h>
+
+
+namespace DB
+{
+
class QueryPlan;
-
+
class Set;
using SetPtr = std::shared_ptr<Set>;
-/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section.
-struct SubqueryForSet
-{
+/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section.
+struct SubqueryForSet
+{
SubqueryForSet();
~SubqueryForSet();
SubqueryForSet(SubqueryForSet &&);
SubqueryForSet & operator= (SubqueryForSet &&);
- /// The source is obtained using the InterpreterSelectQuery subquery.
+ /// The source is obtained using the InterpreterSelectQuery subquery.
std::unique_ptr<QueryPlan> source;
-
- /// If set, build it from result.
- SetPtr set;
-
- /// If set, put the result into the table.
- /// This is a temporary table for transferring to remote servers for distributed query processing.
- StoragePtr table;
-};
-
-/// ID of subquery -> what to do with it.
-using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>;
-
-}
+
+ /// If set, build it from result.
+ SetPtr set;
+
+ /// If set, put the result into the table.
+ /// This is a temporary table for transferring to remote servers for distributed query processing.
+ StoragePtr table;
+};
+
+/// ID of subquery -> what to do with it.
+using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp
index 64851f1cb2..cbeb8a9407 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.cpp
@@ -1,115 +1,115 @@
-#include <Interpreters/TablesStatus.h>
-#include <IO/ReadBuffer.h>
-#include <IO/WriteBuffer.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteHelpers.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int TOO_LARGE_ARRAY_SIZE;
- extern const int LOGICAL_ERROR;
-}
-
-void TableStatus::write(WriteBuffer & out) const
-{
- writeBinary(is_replicated, out);
- if (is_replicated)
- {
- writeVarUInt(absolute_delay, out);
- }
-}
-
-void TableStatus::read(ReadBuffer & in)
-{
- absolute_delay = 0;
- readBinary(is_replicated, in);
- if (is_replicated)
- {
- readVarUInt(absolute_delay, in);
- }
-}
-
-void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const
-{
- if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
- throw Exception(
- "Logical error: method TablesStatusRequest::write is called for unsupported server revision",
- ErrorCodes::LOGICAL_ERROR);
-
- writeVarUInt(tables.size(), out);
- for (const auto & table_name : tables)
- {
- writeBinary(table_name.database, out);
- writeBinary(table_name.table, out);
- }
-}
-
-void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision)
-{
- if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
- throw Exception(
- "method TablesStatusRequest::read is called for unsupported client revision",
- ErrorCodes::LOGICAL_ERROR);
-
- size_t size = 0;
- readVarUInt(size, in);
-
- if (size > DEFAULT_MAX_STRING_SIZE)
- throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
-
- for (size_t i = 0; i < size; ++i)
- {
- QualifiedTableName table_name;
- readBinary(table_name.database, in);
- readBinary(table_name.table, in);
- tables.emplace(std::move(table_name));
- }
-}
-
-void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revision) const
-{
- if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
- throw Exception(
- "method TablesStatusResponse::write is called for unsupported client revision",
- ErrorCodes::LOGICAL_ERROR);
-
- writeVarUInt(table_states_by_id.size(), out);
- for (const auto & kv: table_states_by_id)
- {
- const QualifiedTableName & table_name = kv.first;
- writeBinary(table_name.database, out);
- writeBinary(table_name.table, out);
-
- const TableStatus & status = kv.second;
- status.write(out);
- }
-}
-
-void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision)
-{
- if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
- throw Exception(
- "method TablesStatusResponse::read is called for unsupported server revision",
- ErrorCodes::LOGICAL_ERROR);
-
- size_t size = 0;
- readVarUInt(size, in);
-
- if (size > DEFAULT_MAX_STRING_SIZE)
- throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
-
- for (size_t i = 0; i < size; ++i)
- {
- QualifiedTableName table_name;
- readBinary(table_name.database, in);
- readBinary(table_name.table, in);
-
- TableStatus status;
- status.read(in);
- table_states_by_id.emplace(std::move(table_name), std::move(status));
- }
-}
-
-}
+#include <Interpreters/TablesStatus.h>
+#include <IO/ReadBuffer.h>
+#include <IO/WriteBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int TOO_LARGE_ARRAY_SIZE;
+ extern const int LOGICAL_ERROR;
+}
+
+void TableStatus::write(WriteBuffer & out) const
+{
+ writeBinary(is_replicated, out);
+ if (is_replicated)
+ {
+ writeVarUInt(absolute_delay, out);
+ }
+}
+
+void TableStatus::read(ReadBuffer & in)
+{
+ absolute_delay = 0;
+ readBinary(is_replicated, in);
+ if (is_replicated)
+ {
+ readVarUInt(absolute_delay, in);
+ }
+}
+
+void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const
+{
+ if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
+ throw Exception(
+ "Logical error: method TablesStatusRequest::write is called for unsupported server revision",
+ ErrorCodes::LOGICAL_ERROR);
+
+ writeVarUInt(tables.size(), out);
+ for (const auto & table_name : tables)
+ {
+ writeBinary(table_name.database, out);
+ writeBinary(table_name.table, out);
+ }
+}
+
+void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision)
+{
+ if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
+ throw Exception(
+ "method TablesStatusRequest::read is called for unsupported client revision",
+ ErrorCodes::LOGICAL_ERROR);
+
+ size_t size = 0;
+ readVarUInt(size, in);
+
+ if (size > DEFAULT_MAX_STRING_SIZE)
+ throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ QualifiedTableName table_name;
+ readBinary(table_name.database, in);
+ readBinary(table_name.table, in);
+ tables.emplace(std::move(table_name));
+ }
+}
+
+void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revision) const
+{
+ if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
+ throw Exception(
+ "method TablesStatusResponse::write is called for unsupported client revision",
+ ErrorCodes::LOGICAL_ERROR);
+
+ writeVarUInt(table_states_by_id.size(), out);
+ for (const auto & kv: table_states_by_id)
+ {
+ const QualifiedTableName & table_name = kv.first;
+ writeBinary(table_name.database, out);
+ writeBinary(table_name.table, out);
+
+ const TableStatus & status = kv.second;
+ status.write(out);
+ }
+}
+
+void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision)
+{
+ if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
+ throw Exception(
+ "method TablesStatusResponse::read is called for unsupported server revision",
+ ErrorCodes::LOGICAL_ERROR);
+
+ size_t size = 0;
+ readVarUInt(size, in);
+
+ if (size > DEFAULT_MAX_STRING_SIZE)
+ throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ QualifiedTableName table_name;
+ readBinary(table_name.database, in);
+ readBinary(table_name.table, in);
+
+ TableStatus status;
+ status.read(in);
+ table_states_by_id.emplace(std::move(table_name), std::move(status));
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h
index 85290f69c8..4e1c0d185a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TablesStatus.h
@@ -1,52 +1,52 @@
-#pragma once
-
-#include <unordered_set>
-#include <unordered_map>
-
+#pragma once
+
+#include <unordered_set>
+#include <unordered_map>
+
#include <common/types.h>
-#include <Core/QualifiedTableName.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-}
-
-class ReadBuffer;
-class WriteBuffer;
-
-
-/// The following are request-response messages for TablesStatus request of the client-server protocol.
-/// Client can ask for about a set of tables and the server will respond with the following information for each table:
-/// - Is the table Replicated?
-/// - If yes, replication delay for that table.
-///
-/// For nonexistent tables there will be no TableStatus entry in the response.
-
-struct TableStatus
-{
- bool is_replicated = false;
- UInt32 absolute_delay = 0;
-
- void write(WriteBuffer & out) const;
- void read(ReadBuffer & in);
-};
-
-struct TablesStatusRequest
-{
- std::unordered_set<QualifiedTableName> tables;
-
- void write(WriteBuffer & out, UInt64 server_protocol_revision) const;
- void read(ReadBuffer & in, UInt64 client_protocol_revision);
-};
-
-struct TablesStatusResponse
-{
- std::unordered_map<QualifiedTableName, TableStatus> table_states_by_id;
-
- void write(WriteBuffer & out, UInt64 client_protocol_revision) const;
- void read(ReadBuffer & in, UInt64 server_protocol_revision);
-};
-
-}
+#include <Core/QualifiedTableName.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+}
+
+class ReadBuffer;
+class WriteBuffer;
+
+
+/// The following are request-response messages for TablesStatus request of the client-server protocol.
+/// Client can ask about a set of tables and the server will respond with the following information for each table:
+/// - Is the table Replicated?
+/// - If yes, replication delay for that table.
+///
+/// For nonexistent tables there will be no TableStatus entry in the response.
+
+struct TableStatus
+{
+ bool is_replicated = false;
+ UInt32 absolute_delay = 0;
+
+ void write(WriteBuffer & out) const;
+ void read(ReadBuffer & in);
+};
+
+struct TablesStatusRequest
+{
+ std::unordered_set<QualifiedTableName> tables;
+
+ void write(WriteBuffer & out, UInt64 server_protocol_revision) const;
+ void read(ReadBuffer & in, UInt64 client_protocol_revision);
+};
+
+struct TablesStatusResponse
+{
+ std::unordered_map<QualifiedTableName, TableStatus> table_states_by_id;
+
+ void write(WriteBuffer & out, UInt64 client_protocol_revision) const;
+ void read(ReadBuffer & in, UInt64 server_protocol_revision);
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp
index 52f9c6b6fb..c42b8572f4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp
@@ -1,28 +1,28 @@
-#include <Common/ThreadStatus.h>
-
+#include <Common/ThreadStatus.h>
+
#include <DataStreams/PushingToViewsBlockOutputStream.h>
-#include <Interpreters/Context.h>
+#include <Interpreters/Context.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/ProcessList.h>
-#include <Interpreters/QueryThreadLog.h>
+#include <Interpreters/QueryThreadLog.h>
#include <Interpreters/QueryViewsLog.h>
#include <Parsers/formatAST.h>
-#include <Common/CurrentThread.h>
-#include <Common/Exception.h>
+#include <Common/CurrentThread.h>
+#include <Common/Exception.h>
#include <Common/ProfileEvents.h>
-#include <Common/QueryProfiler.h>
+#include <Common/QueryProfiler.h>
#include <Common/SensitiveDataMasker.h>
-#include <Common/ThreadProfileEvents.h>
-#include <Common/TraceCollector.h>
-#include <common/errnoToString.h>
-
-#if defined(OS_LINUX)
-# include <Common/hasLinuxCapability.h>
-
-# include <sys/time.h>
-# include <sys/resource.h>
-#endif
-
+#include <Common/ThreadProfileEvents.h>
+#include <Common/TraceCollector.h>
+#include <common/errnoToString.h>
+
+#if defined(OS_LINUX)
+# include <Common/hasLinuxCapability.h>
+
+# include <sys/time.h>
+# include <sys/resource.h>
+#endif
+
namespace ProfileEvents
{
extern const Event SelectedRows;
@@ -30,26 +30,26 @@ extern const Event SelectedBytes;
extern const Event InsertedRows;
extern const Event InsertedBytes;
}
-
-
-/// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io
-/// TODO It doesn't make sense.
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int CANNOT_SET_THREAD_PRIORITY;
-}
-
+
+
+/// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io
+/// TODO It doesn't make sense.
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int CANNOT_SET_THREAD_PRIORITY;
+}
+
void ThreadStatus::applyQuerySettings()
{
auto query_context_ptr = query_context.lock();
assert(query_context_ptr);
const Settings & settings = query_context_ptr->getSettingsRef();
-
+
query_id = query_context_ptr->getCurrentQueryId();
initQueryProfiler();
@@ -74,21 +74,21 @@ void ThreadStatus::applyQuerySettings()
void ThreadStatus::attachQueryContext(ContextPtr query_context_)
-{
+{
query_context = query_context_;
if (global_context.expired())
global_context = query_context_->getGlobalContext();
-
- if (thread_group)
- {
- std::lock_guard lock(thread_group->mutex);
-
- thread_group->query_context = query_context;
+
+ if (thread_group)
+ {
+ std::lock_guard lock(thread_group->mutex);
+
+ thread_group->query_context = query_context;
if (thread_group->global_context.expired())
- thread_group->global_context = global_context;
- }
-
+ thread_group->global_context = global_context;
+ }
+
// Generate new span for thread manually here, because we can't depend
// on OpenTelemetrySpanHolder due to link order issues.
// FIXME why and how is this different from setupState()?
@@ -99,43 +99,43 @@ void ThreadStatus::attachQueryContext(ContextPtr query_context_)
}
applyQuerySettings();
-}
-
-void CurrentThread::defaultThreadDeleter()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->detachQuery(true, true);
-}
-
-void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
-{
- assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__);
-
- /// Attach or init current thread to thread group and copy useful information from it
- thread_group = thread_group_;
-
- performance_counters.setParent(&thread_group->performance_counters);
- memory_tracker.setParent(&thread_group->memory_tracker);
-
- {
- std::lock_guard lock(thread_group->mutex);
-
- /// NOTE: thread may be attached multiple times if it is reused from a thread pool.
- thread_group->thread_ids.emplace_back(thread_id);
-
- logs_queue_ptr = thread_group->logs_queue_ptr;
+}
+
+void CurrentThread::defaultThreadDeleter()
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->detachQuery(true, true);
+}
+
+void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
+{
+ assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__);
+
+ /// Attach or init current thread to thread group and copy useful information from it
+ thread_group = thread_group_;
+
+ performance_counters.setParent(&thread_group->performance_counters);
+ memory_tracker.setParent(&thread_group->memory_tracker);
+
+ {
+ std::lock_guard lock(thread_group->mutex);
+
+ /// NOTE: thread may be attached multiple times if it is reused from a thread pool.
+ thread_group->thread_ids.emplace_back(thread_id);
+
+ logs_queue_ptr = thread_group->logs_queue_ptr;
fatal_error_callback = thread_group->fatal_error_callback;
- query_context = thread_group->query_context;
-
+ query_context = thread_group->query_context;
+
if (global_context.expired())
- global_context = thread_group->global_context;
- }
-
+ global_context = thread_group->global_context;
+ }
+
if (auto query_context_ptr = query_context.lock())
{
applyQuerySettings();
-
+
// Generate new span for thread manually here, because we can't depend
// on OpenTelemetrySpanHolder due to link order issues.
thread_trace_context = query_context_ptr->query_trace_context;
@@ -149,35 +149,35 @@ void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
thread_trace_context.trace_id = 0;
}
- initPerformanceCounters();
-
- thread_state = ThreadState::AttachedToQuery;
-}
-
-void ThreadStatus::initializeQuery()
-{
- setupState(std::make_shared<ThreadGroupStatus>());
-
- /// No need to lock on mutex here
- thread_group->memory_tracker.setDescription("(for query)");
- thread_group->master_thread_id = thread_id;
-}
-
-void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool check_detached)
-{
- if (thread_state == ThreadState::AttachedToQuery)
- {
- if (check_detached)
- throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR);
- return;
- }
-
- if (!thread_group_)
- throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR);
-
- setupState(thread_group_);
-}
-
+ initPerformanceCounters();
+
+ thread_state = ThreadState::AttachedToQuery;
+}
+
+void ThreadStatus::initializeQuery()
+{
+ setupState(std::make_shared<ThreadGroupStatus>());
+
+ /// No need to lock on mutex here
+ thread_group->memory_tracker.setDescription("(for query)");
+ thread_group->master_thread_id = thread_id;
+}
+
+void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool check_detached)
+{
+ if (thread_state == ThreadState::AttachedToQuery)
+ {
+ if (check_detached)
+ throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR);
+ return;
+ }
+
+ if (!thread_group_)
+ throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR);
+
+ setupState(thread_group_);
+}
+
inline UInt64 time_in_nanoseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
return std::chrono::duration_cast<std::chrono::nanoseconds>(timepoint.time_since_epoch()).count();
@@ -194,16 +194,16 @@ inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock>
return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count();
}
-void ThreadStatus::initPerformanceCounters()
-{
- performance_counters_finalized = false;
-
- /// Clear stats from previous query if a new query is started
- /// TODO: make separate query_thread_performance_counters and thread_performance_counters
- performance_counters.resetCounters();
- memory_tracker.resetCounters();
- memory_tracker.setDescription("(for thread)");
-
+void ThreadStatus::initPerformanceCounters()
+{
+ performance_counters_finalized = false;
+
+ /// Clear stats from previous query if a new query is started
+ /// TODO: make separate query_thread_performance_counters and thread_performance_counters
+ performance_counters.resetCounters();
+ memory_tracker.resetCounters();
+ memory_tracker.setDescription("(for thread)");
+
// query_start_time_{microseconds, nanoseconds} are all constructed from the same time point
// to ensure that they are all equal up to the precision of a second.
const auto now = std::chrono::system_clock::now();
@@ -211,78 +211,78 @@ void ThreadStatus::initPerformanceCounters()
query_start_time_nanoseconds = time_in_nanoseconds(now);
query_start_time = time_in_seconds(now);
query_start_time_microseconds = time_in_microseconds(now);
- ++queries_started;
-
+ ++queries_started;
+
// query_start_time_nanoseconds cannot be used here since RUsageCounters expect CLOCK_MONOTONIC
*last_rusage = RUsageCounters::current();
-
+
if (auto query_context_ptr = query_context.lock())
- {
+ {
const Settings & settings = query_context_ptr->getSettingsRef();
- if (settings.metrics_perf_events_enabled)
- {
- try
- {
- current_thread_counters.initializeProfileEvents(
- settings.metrics_perf_events_list);
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
- }
- }
-
- if (!taskstats)
- {
- try
- {
- taskstats = TasksStatsCounters::create(thread_id);
- }
- catch (...)
- {
- tryLogCurrentException(log);
- }
- }
- if (taskstats)
- taskstats->reset();
-}
-
-void ThreadStatus::finalizePerformanceCounters()
-{
- if (performance_counters_finalized)
- return;
-
- performance_counters_finalized = true;
- updatePerformanceCounters();
-
- // We want to close perf file descriptors if the perf events were enabled for
- // one query. What this code does in practice is less clear -- e.g., if I run
- // 'select 1 settings metrics_perf_events_enabled = 1', I still get
- // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*.
- bool close_perf_descriptors = true;
+ if (settings.metrics_perf_events_enabled)
+ {
+ try
+ {
+ current_thread_counters.initializeProfileEvents(
+ settings.metrics_perf_events_list);
+ }
+ catch (...)
+ {
+ tryLogCurrentException(__PRETTY_FUNCTION__);
+ }
+ }
+ }
+
+ if (!taskstats)
+ {
+ try
+ {
+ taskstats = TasksStatsCounters::create(thread_id);
+ }
+ catch (...)
+ {
+ tryLogCurrentException(log);
+ }
+ }
+ if (taskstats)
+ taskstats->reset();
+}
+
+void ThreadStatus::finalizePerformanceCounters()
+{
+ if (performance_counters_finalized)
+ return;
+
+ performance_counters_finalized = true;
+ updatePerformanceCounters();
+
+ // We want to close perf file descriptors if the perf events were enabled for
+ // one query. What this code does in practice is less clear -- e.g., if I run
+ // 'select 1 settings metrics_perf_events_enabled = 1', I still get
+ // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*.
+ bool close_perf_descriptors = true;
if (auto query_context_ptr = query_context.lock())
close_perf_descriptors = !query_context_ptr->getSettingsRef().metrics_perf_events_enabled;
-
- try
- {
- current_thread_counters.finalizeProfileEvents(performance_counters);
- if (close_perf_descriptors)
- current_thread_counters.closeEventDescriptors();
- }
- catch (...)
- {
- tryLogCurrentException(log);
- }
-
- try
- {
+
+ try
+ {
+ current_thread_counters.finalizeProfileEvents(performance_counters);
+ if (close_perf_descriptors)
+ current_thread_counters.closeEventDescriptors();
+ }
+ catch (...)
+ {
+ tryLogCurrentException(log);
+ }
+
+ try
+ {
auto global_context_ptr = global_context.lock();
auto query_context_ptr = query_context.lock();
if (global_context_ptr && query_context_ptr)
- {
+ {
const auto & settings = query_context_ptr->getSettingsRef();
- if (settings.log_queries && settings.log_query_threads)
+ if (settings.log_queries && settings.log_query_threads)
{
const auto now = std::chrono::system_clock::now();
Int64 query_duration_ms = (time_in_microseconds(now) - query_start_time_microseconds) / 1000;
@@ -292,14 +292,14 @@ void ThreadStatus::finalizePerformanceCounters()
logToQueryThreadLog(*thread_log, query_context_ptr->getCurrentDatabase(), now);
}
}
- }
- }
- catch (...)
- {
- tryLogCurrentException(log);
- }
-}
-
+ }
+ }
+ catch (...)
+ {
+ tryLogCurrentException(log);
+ }
+}
+
void ThreadStatus::resetPerformanceCountersLastUsage()
{
*last_rusage = RUsageCounters::current();
@@ -307,55 +307,55 @@ void ThreadStatus::resetPerformanceCountersLastUsage()
taskstats->reset();
}
-void ThreadStatus::initQueryProfiler()
-{
+void ThreadStatus::initQueryProfiler()
+{
if (!query_profiled_enabled)
return;
- /// query profilers are useless without trace collector
+ /// query profilers are useless without trace collector
auto global_context_ptr = global_context.lock();
if (!global_context_ptr || !global_context_ptr->hasTraceCollector())
- return;
-
+ return;
+
auto query_context_ptr = query_context.lock();
assert(query_context_ptr);
const auto & settings = query_context_ptr->getSettingsRef();
-
- try
- {
- if (settings.query_profiler_real_time_period_ns > 0)
- query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id,
- /* period */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns));
-
- if (settings.query_profiler_cpu_time_period_ns > 0)
+
+ try
+ {
+ if (settings.query_profiler_real_time_period_ns > 0)
+ query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id,
+ /* period */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns));
+
+ if (settings.query_profiler_cpu_time_period_ns > 0)
query_profiler_cpu = std::make_unique<QueryProfilerCPU>(thread_id,
- /* period */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns));
- }
- catch (...)
- {
- /// QueryProfiler is optional.
- tryLogCurrentException("ThreadStatus", "Cannot initialize QueryProfiler");
- }
-}
-
-void ThreadStatus::finalizeQueryProfiler()
-{
- query_profiler_real.reset();
- query_profiler_cpu.reset();
-}
-
-void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
-{
+ /* period */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns));
+ }
+ catch (...)
+ {
+ /// QueryProfiler is optional.
+ tryLogCurrentException("ThreadStatus", "Cannot initialize QueryProfiler");
+ }
+}
+
+void ThreadStatus::finalizeQueryProfiler()
+{
+ query_profiler_real.reset();
+ query_profiler_cpu.reset();
+}
+
+void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
+{
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
- if (exit_if_already_detached && thread_state == ThreadState::DetachedFromQuery)
- {
- thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery;
- return;
- }
-
- assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__);
-
+ if (exit_if_already_detached && thread_state == ThreadState::DetachedFromQuery)
+ {
+ thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery;
+ return;
+ }
+
+ assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__);
+
std::shared_ptr<OpenTelemetrySpanLog> opentelemetry_span_log;
auto query_context_ptr = query_context.lock();
if (thread_trace_context.trace_id != UUID() && query_context_ptr)
@@ -393,41 +393,41 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
opentelemetry_span_log->add(span);
}
- finalizeQueryProfiler();
- finalizePerformanceCounters();
-
- /// Detach from thread group
- performance_counters.setParent(&ProfileEvents::global_counters);
- memory_tracker.reset();
-
+ finalizeQueryProfiler();
+ finalizePerformanceCounters();
+
+ /// Detach from thread group
+ performance_counters.setParent(&ProfileEvents::global_counters);
+ memory_tracker.reset();
+
/// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below (will reset to its parent).
memory_tracker.setParent(thread_group->memory_tracker.getParent());
-
- query_id.clear();
+
+ query_id.clear();
query_context.reset();
thread_trace_context.trace_id = 0;
thread_trace_context.span_id = 0;
- thread_group.reset();
-
- thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery;
-
-#if defined(__linux__)
- if (os_thread_priority)
- {
- LOG_TRACE(log, "Resetting nice");
-
- if (0 != setpriority(PRIO_PROCESS, thread_id, 0))
+ thread_group.reset();
+
+ thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery;
+
+#if defined(__linux__)
+ if (os_thread_priority)
+ {
+ LOG_TRACE(log, "Resetting nice");
+
+ if (0 != setpriority(PRIO_PROCESS, thread_id, 0))
LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString(ErrorCodes::CANNOT_SET_THREAD_PRIORITY, errno));
-
- os_thread_priority = 0;
- }
-#endif
-}
-
+
+ os_thread_priority = 0;
+ }
+#endif
+}
+
void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point<std::chrono::system_clock> now)
-{
- QueryThreadLogElement elem;
-
+{
+ QueryThreadLogElement elem;
+
// construct current_time and current_time_microseconds using the same time point
// so that the two times will always be equal up to a precision of a second.
auto current_time = time_in_seconds(now);
@@ -435,49 +435,49 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String
elem.event_time = current_time;
elem.event_time_microseconds = current_time_microseconds;
- elem.query_start_time = query_start_time;
+ elem.query_start_time = query_start_time;
elem.query_start_time_microseconds = query_start_time_microseconds;
elem.query_duration_ms = (time_in_nanoseconds(now) - query_start_time_nanoseconds) / 1000000U;
-
- elem.read_rows = progress_in.read_rows.load(std::memory_order_relaxed);
- elem.read_bytes = progress_in.read_bytes.load(std::memory_order_relaxed);
-
- /// TODO: Use written_rows and written_bytes when run time progress is implemented
- elem.written_rows = progress_out.read_rows.load(std::memory_order_relaxed);
- elem.written_bytes = progress_out.read_bytes.load(std::memory_order_relaxed);
- elem.memory_usage = memory_tracker.get();
- elem.peak_memory_usage = memory_tracker.getPeak();
-
- elem.thread_name = getThreadName();
- elem.thread_id = thread_id;
-
+
+ elem.read_rows = progress_in.read_rows.load(std::memory_order_relaxed);
+ elem.read_bytes = progress_in.read_bytes.load(std::memory_order_relaxed);
+
+ /// TODO: Use written_rows and written_bytes when run time progress is implemented
+ elem.written_rows = progress_out.read_rows.load(std::memory_order_relaxed);
+ elem.written_bytes = progress_out.read_bytes.load(std::memory_order_relaxed);
+ elem.memory_usage = memory_tracker.get();
+ elem.peak_memory_usage = memory_tracker.getPeak();
+
+ elem.thread_name = getThreadName();
+ elem.thread_id = thread_id;
+
elem.current_database = current_database;
- if (thread_group)
- {
- {
- std::lock_guard lock(thread_group->mutex);
-
- elem.master_thread_id = thread_group->master_thread_id;
- elem.query = thread_group->query;
+ if (thread_group)
+ {
+ {
+ std::lock_guard lock(thread_group->mutex);
+
+ elem.master_thread_id = thread_group->master_thread_id;
+ elem.query = thread_group->query;
elem.normalized_query_hash = thread_group->normalized_query_hash;
- }
- }
-
+ }
+ }
+
auto query_context_ptr = query_context.lock();
if (query_context_ptr)
- {
+ {
elem.client_info = query_context_ptr->getClientInfo();
-
+
if (query_context_ptr->getSettingsRef().log_profile_events != 0)
- {
- /// NOTE: Here we are in the same thread, so we can make memcpy()
- elem.profile_counters = std::make_shared<ProfileEvents::Counters>(performance_counters.getPartiallyAtomicSnapshot());
- }
- }
-
- thread_log.add(elem);
-}
-
+ {
+ /// NOTE: Here we are in the same thread, so we can make memcpy()
+ elem.profile_counters = std::make_shared<ProfileEvents::Counters>(performance_counters.getPartiallyAtomicSnapshot());
+ }
+ }
+
+ thread_log.add(elem);
+}
+
static String getCleanQueryAst(const ASTPtr q, ContextPtr context)
{
String res = serializeAST(*q, true);
@@ -536,90 +536,90 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo)
views_log->add(element);
}
-void CurrentThread::initializeQuery()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->initializeQuery();
- current_thread->deleter = CurrentThread::defaultThreadDeleter;
-}
-
-void CurrentThread::attachTo(const ThreadGroupStatusPtr & thread_group)
-{
- if (unlikely(!current_thread))
- return;
- current_thread->attachQuery(thread_group, true);
- current_thread->deleter = CurrentThread::defaultThreadDeleter;
-}
-
-void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group)
-{
- if (unlikely(!current_thread))
- return;
- current_thread->attachQuery(thread_group, false);
- current_thread->deleter = CurrentThread::defaultThreadDeleter;
-}
-
+void CurrentThread::initializeQuery()
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->initializeQuery();
+ current_thread->deleter = CurrentThread::defaultThreadDeleter;
+}
+
+void CurrentThread::attachTo(const ThreadGroupStatusPtr & thread_group)
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->attachQuery(thread_group, true);
+ current_thread->deleter = CurrentThread::defaultThreadDeleter;
+}
+
+void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group)
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->attachQuery(thread_group, false);
+ current_thread->deleter = CurrentThread::defaultThreadDeleter;
+}
+
void CurrentThread::attachQueryContext(ContextPtr query_context)
-{
- if (unlikely(!current_thread))
- return;
- current_thread->attachQueryContext(query_context);
-}
-
-void CurrentThread::finalizePerformanceCounters()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->finalizePerformanceCounters();
-}
-
-void CurrentThread::detachQuery()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->detachQuery(false);
-}
-
-void CurrentThread::detachQueryIfNotDetached()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->detachQuery(true);
-}
-
-
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->attachQueryContext(query_context);
+}
+
+void CurrentThread::finalizePerformanceCounters()
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->finalizePerformanceCounters();
+}
+
+void CurrentThread::detachQuery()
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->detachQuery(false);
+}
+
+void CurrentThread::detachQueryIfNotDetached()
+{
+ if (unlikely(!current_thread))
+ return;
+ current_thread->detachQuery(true);
+}
+
+
CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context)
-{
- CurrentThread::initializeQuery();
- CurrentThread::attachQueryContext(query_context);
+{
+ CurrentThread::initializeQuery();
+ CurrentThread::attachQueryContext(query_context);
if (!query_context->hasQueryContext())
query_context->makeQueryContext();
-}
-
-void CurrentThread::QueryScope::logPeakMemoryUsage()
-{
- auto group = CurrentThread::getGroup();
- if (!group)
- return;
-
- log_peak_memory_usage_in_destructor = false;
- group->memory_tracker.logPeakMemoryUsage();
-}
-
-CurrentThread::QueryScope::~QueryScope()
-{
- try
- {
- if (log_peak_memory_usage_in_destructor)
- logPeakMemoryUsage();
-
- CurrentThread::detachQueryIfNotDetached();
- }
- catch (...)
- {
- tryLogCurrentException("CurrentThread", __PRETTY_FUNCTION__);
- }
-}
-
-}
+}
+
+void CurrentThread::QueryScope::logPeakMemoryUsage()
+{
+ auto group = CurrentThread::getGroup();
+ if (!group)
+ return;
+
+ log_peak_memory_usage_in_destructor = false;
+ group->memory_tracker.logPeakMemoryUsage();
+}
+
+CurrentThread::QueryScope::~QueryScope()
+{
+ try
+ {
+ if (log_peak_memory_usage_in_destructor)
+ logPeakMemoryUsage();
+
+ CurrentThread::detachQueryIfNotDetached();
+ }
+ catch (...)
+ {
+ tryLogCurrentException("CurrentThread", __PRETTY_FUNCTION__);
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h
index 0dca00c285..f3ad109c16 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/TreeRewriter.h
@@ -1,63 +1,63 @@
-#pragma once
-
-#include <Core/Block.h>
-#include <Core/NamesAndTypes.h>
-#include <Interpreters/Aliases.h>
+#pragma once
+
+#include <Core/Block.h>
+#include <Core/NamesAndTypes.h>
+#include <Interpreters/Aliases.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
-#include <Interpreters/SelectQueryOptions.h>
-#include <Storages/IStorage_fwd.h>
-
-namespace DB
-{
-
-class ASTFunction;
-struct ASTTablesInSelectQueryElement;
-class TableJoin;
-struct Settings;
-struct SelectQueryOptions;
-using Scalars = std::map<String, Block>;
+#include <Interpreters/SelectQueryOptions.h>
+#include <Storages/IStorage_fwd.h>
+
+namespace DB
+{
+
+class ASTFunction;
+struct ASTTablesInSelectQueryElement;
+class TableJoin;
+struct Settings;
+struct SelectQueryOptions;
+using Scalars = std::map<String, Block>;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
-
+
struct TreeRewriterResult
-{
- ConstStoragePtr storage;
+{
+ ConstStoragePtr storage;
StorageMetadataPtr metadata_snapshot;
- std::shared_ptr<TableJoin> analyzed_join;
- const ASTTablesInSelectQueryElement * ast_join = nullptr;
-
- NamesAndTypesList source_columns;
- NameSet source_columns_set; /// Set of names of source_columns.
- /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns.
- NamesAndTypesList required_source_columns;
+ std::shared_ptr<TableJoin> analyzed_join;
+ const ASTTablesInSelectQueryElement * ast_join = nullptr;
+
+ NamesAndTypesList source_columns;
+ NameSet source_columns_set; /// Set of names of source_columns.
+ /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns.
+ NamesAndTypesList required_source_columns;
/// Same as above but also record alias columns which are expanded. This is for RBAC access check.
Names required_source_columns_before_expanding_alias_columns;
-
+
/// Set of alias columns that are expanded to their alias expressions. We still need the original columns to check access permission.
NameSet expanded_aliases;
- Aliases aliases;
- std::vector<const ASTFunction *> aggregates;
-
+ Aliases aliases;
+ std::vector<const ASTFunction *> aggregates;
+
std::vector<const ASTFunction *> window_function_asts;
- /// Which column is needed to be ARRAY-JOIN'ed to get the specified.
- /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".
- NameToNameMap array_join_result_to_source;
-
- /// For the ARRAY JOIN section, mapping from the alias to the full column name.
- /// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here.
- /// Note: not used further.
- NameToNameMap array_join_alias_to_name;
-
- /// The backward mapping for array_join_alias_to_name.
- /// Note: not used further.
- NameToNameMap array_join_name_to_alias;
-
- /// Predicate optimizer overrides the sub queries
- bool rewrite_subqueries = false;
-
+ /// Which column is needed to be ARRAY-JOIN'ed to get the specified.
+ /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".
+ NameToNameMap array_join_result_to_source;
+
+ /// For the ARRAY JOIN section, mapping from the alias to the full column name.
+ /// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here.
+ /// Note: not used further.
+ NameToNameMap array_join_alias_to_name;
+
+ /// The backward mapping for array_join_alias_to_name.
+ /// Note: not used further.
+ NameToNameMap array_join_name_to_alias;
+
+ /// Predicate optimizer overrides the sub queries
+ bool rewrite_subqueries = false;
+
/// Whether the query contains explicit columns like "SELECT column1 + column2 FROM table1".
/// Queries like "SELECT count() FROM table1", "SELECT 1" don't contain explicit columns.
bool has_explicit_columns = false;
@@ -65,46 +65,46 @@ struct TreeRewriterResult
/// Whether it's possible to use the trivial count optimization,
/// i.e. use a fast call of IStorage::totalRows() (or IStorage::totalRowsByPartitionPredicate())
/// instead of actual retrieving columns and counting rows.
- bool optimize_trivial_count = false;
-
+ bool optimize_trivial_count = false;
+
/// Cache isRemote() call for storage, because it may be too heavy.
bool is_remote_storage = false;
- /// Results of scalar sub queries
- Scalars scalars;
-
+ /// Results of scalar sub queries
+ Scalars scalars;
+
TreeRewriterResult(
const NamesAndTypesList & source_columns_,
ConstStoragePtr storage_ = {},
const StorageMetadataPtr & metadata_snapshot_ = {},
bool add_special = true);
-
- void collectSourceColumns(bool add_special);
- void collectUsedColumns(const ASTPtr & query, bool is_select);
- Names requiredSourceColumns() const { return required_source_columns.getNames(); }
+
+ void collectSourceColumns(bool add_special);
+ void collectUsedColumns(const ASTPtr & query, bool is_select);
+ Names requiredSourceColumns() const { return required_source_columns.getNames(); }
const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; }
NameSet getArrayJoinSourceNameSet() const;
Names getExpandedAliases() const { return {expanded_aliases.begin(), expanded_aliases.end()}; }
- const Scalars & getScalars() const { return scalars; }
-};
-
+ const Scalars & getScalars() const { return scalars; }
+};
+
using TreeRewriterResultPtr = std::shared_ptr<const TreeRewriterResult>;
-
+
/// Tree Rewriter in terms of CMU slides @sa https://15721.courses.cs.cmu.edu/spring2020/slides/19-optimizer1.pdf
///
/// Optimises AST tree and collect information for further expression analysis in ExpressionAnalyzer.
-/// Result AST has the following invariants:
-/// * all aliases are substituted
-/// * qualified names are translated
-/// * scalar subqueries are executed replaced with constants
-/// * unneeded columns are removed from SELECT clause
-/// * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...).
+/// Result AST has the following invariants:
+/// * all aliases are substituted
+/// * qualified names are translated
+/// * scalar subqueries are executed replaced with constants
+/// * unneeded columns are removed from SELECT clause
+/// * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...).
class TreeRewriter : WithContext
-{
-public:
+{
+public:
explicit TreeRewriter(ContextPtr context_) : WithContext(context_) {}
-
- /// Analyze and rewrite not select query
+
+ /// Analyze and rewrite not select query
TreeRewriterResultPtr analyze(
ASTPtr & query,
const NamesAndTypesList & source_columns_,
@@ -112,18 +112,18 @@ public:
const StorageMetadataPtr & metadata_snapshot = {},
bool allow_aggregations = false,
bool allow_self_aliases = true) const;
-
- /// Analyze and rewrite select query
+
+ /// Analyze and rewrite select query
TreeRewriterResultPtr analyzeSelect(
- ASTPtr & query,
+ ASTPtr & query,
TreeRewriterResult && result,
- const SelectQueryOptions & select_options = {},
- const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {},
- const Names & required_result_columns = {},
- std::shared_ptr<TableJoin> table_join = {}) const;
-
-private:
+ const SelectQueryOptions & select_options = {},
+ const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {},
+ const Names & required_result_columns = {},
+ std::shared_ptr<TableJoin> table_join = {}) const;
+
+private:
static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases);
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h
index ec2e1d3bd5..068f124d2f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h
@@ -1,43 +1,43 @@
-#pragma once
-
+#pragma once
+
#include <Columns/ColumnsNumber.h>
#include <Core/Block.h>
-#include <Interpreters/IJoin.h>
+#include <Interpreters/IJoin.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActions.h>
-
-namespace DB
-{
-
-struct ColumnWithTypeAndName;
+
+namespace DB
+{
+
+struct ColumnWithTypeAndName;
class TableJoin;
-class IColumn;
-using ColumnRawPtrs = std::vector<const IColumn *>;
+class IColumn;
+using ColumnRawPtrs = std::vector<const IColumn *>;
using UInt8ColumnDataPtr = const ColumnUInt8::Container *;
-
-namespace JoinCommon
-{
+
+namespace JoinCommon
+{
bool canBecomeNullable(const DataTypePtr & type);
DataTypePtr convertTypeToNullable(const DataTypePtr & type);
void convertColumnToNullable(ColumnWithTypeAndName & column);
-void convertColumnsToNullable(Block & block, size_t starting_pos = 0);
-void removeColumnNullability(ColumnWithTypeAndName & column);
+void convertColumnsToNullable(Block & block, size_t starting_pos = 0);
+void removeColumnNullability(ColumnWithTypeAndName & column);
void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_column);
ColumnPtr emptyNotNullableClone(const ColumnPtr & column);
ColumnPtr materializeColumn(const Block & block, const String & name);
-Columns materializeColumns(const Block & block, const Names & names);
-ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
-ColumnRawPtrs getRawPointers(const Columns & columns);
-void removeLowCardinalityInplace(Block & block);
+Columns materializeColumns(const Block & block, const Names & names);
+ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
+ColumnRawPtrs getRawPointers(const Columns & columns);
+void removeLowCardinalityInplace(Block & block);
void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true);
void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys);
-
-ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
-
+
+ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
+
/// Throw an exception if join condition column is not UIint8
void checkTypesOfMasks(const Block & block_left, const String & condition_name_left,
const Block & block_right, const String & condition_name_right);
-
+
/// Throw an exception if blocks have different types of key columns . Compare up to Nullability.
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left,
const Block & block_right, const Names & key_names_right);
@@ -46,9 +46,9 @@ void checkTypesOfKeys(const Block & block_left, const Names & key_names_left,
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const String & condition_name_left,
const Block & block_right, const Names & key_names_right, const String & condition_name_right);
-void createMissedColumns(Block & block);
+void createMissedColumns(Block & block);
void joinTotals(Block left_totals, Block right_totals, const TableJoin & table_join, Block & out_block);
-
+
void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count);
bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type);
@@ -61,8 +61,8 @@ void splitAdditionalColumns(const Names & key_names, const Block & sample_block,
void changeLowCardinalityInplace(ColumnWithTypeAndName & column);
-}
-
+}
+
/// Creates result from right table data in RIGHT and FULL JOIN when keys are not present in left table.
class NotJoinedBlocks final
{
@@ -118,4 +118,4 @@ private:
void setRightIndex(size_t right_pos, size_t result_position);
};
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp
index 4b70f25f5c..8864103184 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.cpp
@@ -1,114 +1,114 @@
-#include <Parsers/ASTAlterQuery.h>
+#include <Parsers/ASTAlterQuery.h>
#include <IO/Operators.h>
-#include <iomanip>
-#include <Common/quoteString.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int UNEXPECTED_AST_STRUCTURE;
-}
-
-ASTPtr ASTAlterCommand::clone() const
-{
- auto res = std::make_shared<ASTAlterCommand>(*this);
- res->children.clear();
-
- if (col_decl)
- {
- res->col_decl = col_decl->clone();
- res->children.push_back(res->col_decl);
- }
- if (column)
- {
- res->column = column->clone();
- res->children.push_back(res->column);
- }
- if (order_by)
- {
- res->order_by = order_by->clone();
- res->children.push_back(res->order_by);
- }
- if (partition)
- {
- res->partition = partition->clone();
- res->children.push_back(res->partition);
- }
- if (predicate)
- {
- res->predicate = predicate->clone();
- res->children.push_back(res->predicate);
- }
- if (ttl)
- {
- res->ttl = ttl->clone();
- res->children.push_back(res->ttl);
- }
- if (settings_changes)
- {
- res->settings_changes = settings_changes->clone();
- res->children.push_back(res->settings_changes);
- }
+#include <iomanip>
+#include <Common/quoteString.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int UNEXPECTED_AST_STRUCTURE;
+}
+
+ASTPtr ASTAlterCommand::clone() const
+{
+ auto res = std::make_shared<ASTAlterCommand>(*this);
+ res->children.clear();
+
+ if (col_decl)
+ {
+ res->col_decl = col_decl->clone();
+ res->children.push_back(res->col_decl);
+ }
+ if (column)
+ {
+ res->column = column->clone();
+ res->children.push_back(res->column);
+ }
+ if (order_by)
+ {
+ res->order_by = order_by->clone();
+ res->children.push_back(res->order_by);
+ }
+ if (partition)
+ {
+ res->partition = partition->clone();
+ res->children.push_back(res->partition);
+ }
+ if (predicate)
+ {
+ res->predicate = predicate->clone();
+ res->children.push_back(res->predicate);
+ }
+ if (ttl)
+ {
+ res->ttl = ttl->clone();
+ res->children.push_back(res->ttl);
+ }
+ if (settings_changes)
+ {
+ res->settings_changes = settings_changes->clone();
+ res->children.push_back(res->settings_changes);
+ }
if (settings_resets)
{
res->settings_resets = settings_resets->clone();
res->children.push_back(res->settings_resets);
}
- if (values)
- {
- res->values = values->clone();
- res->children.push_back(res->values);
- }
- if (rename_to)
- {
- res->rename_to = rename_to->clone();
- res->children.push_back(res->rename_to);
- }
-
- return res;
-}
-
-void ASTAlterCommand::formatImpl(
- const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
-
- if (type == ASTAlterCommand::ADD_COLUMN)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
- col_decl->formatImpl(settings, state, frame);
-
- if (first)
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : "");
- else if (column) /// AFTER
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : "");
- column->formatImpl(settings, state, frame);
- }
- }
- else if (type == ASTAlterCommand::DROP_COLUMN)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
- << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
- column->formatImpl(settings, state, frame);
- if (partition)
- {
+ if (values)
+ {
+ res->values = values->clone();
+ res->children.push_back(res->values);
+ }
+ if (rename_to)
+ {
+ res->rename_to = rename_to->clone();
+ res->children.push_back(res->rename_to);
+ }
+
+ return res;
+}
+
+void ASTAlterCommand::formatImpl(
+ const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
+
+ if (type == ASTAlterCommand::ADD_COLUMN)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ col_decl->formatImpl(settings, state, frame);
+
+ if (first)
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : "");
+ else if (column) /// AFTER
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : "");
+ column->formatImpl(settings, state, frame);
+ }
+ }
+ else if (type == ASTAlterCommand::DROP_COLUMN)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
+ << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ column->formatImpl(settings, state, frame);
+ if (partition)
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- }
- else if (type == ASTAlterCommand::MODIFY_COLUMN)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
- col_decl->formatImpl(settings, state, frame);
-
+ partition->formatImpl(settings, state, frame);
+ }
+ }
+ else if (type == ASTAlterCommand::MODIFY_COLUMN)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ col_decl->formatImpl(settings, state, frame);
+
if (!remove_property.empty())
- {
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << " REMOVE " << remove_property;
- }
+ }
else
{
if (first)
@@ -119,7 +119,7 @@ void ASTAlterCommand::formatImpl(
column->formatImpl(settings, state, frame);
}
}
- }
+ }
else if (type == ASTAlterCommand::MATERIALIZE_COLUMN)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
@@ -131,69 +131,69 @@ void ASTAlterCommand::formatImpl(
partition->formatImpl(settings, state, frame);
}
}
- else if (type == ASTAlterCommand::COMMENT_COLUMN)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
- column->formatImpl(settings, state, frame);
- settings.ostr << " " << (settings.hilite ? hilite_none : "");
- comment->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::MODIFY_ORDER_BY)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : "");
- order_by->formatImpl(settings, state, frame);
- }
+ else if (type == ASTAlterCommand::COMMENT_COLUMN)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ column->formatImpl(settings, state, frame);
+ settings.ostr << " " << (settings.hilite ? hilite_none : "");
+ comment->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::MODIFY_ORDER_BY)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : "");
+ order_by->formatImpl(settings, state, frame);
+ }
else if (type == ASTAlterCommand::MODIFY_SAMPLE_BY)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SAMPLE BY " << (settings.hilite ? hilite_none : "");
sample_by->formatImpl(settings, state, frame);
}
- else if (type == ASTAlterCommand::ADD_INDEX)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
- index_decl->formatImpl(settings, state, frame);
-
+ else if (type == ASTAlterCommand::ADD_INDEX)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ index_decl->formatImpl(settings, state, frame);
+
if (first)
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : "");
else if (index) /// AFTER
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : "");
- index->formatImpl(settings, state, frame);
- }
- }
- else if (type == ASTAlterCommand::DROP_INDEX)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
- << (clear_index ? "CLEAR " : "DROP ") << "INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
- index->formatImpl(settings, state, frame);
- if (partition)
- {
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : "");
+ index->formatImpl(settings, state, frame);
+ }
+ }
+ else if (type == ASTAlterCommand::DROP_INDEX)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
+ << (clear_index ? "CLEAR " : "DROP ") << "INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ index->formatImpl(settings, state, frame);
+ if (partition)
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- }
- else if (type == ASTAlterCommand::MATERIALIZE_INDEX)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
- << "MATERIALIZE INDEX " << (settings.hilite ? hilite_none : "");
- index->formatImpl(settings, state, frame);
- if (partition)
- {
+ partition->formatImpl(settings, state, frame);
+ }
+ }
+ else if (type == ASTAlterCommand::MATERIALIZE_INDEX)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
+ << "MATERIALIZE INDEX " << (settings.hilite ? hilite_none : "");
+ index->formatImpl(settings, state, frame);
+ if (partition)
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- }
- else if (type == ASTAlterCommand::ADD_CONSTRAINT)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
- constraint_decl->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::DROP_CONSTRAINT)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
- << "DROP CONSTRAINT " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
- constraint->formatImpl(settings, state, frame);
- }
+ partition->formatImpl(settings, state, frame);
+ }
+ }
+ else if (type == ASTAlterCommand::ADD_CONSTRAINT)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ constraint_decl->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::DROP_CONSTRAINT)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
+ << "DROP CONSTRAINT " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
+ constraint->formatImpl(settings, state, frame);
+ }
else if (type == ASTAlterCommand::ADD_PROJECTION)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD PROJECTION " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
@@ -229,100 +229,100 @@ void ASTAlterCommand::formatImpl(
partition->formatImpl(settings, state, frame);
}
}
- else if (type == ASTAlterCommand::DROP_PARTITION)
- {
+ else if (type == ASTAlterCommand::DROP_PARTITION)
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
<< (detach ? "DETACH" : "DROP") << (part ? " PART " : " PARTITION ")
- << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::DROP_DETACHED_PARTITION)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP DETACHED" << (part ? " PART " : " PARTITION ")
- << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::ATTACH_PARTITION)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH "
- << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::MOVE_PARTITION)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MOVE "
- << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- settings.ostr << " TO ";
- switch (move_destination_type)
- {
- case DataDestinationType::DISK:
- settings.ostr << "DISK ";
- break;
- case DataDestinationType::VOLUME:
- settings.ostr << "VOLUME ";
- break;
- case DataDestinationType::TABLE:
- settings.ostr << "TABLE ";
- if (!to_database.empty())
- {
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(to_database)
- << (settings.hilite ? hilite_none : "") << ".";
- }
- settings.ostr << (settings.hilite ? hilite_identifier : "")
- << backQuoteIfNeed(to_table)
- << (settings.hilite ? hilite_none : "");
- return;
- default:
- break;
- }
- if (move_destination_type != DataDestinationType::TABLE)
- {
- settings.ostr << quoteString(move_destination_name);
- }
- }
- else if (type == ASTAlterCommand::REPLACE_PARTITION)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (replace ? "REPLACE" : "ATTACH") << " PARTITION "
- << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "");
- if (!from_database.empty())
- {
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_database)
- << (settings.hilite ? hilite_none : "") << ".";
- }
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_table) << (settings.hilite ? hilite_none : "");
- }
- else if (type == ASTAlterCommand::FETCH_PARTITION)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH "
+ << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::DROP_DETACHED_PARTITION)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP DETACHED" << (part ? " PART " : " PARTITION ")
+ << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::ATTACH_PARTITION)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH "
+ << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::MOVE_PARTITION)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MOVE "
+ << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ settings.ostr << " TO ";
+ switch (move_destination_type)
+ {
+ case DataDestinationType::DISK:
+ settings.ostr << "DISK ";
+ break;
+ case DataDestinationType::VOLUME:
+ settings.ostr << "VOLUME ";
+ break;
+ case DataDestinationType::TABLE:
+ settings.ostr << "TABLE ";
+ if (!to_database.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(to_database)
+ << (settings.hilite ? hilite_none : "") << ".";
+ }
+ settings.ostr << (settings.hilite ? hilite_identifier : "")
+ << backQuoteIfNeed(to_table)
+ << (settings.hilite ? hilite_none : "");
+ return;
+ default:
+ break;
+ }
+ if (move_destination_type != DataDestinationType::TABLE)
+ {
+ settings.ostr << quoteString(move_destination_name);
+ }
+ }
+ else if (type == ASTAlterCommand::REPLACE_PARTITION)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (replace ? "REPLACE" : "ATTACH") << " PARTITION "
+ << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "");
+ if (!from_database.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_database)
+ << (settings.hilite ? hilite_none : "") << ".";
+ }
+ settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_table) << (settings.hilite ? hilite_none : "");
+ }
+ else if (type == ASTAlterCommand::FETCH_PARTITION)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH "
<< (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- settings.ostr << (settings.hilite ? hilite_keyword : "")
+ partition->formatImpl(settings, state, frame);
+ settings.ostr << (settings.hilite ? hilite_keyword : "")
<< " FROM " << (settings.hilite ? hilite_none : "") << DB::quote << from;
- }
- else if (type == ASTAlterCommand::FREEZE_PARTITION)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
-
- if (!with_name.empty())
- {
- settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
+ }
+ else if (type == ASTAlterCommand::FREEZE_PARTITION)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+
+ if (!with_name.empty())
+ {
+ settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
- }
- }
- else if (type == ASTAlterCommand::FREEZE_ALL)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE";
-
- if (!with_name.empty())
- {
- settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
+ }
+ }
+ else if (type == ASTAlterCommand::FREEZE_ALL)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE";
+
+ if (!with_name.empty())
+ {
+ settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
- }
- }
+ }
+ }
else if (type == ASTAlterCommand::UNFREEZE_PARTITION)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE PARTITION " << (settings.hilite ? hilite_none : "");
@@ -344,8 +344,8 @@ void ASTAlterCommand::formatImpl(
<< " " << DB::quote << with_name;
}
}
- else if (type == ASTAlterCommand::DELETE)
- {
+ else if (type == ASTAlterCommand::DELETE)
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE" << (settings.hilite ? hilite_none : "");
if (partition)
@@ -355,73 +355,73 @@ void ASTAlterCommand::formatImpl(
}
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
- predicate->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::UPDATE)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UPDATE " << (settings.hilite ? hilite_none : "");
- update_assignments->formatImpl(settings, state, frame);
-
+ predicate->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::UPDATE)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UPDATE " << (settings.hilite ? hilite_none : "");
+ update_assignments->formatImpl(settings, state, frame);
+
if (partition)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
}
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
- predicate->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::MODIFY_TTL)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : "");
- ttl->formatImpl(settings, state, frame);
- }
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
+ predicate->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::MODIFY_TTL)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : "");
+ ttl->formatImpl(settings, state, frame);
+ }
else if (type == ASTAlterCommand::REMOVE_TTL)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REMOVE TTL" << (settings.hilite ? hilite_none : "");
}
- else if (type == ASTAlterCommand::MATERIALIZE_TTL)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MATERIALIZE TTL"
- << (settings.hilite ? hilite_none : "");
- if (partition)
- {
+ else if (type == ASTAlterCommand::MATERIALIZE_TTL)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MATERIALIZE TTL"
+ << (settings.hilite ? hilite_none : "");
+ if (partition)
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- }
- else if (type == ASTAlterCommand::MODIFY_SETTING)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SETTING " << (settings.hilite ? hilite_none : "");
- settings_changes->formatImpl(settings, state, frame);
- }
+ partition->formatImpl(settings, state, frame);
+ }
+ }
+ else if (type == ASTAlterCommand::MODIFY_SETTING)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SETTING " << (settings.hilite ? hilite_none : "");
+ settings_changes->formatImpl(settings, state, frame);
+ }
else if (type == ASTAlterCommand::RESET_SETTING)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RESET SETTING " << (settings.hilite ? hilite_none : "");
settings_resets->formatImpl(settings, state, frame);
}
- else if (type == ASTAlterCommand::MODIFY_QUERY)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY QUERY " << settings.nl_or_ws << (settings.hilite ? hilite_none : "");
- select->formatImpl(settings, state, frame);
- }
- else if (type == ASTAlterCommand::LIVE_VIEW_REFRESH)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REFRESH " << (settings.hilite ? hilite_none : "");
- }
- else if (type == ASTAlterCommand::RENAME_COLUMN)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RENAME COLUMN " << (if_exists ? "IF EXISTS " : "")
- << (settings.hilite ? hilite_none : "");
- column->formatImpl(settings, state, frame);
-
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO ";
- rename_to->formatImpl(settings, state, frame);
- }
- else
- throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
-}
-
+ else if (type == ASTAlterCommand::MODIFY_QUERY)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY QUERY " << settings.nl_or_ws << (settings.hilite ? hilite_none : "");
+ select->formatImpl(settings, state, frame);
+ }
+ else if (type == ASTAlterCommand::LIVE_VIEW_REFRESH)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REFRESH " << (settings.hilite ? hilite_none : "");
+ }
+ else if (type == ASTAlterCommand::RENAME_COLUMN)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RENAME COLUMN " << (if_exists ? "IF EXISTS " : "")
+ << (settings.hilite ? hilite_none : "");
+ column->formatImpl(settings, state, frame);
+
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO ";
+ rename_to->formatImpl(settings, state, frame);
+ }
+ else
+ throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
+}
+
bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const
{
if (command_list)
@@ -438,7 +438,7 @@ bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) con
}
return false;
}
-
+
bool ASTAlterQuery::isSettingsAlter() const
{
return isOneCommandTypeOnly(ASTAlterCommand::MODIFY_SETTING);
@@ -450,50 +450,50 @@ bool ASTAlterQuery::isFreezeAlter() const
|| isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_ALL);
}
-/** Get the text that identifies this element. */
-String ASTAlterQuery::getID(char delim) const
-{
- return "AlterQuery" + (delim + database) + delim + table;
-}
-
-ASTPtr ASTAlterQuery::clone() const
-{
- auto res = std::make_shared<ASTAlterQuery>(*this);
- res->children.clear();
-
- if (command_list)
- res->set(res->command_list, command_list->clone());
-
- return res;
-}
-
-void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- frame.need_parens = false;
-
- std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
-
- if (is_live_view)
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE VIEW " << (settings.hilite ? hilite_none : "");
- else
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : "");
-
- if (!table.empty())
- {
- if (!database.empty())
- {
- settings.ostr << indent_str << backQuoteIfNeed(database);
- settings.ostr << ".";
- }
- settings.ostr << indent_str << backQuoteIfNeed(table);
- }
- formatOnCluster(settings);
- settings.ostr << settings.nl_or_ws;
-
- FormatStateStacked frame_nested = frame;
- frame_nested.need_parens = false;
- ++frame_nested.indent;
- static_cast<IAST *>(command_list)->formatImpl(settings, state, frame_nested);
-}
-
-}
+/** Get the text that identifies this element. */
+String ASTAlterQuery::getID(char delim) const
+{
+ return "AlterQuery" + (delim + database) + delim + table;
+}
+
+ASTPtr ASTAlterQuery::clone() const
+{
+ auto res = std::make_shared<ASTAlterQuery>(*this);
+ res->children.clear();
+
+ if (command_list)
+ res->set(res->command_list, command_list->clone());
+
+ return res;
+}
+
+void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ frame.need_parens = false;
+
+ std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
+
+ if (is_live_view)
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE VIEW " << (settings.hilite ? hilite_none : "");
+ else
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : "");
+
+ if (!table.empty())
+ {
+ if (!database.empty())
+ {
+ settings.ostr << indent_str << backQuoteIfNeed(database);
+ settings.ostr << ".";
+ }
+ settings.ostr << indent_str << backQuoteIfNeed(table);
+ }
+ formatOnCluster(settings);
+ settings.ostr << settings.nl_or_ws;
+
+ FormatStateStacked frame_nested = frame;
+ frame_nested.need_parens = false;
+ ++frame_nested.indent;
+ static_cast<IAST *>(command_list)->formatImpl(settings, state, frame_nested);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h
index a5410e797c..ac6ee30fe0 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTAlterQuery.h
@@ -1,118 +1,118 @@
-#pragma once
-
+#pragma once
+
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTQueryWithOnCluster.h>
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Parsers/ASTTTLElement.h>
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Parsers/ASTTTLElement.h>
#include <Parsers/IAST.h>
-
-
-namespace DB
-{
-
-/** ALTER query:
- * ALTER TABLE [db.]name_type
- * ADD COLUMN col_name type [AFTER col_after],
- * DROP COLUMN col_drop [FROM PARTITION partition],
- * MODIFY COLUMN col_name type,
- * DROP PARTITION partition,
- * COMMENT_COLUMN col_name 'comment',
- * ALTER LIVE VIEW [db.]name_type
- * REFRESH
- */
-
-class ASTAlterCommand : public IAST
-{
-public:
- enum Type
- {
- ADD_COLUMN,
- DROP_COLUMN,
- MODIFY_COLUMN,
- COMMENT_COLUMN,
- RENAME_COLUMN,
+
+
+namespace DB
+{
+
+/** ALTER query:
+ * ALTER TABLE [db.]name_type
+ * ADD COLUMN col_name type [AFTER col_after],
+ * DROP COLUMN col_drop [FROM PARTITION partition],
+ * MODIFY COLUMN col_name type,
+ * DROP PARTITION partition,
+ * COMMENT_COLUMN col_name 'comment',
+ * ALTER LIVE VIEW [db.]name_type
+ * REFRESH
+ */
+
+class ASTAlterCommand : public IAST
+{
+public:
+ enum Type
+ {
+ ADD_COLUMN,
+ DROP_COLUMN,
+ MODIFY_COLUMN,
+ COMMENT_COLUMN,
+ RENAME_COLUMN,
MATERIALIZE_COLUMN,
- MODIFY_ORDER_BY,
+ MODIFY_ORDER_BY,
MODIFY_SAMPLE_BY,
- MODIFY_TTL,
- MATERIALIZE_TTL,
- MODIFY_SETTING,
+ MODIFY_TTL,
+ MATERIALIZE_TTL,
+ MODIFY_SETTING,
RESET_SETTING,
- MODIFY_QUERY,
+ MODIFY_QUERY,
REMOVE_TTL,
-
- ADD_INDEX,
- DROP_INDEX,
- MATERIALIZE_INDEX,
-
- ADD_CONSTRAINT,
- DROP_CONSTRAINT,
-
+
+ ADD_INDEX,
+ DROP_INDEX,
+ MATERIALIZE_INDEX,
+
+ ADD_CONSTRAINT,
+ DROP_CONSTRAINT,
+
ADD_PROJECTION,
DROP_PROJECTION,
MATERIALIZE_PROJECTION,
- DROP_PARTITION,
- DROP_DETACHED_PARTITION,
- ATTACH_PARTITION,
- MOVE_PARTITION,
- REPLACE_PARTITION,
- FETCH_PARTITION,
- FREEZE_PARTITION,
- FREEZE_ALL,
+ DROP_PARTITION,
+ DROP_DETACHED_PARTITION,
+ ATTACH_PARTITION,
+ MOVE_PARTITION,
+ REPLACE_PARTITION,
+ FETCH_PARTITION,
+ FREEZE_PARTITION,
+ FREEZE_ALL,
UNFREEZE_PARTITION,
UNFREEZE_ALL,
-
- DELETE,
- UPDATE,
-
- NO_TYPE,
-
- LIVE_VIEW_REFRESH,
- };
-
- Type type = NO_TYPE;
-
- /** The ADD COLUMN query stores the name and type of the column to add
- * This field is not used in the DROP query
- * In MODIFY query, the column name and the new type are stored here
- */
- ASTPtr col_decl;
-
- /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER
- * The DROP query stores the column name for deletion here
- * Also used for RENAME COLUMN.
- */
- ASTPtr column;
-
- /** For MODIFY ORDER BY
- */
- ASTPtr order_by;
-
+
+ DELETE,
+ UPDATE,
+
+ NO_TYPE,
+
+ LIVE_VIEW_REFRESH,
+ };
+
+ Type type = NO_TYPE;
+
+ /** The ADD COLUMN query stores the name and type of the column to add
+ * This field is not used in the DROP query
+ * In MODIFY query, the column name and the new type are stored here
+ */
+ ASTPtr col_decl;
+
+ /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER
+ * The DROP query stores the column name for deletion here
+ * Also used for RENAME COLUMN.
+ */
+ ASTPtr column;
+
+ /** For MODIFY ORDER BY
+ */
+ ASTPtr order_by;
+
/** For MODIFY SAMPLE BY
*/
ASTPtr sample_by;
- /** The ADD INDEX query stores the IndexDeclaration there.
- */
- ASTPtr index_decl;
-
- /** The ADD INDEX query stores the name of the index following AFTER.
- * The DROP INDEX query stores the name for deletion.
- * The MATERIALIZE INDEX query stores the name of the index to materialize.
- * The CLEAR INDEX query stores the name of the index to clear.
- */
- ASTPtr index;
-
- /** The ADD CONSTRAINT query stores the ConstraintDeclaration there.
- */
- ASTPtr constraint_decl;
-
- /** The DROP CONSTRAINT query stores the name for deletion.
- */
- ASTPtr constraint;
-
+ /** The ADD INDEX query stores the IndexDeclaration there.
+ */
+ ASTPtr index_decl;
+
+ /** The ADD INDEX query stores the name of the index following AFTER.
+ * The DROP INDEX query stores the name for deletion.
+ * The MATERIALIZE INDEX query stores the name of the index to materialize.
+ * The CLEAR INDEX query stores the name of the index to clear.
+ */
+ ASTPtr index;
+
+ /** The ADD CONSTRAINT query stores the ConstraintDeclaration there.
+ */
+ ASTPtr constraint_decl;
+
+ /** The DROP CONSTRAINT query stores the name for deletion.
+ */
+ ASTPtr constraint;
+
/** The ADD PROJECTION query stores the ProjectionDeclaration there.
*/
ASTPtr projection_decl;
@@ -125,114 +125,114 @@ public:
ASTPtr projection;
/** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries.
- * The value or ID of the partition is stored here.
- */
- ASTPtr partition;
-
- /// For DELETE/UPDATE WHERE: the predicate that filters the rows to delete/update.
- ASTPtr predicate;
-
- /// A list of expressions of the form `column = expr` for the UPDATE command.
- ASTPtr update_assignments;
-
- /// A column comment
- ASTPtr comment;
-
- /// For MODIFY TTL query
- ASTPtr ttl;
-
- /// FOR MODIFY_SETTING
- ASTPtr settings_changes;
-
+ * The value or ID of the partition is stored here.
+ */
+ ASTPtr partition;
+
+ /// For DELETE/UPDATE WHERE: the predicate that filters the rows to delete/update.
+ ASTPtr predicate;
+
+ /// A list of expressions of the form `column = expr` for the UPDATE command.
+ ASTPtr update_assignments;
+
+ /// A column comment
+ ASTPtr comment;
+
+ /// For MODIFY TTL query
+ ASTPtr ttl;
+
+ /// FOR MODIFY_SETTING
+ ASTPtr settings_changes;
+
/// FOR RESET_SETTING
ASTPtr settings_resets;
- /// For MODIFY_QUERY
- ASTPtr select;
-
- /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here
- */
- ASTPtr values;
-
- bool detach = false; /// true for DETACH PARTITION
-
- bool part = false; /// true for ATTACH PART, DROP DETACHED PART and MOVE
-
- bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata)
-
- bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata)
-
+ /// For MODIFY_QUERY
+ ASTPtr select;
+
+ /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here
+ */
+ ASTPtr values;
+
+ bool detach = false; /// true for DETACH PARTITION
+
+ bool part = false; /// true for ATTACH PART, DROP DETACHED PART and MOVE
+
+ bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata)
+
+ bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata)
+
bool clear_projection = false; /// for CLEAR PROJECTION (do not drop projection from metadata)
- bool if_not_exists = false; /// option for ADD_COLUMN
-
- bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN
-
- bool first = false; /// option for ADD_COLUMN, MODIFY_COLUMN
-
- DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION
-
- String move_destination_name; /// option for MOVE PART/PARTITION
-
- /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition.
- */
- String from;
-
+ bool if_not_exists = false; /// option for ADD_COLUMN
+
+ bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN
+
+ bool first = false; /// option for ADD_COLUMN, MODIFY_COLUMN
+
+ DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION
+
+ String move_destination_name; /// option for MOVE PART/PARTITION
+
+ /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition.
+ */
+ String from;
+
/**
* For FREEZE PARTITION - place local backup to directory with specified name.
* For UNFREEZE - delete local backup at directory with specified name.
- */
- String with_name;
-
- /// REPLACE(ATTACH) PARTITION partition FROM db.table
- String from_database;
- String from_table;
- /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table
- bool replace = true;
- /// MOVE PARTITION partition TO TABLE db.table
- String to_database;
- String to_table;
-
- /// Target column name
- ASTPtr rename_to;
-
+ */
+ String with_name;
+
+ /// REPLACE(ATTACH) PARTITION partition FROM db.table
+ String from_database;
+ String from_table;
+ /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table
+ bool replace = true;
+ /// MOVE PARTITION partition TO TABLE db.table
+ String to_database;
+ String to_table;
+
+ /// Target column name
+ ASTPtr rename_to;
+
/// Which property user want to remove
String remove_property;
- String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast<int>(type))); }
-
- ASTPtr clone() const override;
-
-protected:
- void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
-};
-
-class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
-{
-public:
- bool is_live_view{false}; /// true for ALTER LIVE VIEW
-
+ String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast<int>(type))); }
+
+ ASTPtr clone() const override;
+
+protected:
+ void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+};
+
+class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
+{
+public:
+ bool is_live_view{false}; /// true for ALTER LIVE VIEW
+
ASTExpressionList * command_list = nullptr;
-
+
bool isSettingsAlter() const;
bool isFreezeAlter() const;
- String getID(char) const override;
-
- ASTPtr clone() const override;
-
- ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
- {
- return removeOnCluster<ASTAlterQuery>(clone(), new_database);
- }
-
+ String getID(char) const override;
+
+ ASTPtr clone() const override;
+
+ ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
+ {
+ return removeOnCluster<ASTAlterQuery>(clone(), new_database);
+ }
+
const char * getQueryKindString() const override { return "Alter"; }
-protected:
- void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+protected:
+ void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
bool isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const;
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h
index fdd1179ec9..464697be4f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTCheckQuery.h
@@ -1,53 +1,53 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Common/quoteString.h>
-
-
-namespace DB
-{
-
-struct ASTCheckQuery : public ASTQueryWithTableAndOutput
-{
- ASTPtr partition;
-
- /** Get the text that identifies this element. */
- String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; }
-
- ASTPtr clone() const override
- {
- auto res = std::make_shared<ASTCheckQuery>(*this);
- res->children.clear();
- cloneOutputOptions(*res);
- return res;
- }
-
-protected:
- void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
- {
- std::string nl_or_nothing = settings.one_line ? "" : "\n";
-
- std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
- std::string nl_or_ws = settings.one_line ? " " : "\n";
-
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? hilite_none : "");
-
- if (!table.empty())
- {
- if (!database.empty())
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(database) << (settings.hilite ? hilite_none : "");
- settings.ostr << ".";
- }
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(table) << (settings.hilite ? hilite_none : "");
- }
-
- if (partition)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
- }
-};
-
-}
+#pragma once
+
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Common/quoteString.h>
+
+
+namespace DB
+{
+
+struct ASTCheckQuery : public ASTQueryWithTableAndOutput
+{
+ ASTPtr partition;
+
+ /** Get the text that identifies this element. */
+ String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; }
+
+ ASTPtr clone() const override
+ {
+ auto res = std::make_shared<ASTCheckQuery>(*this);
+ res->children.clear();
+ cloneOutputOptions(*res);
+ return res;
+ }
+
+protected:
+ void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
+ {
+ std::string nl_or_nothing = settings.one_line ? "" : "\n";
+
+ std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
+ std::string nl_or_ws = settings.one_line ? " " : "\n";
+
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? hilite_none : "");
+
+ if (!table.empty())
+ {
+ if (!database.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(database) << (settings.hilite ? hilite_none : "");
+ settings.ostr << ".";
+ }
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(table) << (settings.hilite ? hilite_none : "");
+ }
+
+ if (partition)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ }
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp
index 4c14230e92..c30a296053 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTColumnDeclaration.cpp
@@ -1,97 +1,97 @@
-#include <Parsers/ASTColumnDeclaration.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTColumnDeclaration.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-
-ASTPtr ASTColumnDeclaration::clone() const
-{
- const auto res = std::make_shared<ASTColumnDeclaration>(*this);
- res->children.clear();
-
- if (type)
- {
- // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`),
- // so we have to clone it properly as well.
- res->type = type->clone();
- res->children.push_back(res->type);
- }
-
- if (default_expression)
- {
- res->default_expression = default_expression->clone();
- res->children.push_back(res->default_expression);
- }
-
- if (comment)
- {
- res->comment = comment->clone();
- res->children.push_back(res->comment);
- }
-
- if (codec)
- {
- res->codec = codec->clone();
- res->children.push_back(res->codec);
- }
-
- if (ttl)
- {
- res->ttl = ttl->clone();
- res->children.push_back(res->ttl);
- }
-
- return res;
-}
-
-void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- frame.need_parens = false;
-
- /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query.
- settings.ostr << backQuote(name);
-
- if (type)
- {
- settings.ostr << ' ';
+
+
+namespace DB
+{
+
+ASTPtr ASTColumnDeclaration::clone() const
+{
+ const auto res = std::make_shared<ASTColumnDeclaration>(*this);
+ res->children.clear();
+
+ if (type)
+ {
+ // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`),
+ // so we have to clone it properly as well.
+ res->type = type->clone();
+ res->children.push_back(res->type);
+ }
+
+ if (default_expression)
+ {
+ res->default_expression = default_expression->clone();
+ res->children.push_back(res->default_expression);
+ }
+
+ if (comment)
+ {
+ res->comment = comment->clone();
+ res->children.push_back(res->comment);
+ }
+
+ if (codec)
+ {
+ res->codec = codec->clone();
+ res->children.push_back(res->codec);
+ }
+
+ if (ttl)
+ {
+ res->ttl = ttl->clone();
+ res->children.push_back(res->ttl);
+ }
+
+ return res;
+}
+
+void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ frame.need_parens = false;
+
+ /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query.
+ settings.ostr << backQuote(name);
+
+ if (type)
+ {
+ settings.ostr << ' ';
FormatStateStacked type_frame = frame;
type_frame.indent = 0;
type->formatImpl(settings, state, type_frame);
- }
-
- if (null_modifier)
- {
- settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "")
- << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : "");
- }
-
- if (default_expression)
- {
- settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' ';
- default_expression->formatImpl(settings, state, frame);
- }
-
- if (comment)
- {
- settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' ';
- comment->formatImpl(settings, state, frame);
- }
-
- if (codec)
- {
- settings.ostr << ' ';
- codec->formatImpl(settings, state, frame);
- }
-
- if (ttl)
- {
- settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' ';
- ttl->formatImpl(settings, state, frame);
- }
-}
-
-}
+ }
+
+ if (null_modifier)
+ {
+ settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "")
+ << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : "");
+ }
+
+ if (default_expression)
+ {
+ settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' ';
+ default_expression->formatImpl(settings, state, frame);
+ }
+
+ if (comment)
+ {
+ settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' ';
+ comment->formatImpl(settings, state, frame);
+ }
+
+ if (codec)
+ {
+ settings.ostr << ' ';
+ codec->formatImpl(settings, state, frame);
+ }
+
+ if (ttl)
+ {
+ settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' ';
+ ttl->formatImpl(settings, state, frame);
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp
index 6ea6c81c3b..45b03bbc23 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.cpp
@@ -1,77 +1,77 @@
-#include <Parsers/ASTDropQuery.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTDropQuery.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int SYNTAX_ERROR;
-}
-
-
-String ASTDropQuery::getID(char delim) const
-{
- if (kind == ASTDropQuery::Kind::Drop)
- return "DropQuery" + (delim + database) + delim + table;
- else if (kind == ASTDropQuery::Kind::Detach)
- return "DetachQuery" + (delim + database) + delim + table;
- else if (kind == ASTDropQuery::Kind::Truncate)
- return "TruncateQuery" + (delim + database) + delim + table;
- else
- throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR);
-}
-
-ASTPtr ASTDropQuery::clone() const
-{
- auto res = std::make_shared<ASTDropQuery>(*this);
- cloneOutputOptions(*res);
- return res;
-}
-
-void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "");
- if (kind == ASTDropQuery::Kind::Drop)
- settings.ostr << "DROP ";
- else if (kind == ASTDropQuery::Kind::Detach)
- settings.ostr << "DETACH ";
- else if (kind == ASTDropQuery::Kind::Truncate)
- settings.ostr << "TRUNCATE ";
- else
- throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR);
-
- if (temporary)
- settings.ostr << "TEMPORARY ";
-
- if (table.empty() && !database.empty())
- settings.ostr << "DATABASE ";
- else if (is_dictionary)
- settings.ostr << "DICTIONARY ";
- else if (is_view)
- settings.ostr << "VIEW ";
- else
- settings.ostr << "TABLE ";
-
- if (if_exists)
- settings.ostr << "IF EXISTS ";
-
- settings.ostr << (settings.hilite ? hilite_none : "");
-
- if (table.empty() && !database.empty())
- settings.ostr << backQuoteIfNeed(database);
- else
- settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
-
- formatOnCluster(settings);
-
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+}
+
+
+String ASTDropQuery::getID(char delim) const
+{
+ if (kind == ASTDropQuery::Kind::Drop)
+ return "DropQuery" + (delim + database) + delim + table;
+ else if (kind == ASTDropQuery::Kind::Detach)
+ return "DetachQuery" + (delim + database) + delim + table;
+ else if (kind == ASTDropQuery::Kind::Truncate)
+ return "TruncateQuery" + (delim + database) + delim + table;
+ else
+ throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR);
+}
+
+ASTPtr ASTDropQuery::clone() const
+{
+ auto res = std::make_shared<ASTDropQuery>(*this);
+ cloneOutputOptions(*res);
+ return res;
+}
+
+void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ settings.ostr << (settings.hilite ? hilite_keyword : "");
+ if (kind == ASTDropQuery::Kind::Drop)
+ settings.ostr << "DROP ";
+ else if (kind == ASTDropQuery::Kind::Detach)
+ settings.ostr << "DETACH ";
+ else if (kind == ASTDropQuery::Kind::Truncate)
+ settings.ostr << "TRUNCATE ";
+ else
+ throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR);
+
+ if (temporary)
+ settings.ostr << "TEMPORARY ";
+
+ if (table.empty() && !database.empty())
+ settings.ostr << "DATABASE ";
+ else if (is_dictionary)
+ settings.ostr << "DICTIONARY ";
+ else if (is_view)
+ settings.ostr << "VIEW ";
+ else
+ settings.ostr << "TABLE ";
+
+ if (if_exists)
+ settings.ostr << "IF EXISTS ";
+
+ settings.ostr << (settings.hilite ? hilite_none : "");
+
+ if (table.empty() && !database.empty())
+ settings.ostr << backQuoteIfNeed(database);
+ else
+ settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
+
+ formatOnCluster(settings);
+
if (permanently)
settings.ostr << " PERMANENTLY";
- if (no_delay)
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " NO DELAY" << (settings.hilite ? hilite_none : "");
-}
-
-}
+ if (no_delay)
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " NO DELAY" << (settings.hilite ? hilite_none : "");
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h
index 6e5fd5854d..b062eaf3e8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropQuery.h
@@ -1,54 +1,54 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Parsers/ASTQueryWithOnCluster.h>
-
-
-namespace DB
-{
-
-/** DROP query
- */
-class ASTDropQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
-{
-public:
- enum Kind
- {
- Drop,
- Detach,
- Truncate,
- };
-
- Kind kind;
- bool if_exists{false};
-
- /// Useful if we already have a DDL lock
- bool no_ddl_lock{false};
-
- /// We dropping dictionary, so print correct word
- bool is_dictionary{false};
-
- /// Same as above
- bool is_view{false};
-
- bool no_delay{false};
-
+#pragma once
+
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Parsers/ASTQueryWithOnCluster.h>
+
+
+namespace DB
+{
+
+/** DROP query
+ */
+class ASTDropQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
+{
+public:
+ enum Kind
+ {
+ Drop,
+ Detach,
+ Truncate,
+ };
+
+ Kind kind;
+ bool if_exists{false};
+
+ /// Useful if we already have a DDL lock
+ bool no_ddl_lock{false};
+
+ /// We dropping dictionary, so print correct word
+ bool is_dictionary{false};
+
+ /// Same as above
+ bool is_view{false};
+
+ bool no_delay{false};
+
// We detach the object permanently, so it will not be reattached back during server restart.
bool permanently{false};
- /** Get the text that identifies this element. */
- String getID(char) const override;
- ASTPtr clone() const override;
-
- ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
- {
- return removeOnCluster<ASTDropQuery>(clone(), new_database);
- }
-
+ /** Get the text that identifies this element. */
+ String getID(char) const override;
+ ASTPtr clone() const override;
+
+ ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
+ {
+ return removeOnCluster<ASTDropQuery>(clone(), new_database);
+ }
+
const char * getQueryKindString() const override { return "Drop"; }
-protected:
- void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-
-}
+protected:
+ void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h
index 5c50a8cd82..dffcad80a4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTExplainQuery.h
@@ -1,87 +1,87 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace DB
-{
-
-
-/// AST, EXPLAIN or other query with meaning of explanation query instead of execution
-class ASTExplainQuery : public ASTQueryWithOutput
-{
-public:
- enum ExplainKind
- {
- ParsedAST, /// 'EXPLAIN AST SELECT ...'
- AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...'
- QueryPlan, /// 'EXPLAIN SELECT ...'
- QueryPipeline, /// 'EXPLAIN PIPELINE ...'
+#pragma once
+
+#include <Parsers/ASTQueryWithOutput.h>
+
+
+namespace DB
+{
+
+
+/// AST, EXPLAIN or other query with meaning of explanation query instead of execution
+class ASTExplainQuery : public ASTQueryWithOutput
+{
+public:
+ enum ExplainKind
+ {
+ ParsedAST, /// 'EXPLAIN AST SELECT ...'
+ AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...'
+ QueryPlan, /// 'EXPLAIN SELECT ...'
+ QueryPipeline, /// 'EXPLAIN PIPELINE ...'
QueryEstimates, /// 'EXPLAIN ESTIMATE ...'
- };
-
+ };
+
explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {}
-
+
String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
- ExplainKind getKind() const { return kind; }
- ASTPtr clone() const override
- {
- auto res = std::make_shared<ASTExplainQuery>(*this);
- res->children.clear();
- res->children.push_back(children[0]->clone());
- cloneOutputOptions(*res);
- return res;
- }
-
- void setExplainedQuery(ASTPtr query_)
- {
- children.emplace_back(query_);
- query = std::move(query_);
- }
-
- void setSettings(ASTPtr settings_)
- {
- children.emplace_back(settings_);
- ast_settings = std::move(settings_);
- }
-
- const ASTPtr & getExplainedQuery() const { return query; }
- const ASTPtr & getSettings() const { return ast_settings; }
-
-protected:
- void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
- {
+ ExplainKind getKind() const { return kind; }
+ ASTPtr clone() const override
+ {
+ auto res = std::make_shared<ASTExplainQuery>(*this);
+ res->children.clear();
+ res->children.push_back(children[0]->clone());
+ cloneOutputOptions(*res);
+ return res;
+ }
+
+ void setExplainedQuery(ASTPtr query_)
+ {
+ children.emplace_back(query_);
+ query = std::move(query_);
+ }
+
+ void setSettings(ASTPtr settings_)
+ {
+ children.emplace_back(settings_);
+ ast_settings = std::move(settings_);
+ }
+
+ const ASTPtr & getExplainedQuery() const { return query; }
+ const ASTPtr & getSettings() const { return ast_settings; }
+
+protected:
+ void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
+ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : "");
-
- if (ast_settings)
- {
- settings.ostr << ' ';
- ast_settings->formatImpl(settings, state, frame);
- }
-
- settings.ostr << settings.nl_or_ws;
- query->formatImpl(settings, state, frame);
- }
-
-private:
- ExplainKind kind;
-
- ASTPtr query;
- ASTPtr ast_settings;
-
+
+ if (ast_settings)
+ {
+ settings.ostr << ' ';
+ ast_settings->formatImpl(settings, state, frame);
+ }
+
+ settings.ostr << settings.nl_or_ws;
+ query->formatImpl(settings, state, frame);
+ }
+
+private:
+ ExplainKind kind;
+
+ ASTPtr query;
+ ASTPtr ast_settings;
+
static String toString(ExplainKind kind)
- {
- switch (kind)
- {
+ {
+ switch (kind)
+ {
case ParsedAST: return "EXPLAIN AST";
case AnalyzedSyntax: return "EXPLAIN SYNTAX";
- case QueryPlan: return "EXPLAIN";
- case QueryPipeline: return "EXPLAIN PIPELINE";
+ case QueryPlan: return "EXPLAIN";
+ case QueryPipeline: return "EXPLAIN PIPELINE";
case QueryEstimates: return "EXPLAIN ESTIMATE";
- }
-
- __builtin_unreachable();
- }
-};
-
-}
+ }
+
+ __builtin_unreachable();
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp
index 72bdd7d6b0..ca10f23e58 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.cpp
@@ -1,28 +1,28 @@
-#include <Parsers/ASTKillQueryQuery.h>
+#include <Parsers/ASTKillQueryQuery.h>
#include <IO/Operators.h>
-
-namespace DB
-{
-
-String ASTKillQueryQuery::getID(char delim) const
-{
- return String("KillQueryQuery") + delim + (where_expression ? where_expression->getID() : "") + delim + String(sync ? "SYNC" : "ASYNC");
-}
-
-void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "KILL "
- << (type == Type::Query ? "QUERY" : "MUTATION");
-
- formatOnCluster(settings);
-
- if (where_expression)
- {
- settings.ostr << " WHERE " << (settings.hilite ? hilite_none : "");
- where_expression->formatImpl(settings, state, frame);
- }
-
- settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << (test ? "TEST" : (sync ? "SYNC" : "ASYNC")) << (settings.hilite ? hilite_none : "");
-}
-
-}
+
+namespace DB
+{
+
+String ASTKillQueryQuery::getID(char delim) const
+{
+ return String("KillQueryQuery") + delim + (where_expression ? where_expression->getID() : "") + delim + String(sync ? "SYNC" : "ASYNC");
+}
+
+void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "KILL "
+ << (type == Type::Query ? "QUERY" : "MUTATION");
+
+ formatOnCluster(settings);
+
+ if (where_expression)
+ {
+ settings.ostr << " WHERE " << (settings.hilite ? hilite_none : "");
+ where_expression->formatImpl(settings, state, frame);
+ }
+
+ settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << (test ? "TEST" : (sync ? "SYNC" : "ASYNC")) << (settings.hilite ? hilite_none : "");
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h
index c1b3956962..fa0dbcda16 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTKillQueryQuery.h
@@ -1,45 +1,45 @@
#pragma once
-#include <Parsers/IAST.h>
-#include <Parsers/ASTQueryWithOutput.h>
-#include <Parsers/ASTQueryWithOnCluster.h>
-
-namespace DB
-{
-
-class ASTKillQueryQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
-{
-public:
- enum class Type
- {
- Query, /// KILL QUERY
- Mutation, /// KILL MUTATION
- };
-
- Type type = Type::Query;
- ASTPtr where_expression; // expression to filter processes from system.processes table
- bool sync = false; // SYNC or ASYNC mode
- bool test = false; // does it TEST mode? (doesn't cancel queries just checks and shows them)
-
- ASTPtr clone() const override
- {
- auto clone = std::make_shared<ASTKillQueryQuery>(*this);
- if (where_expression)
- {
- clone->where_expression = where_expression->clone();
- clone->children = {clone->where_expression};
- }
-
- return clone;
- }
-
- String getID(char) const override;
-
- void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
-
- ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override
- {
- return removeOnCluster<ASTKillQueryQuery>(clone());
- }
-};
-
-}
+#include <Parsers/IAST.h>
+#include <Parsers/ASTQueryWithOutput.h>
+#include <Parsers/ASTQueryWithOnCluster.h>
+
+namespace DB
+{
+
+class ASTKillQueryQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
+{
+public:
+ enum class Type
+ {
+ Query, /// KILL QUERY
+ Mutation, /// KILL MUTATION
+ };
+
+ Type type = Type::Query;
+ ASTPtr where_expression; // expression to filter processes from system.processes table
+ bool sync = false; // SYNC or ASYNC mode
+ bool test = false; // does it TEST mode? (doesn't cancel queries just checks and shows them)
+
+ ASTPtr clone() const override
+ {
+ auto clone = std::make_shared<ASTKillQueryQuery>(*this);
+ if (where_expression)
+ {
+ clone->where_expression = where_expression->clone();
+ clone->children = {clone->where_expression};
+ }
+
+ return clone;
+ }
+
+ String getID(char) const override;
+
+ void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+
+ ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override
+ {
+ return removeOnCluster<ASTKillQueryQuery>(clone());
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp
index 6423e247ec..1af215c1ef 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.cpp
@@ -1,34 +1,34 @@
-#include <Parsers/ASTOptimizeQuery.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTOptimizeQuery.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-namespace DB
-{
-
-void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "")
- << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
-
- formatOnCluster(settings);
-
- if (partition)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : "");
- partition->formatImpl(settings, state, frame);
- }
-
- if (final)
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FINAL" << (settings.hilite ? hilite_none : "");
-
- if (deduplicate)
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : "");
+
+namespace DB
+{
+
+void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "")
+ << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
+
+ formatOnCluster(settings);
+
+ if (partition)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : "");
+ partition->formatImpl(settings, state, frame);
+ }
+
+ if (final)
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FINAL" << (settings.hilite ? hilite_none : "");
+
+ if (deduplicate)
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : "");
if (deduplicate_by_columns)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " BY " << (settings.hilite ? hilite_none : "");
deduplicate_by_columns->formatImpl(settings, state, frame);
}
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h
index f4981d156c..f00cd48002 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTOptimizeQuery.h
@@ -1,59 +1,59 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Parsers/ASTQueryWithOnCluster.h>
-
-namespace DB
-{
-
-
-/** OPTIMIZE query
- */
-class ASTOptimizeQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
-{
-public:
- /// The partition to optimize can be specified.
- ASTPtr partition;
- /// A flag can be specified - perform optimization "to the end" instead of one step.
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Parsers/ASTQueryWithOnCluster.h>
+
+namespace DB
+{
+
+
+/** OPTIMIZE query
+ */
+class ASTOptimizeQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
+{
+public:
+ /// The partition to optimize can be specified.
+ ASTPtr partition;
+ /// A flag can be specified - perform optimization "to the end" instead of one step.
bool final = false;
- /// Do deduplicate (default: false)
+ /// Do deduplicate (default: false)
bool deduplicate = false;
/// Deduplicate by columns.
ASTPtr deduplicate_by_columns;
-
- /** Get the text that identifies this element. */
- String getID(char delim) const override
- {
- return "OptimizeQuery" + (delim + database) + delim + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : "");
- }
-
- ASTPtr clone() const override
- {
- auto res = std::make_shared<ASTOptimizeQuery>(*this);
- res->children.clear();
-
- if (partition)
- {
- res->partition = partition->clone();
- res->children.push_back(res->partition);
- }
-
+
+ /** Get the text that identifies this element. */
+ String getID(char delim) const override
+ {
+ return "OptimizeQuery" + (delim + database) + delim + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : "");
+ }
+
+ ASTPtr clone() const override
+ {
+ auto res = std::make_shared<ASTOptimizeQuery>(*this);
+ res->children.clear();
+
+ if (partition)
+ {
+ res->partition = partition->clone();
+ res->children.push_back(res->partition);
+ }
+
if (deduplicate_by_columns)
{
res->deduplicate_by_columns = deduplicate_by_columns->clone();
res->children.push_back(res->deduplicate_by_columns);
}
- return res;
- }
-
- void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
-
- ASTPtr getRewrittenASTWithoutOnCluster(const std::string &new_database) const override
- {
- return removeOnCluster<ASTOptimizeQuery>(clone(), new_database);
- }
-};
-
-}
+ return res;
+ }
+
+ void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+
+ ASTPtr getRewrittenASTWithoutOnCluster(const std::string &new_database) const override
+ {
+ return removeOnCluster<ASTOptimizeQuery>(clone(), new_database);
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp
index 06bfe4f521..fa968c2ac8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.cpp
@@ -1,45 +1,45 @@
-#include <Parsers/ASTPartition.h>
-#include <IO/WriteHelpers.h>
+#include <Parsers/ASTPartition.h>
+#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
-
-namespace DB
-{
-
-String ASTPartition::getID(char delim) const
-{
- if (value)
- return "Partition";
- else
- return "Partition_ID" + (delim + id);
-}
-
-ASTPtr ASTPartition::clone() const
-{
- auto res = std::make_shared<ASTPartition>(*this);
- res->children.clear();
-
- if (value)
- {
- res->value = value->clone();
- res->children.push_back(res->value);
- }
-
- return res;
-}
-
-void ASTPartition::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- if (value)
- {
- value->formatImpl(settings, state, frame);
- }
- else
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "ID " << (settings.hilite ? hilite_none : "");
- WriteBufferFromOwnString id_buf;
- writeQuoted(id, id_buf);
- settings.ostr << id_buf.str();
- }
-}
-
-}
+
+namespace DB
+{
+
+String ASTPartition::getID(char delim) const
+{
+ if (value)
+ return "Partition";
+ else
+ return "Partition_ID" + (delim + id);
+}
+
+ASTPtr ASTPartition::clone() const
+{
+ auto res = std::make_shared<ASTPartition>(*this);
+ res->children.clear();
+
+ if (value)
+ {
+ res->value = value->clone();
+ res->children.push_back(res->value);
+ }
+
+ return res;
+}
+
+void ASTPartition::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ if (value)
+ {
+ value->formatImpl(settings, state, frame);
+ }
+ else
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "ID " << (settings.hilite ? hilite_none : "");
+ WriteBufferFromOwnString id_buf;
+ writeQuoted(id, id_buf);
+ settings.ostr << id_buf.str();
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h
index 8a837a1045..c0d71861a4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTPartition.h
@@ -1,27 +1,27 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-#include <common/StringRef.h>
-
-
-namespace DB
-{
-
-/// Either a (possibly compound) expression representing a partition value or a partition ID.
-class ASTPartition : public IAST
-{
-public:
- ASTPtr value;
- String fields_str; /// The extent of comma-separated partition expression fields without parentheses.
- size_t fields_count = 0;
-
- String id;
-
- String getID(char) const override;
- ASTPtr clone() const override;
-
-protected:
- void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <common/StringRef.h>
+
+
+namespace DB
+{
+
+/// Either a (possibly compound) expression representing a partition value or a partition ID.
+class ASTPartition : public IAST
+{
+public:
+ ASTPtr value;
+ String fields_str; /// The extent of comma-separated partition expression fields without parentheses.
+ size_t fields_count = 0;
+
+ String id;
+
+ String getID(char) const override;
+ ASTPtr clone() const override;
+
+protected:
+ void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp
index fc5385e4a5..d895c5b69f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.cpp
@@ -1,95 +1,95 @@
-#include <Parsers/ASTRolesOrUsersSet.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTRolesOrUsersSet.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-namespace
-{
+
+
+namespace DB
+{
+namespace
+{
void formatNameOrID(const String & str, bool is_id, const IAST::FormatSettings & settings)
- {
- if (is_id)
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "("
- << quoteString(str) << ")";
- }
- else
- {
- settings.ostr << backQuoteIfNeed(str);
- }
- }
-}
-
-void ASTRolesOrUsersSet::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- if (empty())
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : "");
- return;
- }
+ {
+ if (is_id)
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "("
+ << quoteString(str) << ")";
+ }
+ else
+ {
+ settings.ostr << backQuoteIfNeed(str);
+ }
+ }
+}
+
+void ASTRolesOrUsersSet::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ if (empty())
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : "");
+ return;
+ }
+
+ bool need_comma = false;
- bool need_comma = false;
-
- if (all)
- {
- if (std::exchange(need_comma, true))
- settings.ostr << ", ";
+ if (all)
+ {
+ if (std::exchange(need_comma, true))
+ settings.ostr << ", ";
settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_keyword_any ? "ANY" : "ALL")
<< (settings.hilite ? IAST::hilite_none : "");
- }
- else
- {
+ }
+ else
+ {
for (const auto & name : names)
- {
- if (std::exchange(need_comma, true))
- settings.ostr << ", ";
+ {
+ if (std::exchange(need_comma, true))
+ settings.ostr << ", ";
formatNameOrID(name, id_mode, settings);
- }
-
- if (current_user)
- {
- if (std::exchange(need_comma, true))
- settings.ostr << ", ";
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : "");
- }
- }
-
- if (except_current_user || !except_names.empty())
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " EXCEPT " << (settings.hilite ? IAST::hilite_none : "");
- need_comma = false;
-
+ }
+
+ if (current_user)
+ {
+ if (std::exchange(need_comma, true))
+ settings.ostr << ", ";
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : "");
+ }
+ }
+
+ if (except_current_user || !except_names.empty())
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " EXCEPT " << (settings.hilite ? IAST::hilite_none : "");
+ need_comma = false;
+
for (const auto & name : except_names)
- {
- if (std::exchange(need_comma, true))
- settings.ostr << ", ";
+ {
+ if (std::exchange(need_comma, true))
+ settings.ostr << ", ";
formatNameOrID(name, id_mode, settings);
- }
-
- if (except_current_user)
- {
- if (std::exchange(need_comma, true))
- settings.ostr << ", ";
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : "");
- }
- }
-}
-
-
+ }
+
+ if (except_current_user)
+ {
+ if (std::exchange(need_comma, true))
+ settings.ostr << ", ";
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : "");
+ }
+ }
+}
+
+
void ASTRolesOrUsersSet::replaceCurrentUserTag(const String & current_user_name)
-{
- if (current_user)
- {
- names.push_back(current_user_name);
- current_user = false;
- }
-
- if (except_current_user)
- {
- except_names.push_back(current_user_name);
- except_current_user = false;
- }
-}
-
-}
+{
+ if (current_user)
+ {
+ names.push_back(current_user_name);
+ current_user = false;
+ }
+
+ if (except_current_user)
+ {
+ except_names.push_back(current_user_name);
+ except_current_user = false;
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h
index 15d42ee39a..1efdab0cdb 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTRolesOrUsersSet.h
@@ -1,35 +1,35 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-
-
-namespace DB
-{
+#pragma once
+
+#include <Parsers/IAST.h>
+
+
+namespace DB
+{
using Strings = std::vector<String>;
-/// Represents a set of users/roles like
+/// Represents a set of users/roles like
/// {user_name | role_name | CURRENT_USER | ALL | NONE} [,...]
/// [EXCEPT {user_name | role_name | CURRENT_USER | ALL | NONE} [,...]]
-class ASTRolesOrUsersSet : public IAST
-{
-public:
+class ASTRolesOrUsersSet : public IAST
+{
+public:
bool all = false;
- Strings names;
- bool current_user = false;
- Strings except_names;
- bool except_current_user = false;
-
+ Strings names;
+ bool current_user = false;
+ Strings except_names;
+ bool except_current_user = false;
+
bool allow_users = true; /// whether this set can contain names of users
bool allow_roles = true; /// whether this set can contain names of roles
bool id_mode = false; /// whether this set keep UUIDs instead of names
bool use_keyword_any = false; /// whether the keyword ANY should be used instead of the keyword ALL
-
- bool empty() const { return names.empty() && !current_user && !all; }
+
+ bool empty() const { return names.empty() && !current_user && !all; }
void replaceCurrentUserTag(const String & current_user_name);
-
- String getID(char) const override { return "RolesOrUsersSet"; }
- ASTPtr clone() const override { return std::make_shared<ASTRolesOrUsersSet>(*this); }
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-}
+
+ String getID(char) const override { return "RolesOrUsersSet"; }
+ ASTPtr clone() const override { return std::make_shared<ASTRolesOrUsersSet>(*this); }
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp
index e59e103b77..66fc6c44ea 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.cpp
@@ -1,44 +1,44 @@
-#include <Parsers/ASTSetRoleQuery.h>
-#include <Parsers/ASTRolesOrUsersSet.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTSetRoleQuery.h>
+#include <Parsers/ASTRolesOrUsersSet.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-String ASTSetRoleQuery::getID(char) const
-{
- return "SetRoleQuery";
-}
-
-
-ASTPtr ASTSetRoleQuery::clone() const
-{
- return std::make_shared<ASTSetRoleQuery>(*this);
-}
-
-
-void ASTSetRoleQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "");
- switch (kind)
- {
- case Kind::SET_ROLE: settings.ostr << "SET ROLE"; break;
- case Kind::SET_ROLE_DEFAULT: settings.ostr << "SET ROLE DEFAULT"; break;
- case Kind::SET_DEFAULT_ROLE: settings.ostr << "SET DEFAULT ROLE"; break;
- }
- settings.ostr << (settings.hilite ? hilite_none : "");
-
- if (kind == Kind::SET_ROLE_DEFAULT)
- return;
-
- settings.ostr << " ";
- roles->format(settings);
-
- if (kind == Kind::SET_ROLE)
- return;
-
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "");
- to_users->format(settings);
-}
-}
+
+
+namespace DB
+{
+String ASTSetRoleQuery::getID(char) const
+{
+ return "SetRoleQuery";
+}
+
+
+ASTPtr ASTSetRoleQuery::clone() const
+{
+ return std::make_shared<ASTSetRoleQuery>(*this);
+}
+
+
+void ASTSetRoleQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ settings.ostr << (settings.hilite ? hilite_keyword : "");
+ switch (kind)
+ {
+ case Kind::SET_ROLE: settings.ostr << "SET ROLE"; break;
+ case Kind::SET_ROLE_DEFAULT: settings.ostr << "SET ROLE DEFAULT"; break;
+ case Kind::SET_DEFAULT_ROLE: settings.ostr << "SET DEFAULT ROLE"; break;
+ }
+ settings.ostr << (settings.hilite ? hilite_none : "");
+
+ if (kind == Kind::SET_ROLE_DEFAULT)
+ return;
+
+ settings.ostr << " ";
+ roles->format(settings);
+
+ if (kind == Kind::SET_ROLE)
+ return;
+
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "");
+ to_users->format(settings);
+}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h
index f0170ae6af..725a2a1737 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSetRoleQuery.h
@@ -1,31 +1,31 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-
-
-namespace DB
-{
-class ASTRolesOrUsersSet;
-
-/** SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]}
- * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...]
- */
-class ASTSetRoleQuery : public IAST
-{
-public:
- enum class Kind
- {
- SET_ROLE,
- SET_ROLE_DEFAULT,
- SET_DEFAULT_ROLE,
- };
- Kind kind = Kind::SET_ROLE;
-
- std::shared_ptr<ASTRolesOrUsersSet> roles;
- std::shared_ptr<ASTRolesOrUsersSet> to_users;
-
- String getID(char) const override;
- ASTPtr clone() const override;
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-}
+#pragma once
+
+#include <Parsers/IAST.h>
+
+
+namespace DB
+{
+class ASTRolesOrUsersSet;
+
+/** SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]}
+ * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...]
+ */
+class ASTSetRoleQuery : public IAST
+{
+public:
+ enum class Kind
+ {
+ SET_ROLE,
+ SET_ROLE_DEFAULT,
+ SET_DEFAULT_ROLE,
+ };
+ Kind kind = Kind::SET_ROLE;
+
+ std::shared_ptr<ASTRolesOrUsersSet> roles;
+ std::shared_ptr<ASTRolesOrUsersSet> to_users;
+
+ String getID(char) const override;
+ ASTPtr clone() const override;
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp
index 8f35c154a7..0e6274d452 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.cpp
@@ -1,98 +1,98 @@
-#include <Parsers/ASTSettingsProfileElement.h>
+#include <Parsers/ASTSettingsProfileElement.h>
#include <Parsers/formatSettingName.h>
#include <Common/FieldVisitorToString.h>
-#include <Common/quoteString.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-namespace
-{
- void formatProfileNameOrID(const String & str, bool is_id, const IAST::FormatSettings & settings)
- {
- if (is_id)
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "("
- << quoteString(str) << ")";
- }
- else
- {
- settings.ostr << backQuoteIfNeed(str);
- }
- }
-}
-
-void ASTSettingsProfileElement::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- if (!parent_profile.empty())
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_inherit_keyword ? "INHERIT" : "PROFILE") << " "
- << (settings.hilite ? IAST::hilite_none : "");
- formatProfileNameOrID(parent_profile, id_mode, settings);
- return;
- }
-
+
+
+namespace DB
+{
+namespace
+{
+ void formatProfileNameOrID(const String & str, bool is_id, const IAST::FormatSettings & settings)
+ {
+ if (is_id)
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ID" << (settings.hilite ? IAST::hilite_none : "") << "("
+ << quoteString(str) << ")";
+ }
+ else
+ {
+ settings.ostr << backQuoteIfNeed(str);
+ }
+ }
+}
+
+void ASTSettingsProfileElement::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ if (!parent_profile.empty())
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_inherit_keyword ? "INHERIT" : "PROFILE") << " "
+ << (settings.hilite ? IAST::hilite_none : "");
+ formatProfileNameOrID(parent_profile, id_mode, settings);
+ return;
+ }
+
formatSettingName(setting_name, settings.ostr);
-
- if (!value.isNull())
- {
- settings.ostr << " = " << applyVisitor(FieldVisitorToString{}, value);
- }
-
- if (!min_value.isNull())
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MIN " << (settings.hilite ? IAST::hilite_none : "")
- << applyVisitor(FieldVisitorToString{}, min_value);
- }
-
- if (!max_value.isNull())
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << (settings.hilite ? IAST::hilite_none : "")
- << applyVisitor(FieldVisitorToString{}, max_value);
- }
-
- if (readonly)
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (*readonly ? " READONLY" : " WRITABLE")
- << (settings.hilite ? IAST::hilite_none : "");
- }
-}
-
-
-bool ASTSettingsProfileElements::empty() const
-{
- for (const auto & element : elements)
- if (!element->empty())
- return false;
- return true;
-}
-
-
-void ASTSettingsProfileElements::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- if (empty())
- {
- settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : "");
- return;
- }
-
- bool need_comma = false;
- for (const auto & element : elements)
- {
- if (need_comma)
- settings.ostr << ", ";
- need_comma = true;
-
- element->format(settings);
- }
-}
-
-
-void ASTSettingsProfileElements::setUseInheritKeyword(bool use_inherit_keyword_)
-{
- for (auto & element : elements)
- element->use_inherit_keyword = use_inherit_keyword_;
-}
-
-}
+
+ if (!value.isNull())
+ {
+ settings.ostr << " = " << applyVisitor(FieldVisitorToString{}, value);
+ }
+
+ if (!min_value.isNull())
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MIN " << (settings.hilite ? IAST::hilite_none : "")
+ << applyVisitor(FieldVisitorToString{}, min_value);
+ }
+
+ if (!max_value.isNull())
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << (settings.hilite ? IAST::hilite_none : "")
+ << applyVisitor(FieldVisitorToString{}, max_value);
+ }
+
+ if (readonly)
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (*readonly ? " READONLY" : " WRITABLE")
+ << (settings.hilite ? IAST::hilite_none : "");
+ }
+}
+
+
+bool ASTSettingsProfileElements::empty() const
+{
+ for (const auto & element : elements)
+ if (!element->empty())
+ return false;
+ return true;
+}
+
+
+void ASTSettingsProfileElements::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ if (empty())
+ {
+ settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : "");
+ return;
+ }
+
+ bool need_comma = false;
+ for (const auto & element : elements)
+ {
+ if (need_comma)
+ settings.ostr << ", ";
+ need_comma = true;
+
+ element->format(settings);
+ }
+}
+
+
+void ASTSettingsProfileElements::setUseInheritKeyword(bool use_inherit_keyword_)
+{
+ for (auto & element : elements)
+ element->use_inherit_keyword = use_inherit_keyword_;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h
index 6a54bca321..0a2bbc92b8 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSettingsProfileElement.h
@@ -1,48 +1,48 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-#include <Core/Field.h>
-
-
-namespace DB
-{
-/** Represents a settings profile's element like the following
- * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'
- */
-class ASTSettingsProfileElement : public IAST
-{
-public:
- String parent_profile;
- String setting_name;
- Field value;
- Field min_value;
- Field max_value;
- std::optional<bool> readonly;
- bool id_mode = false; /// If true then `parent_profile` keeps UUID, not a name.
- bool use_inherit_keyword = false; /// If true then this element is a part of ASTCreateSettingsProfileQuery.
-
- bool empty() const { return parent_profile.empty() && setting_name.empty(); }
-
- String getID(char) const override { return "SettingsProfileElement"; }
- ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElement>(*this); }
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-
-
-/** Represents settings profile's elements like the following
- * {{variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'} [,...]
- */
-class ASTSettingsProfileElements : public IAST
-{
-public:
- std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements;
-
- bool empty() const;
-
- String getID(char) const override { return "SettingsProfileElements"; }
- ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElements>(*this); }
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-
- void setUseInheritKeyword(bool use_inherit_keyword_);
-};
-}
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <Core/Field.h>
+
+
+namespace DB
+{
+/** Represents a settings profile's element like the following
+ * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'
+ */
+class ASTSettingsProfileElement : public IAST
+{
+public:
+ String parent_profile;
+ String setting_name;
+ Field value;
+ Field min_value;
+ Field max_value;
+ std::optional<bool> readonly;
+ bool id_mode = false; /// If true then `parent_profile` keeps UUID, not a name.
+ bool use_inherit_keyword = false; /// If true then this element is a part of ASTCreateSettingsProfileQuery.
+
+ bool empty() const { return parent_profile.empty() && setting_name.empty(); }
+
+ String getID(char) const override { return "SettingsProfileElement"; }
+ ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElement>(*this); }
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+
+
+/** Represents settings profile's elements like the following
+ * {{variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'} [,...]
+ */
+class ASTSettingsProfileElements : public IAST
+{
+public:
+ std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements;
+
+ bool empty() const;
+
+ String getID(char) const override { return "SettingsProfileElements"; }
+ ASTPtr clone() const override { return std::make_shared<ASTSettingsProfileElements>(*this); }
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+
+ void setUseInheritKeyword(bool use_inherit_keyword_);
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h
index dffd7ff240..8b14660bce 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace DB
-{
-
-struct ASTShowAccessQueryNames
-{
- static constexpr auto ID = "ShowAccessQuery";
- static constexpr auto Query = "SHOW ACCESS";
-};
-
-using ASTShowAccessQuery = ASTQueryWithOutputImpl<ASTShowAccessQueryNames>;
-
-}
+#pragma once
+
+#include <Parsers/ASTQueryWithOutput.h>
+
+
+namespace DB
+{
+
+struct ASTShowAccessQueryNames
+{
+ static constexpr auto ID = "ShowAccessQuery";
+ static constexpr auto Query = "SHOW ACCESS";
+};
+
+using ASTShowAccessQuery = ASTQueryWithOutputImpl<ASTShowAccessQueryNames>;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp
index 4011cfc522..d89c246e62 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.cpp
@@ -1,37 +1,37 @@
-#include <Parsers/ASTShowGrantsQuery.h>
-#include <Parsers/ASTRolesOrUsersSet.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTShowGrantsQuery.h>
+#include <Parsers/ASTRolesOrUsersSet.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-String ASTShowGrantsQuery::getID(char) const
-{
- return "ShowGrantsQuery";
-}
-
-
-ASTPtr ASTShowGrantsQuery::clone() const
-{
- return std::make_shared<ASTShowGrantsQuery>(*this);
-}
-
-
-void ASTShowGrantsQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW GRANTS"
- << (settings.hilite ? hilite_none : "");
-
- if (for_roles->current_user && !for_roles->all && for_roles->names.empty() && for_roles->except_names.empty()
- && !for_roles->except_current_user)
- {
- }
- else
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR "
- << (settings.hilite ? hilite_none : "");
- for_roles->format(settings);
- }
-}
-}
+
+
+namespace DB
+{
+String ASTShowGrantsQuery::getID(char) const
+{
+ return "ShowGrantsQuery";
+}
+
+
+ASTPtr ASTShowGrantsQuery::clone() const
+{
+ return std::make_shared<ASTShowGrantsQuery>(*this);
+}
+
+
+void ASTShowGrantsQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW GRANTS"
+ << (settings.hilite ? hilite_none : "");
+
+ if (for_roles->current_user && !for_roles->all && for_roles->names.empty() && for_roles->except_names.empty()
+ && !for_roles->except_current_user)
+ {
+ }
+ else
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR "
+ << (settings.hilite ? hilite_none : "");
+ for_roles->format(settings);
+ }
+}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h
index 04764fe350..b125efe784 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowGrantsQuery.h
@@ -1,21 +1,21 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace DB
-{
-class ASTRolesOrUsersSet;
-
-/** SHOW GRANTS [FOR user_name]
- */
-class ASTShowGrantsQuery : public ASTQueryWithOutput
-{
-public:
- std::shared_ptr<ASTRolesOrUsersSet> for_roles;
-
- String getID(char) const override;
- ASTPtr clone() const override;
- void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-}
+#pragma once
+
+#include <Parsers/ASTQueryWithOutput.h>
+
+
+namespace DB
+{
+class ASTRolesOrUsersSet;
+
+/** SHOW GRANTS [FOR user_name]
+ */
+class ASTShowGrantsQuery : public ASTQueryWithOutput
+{
+public:
+ std::shared_ptr<ASTRolesOrUsersSet> for_roles;
+
+ String getID(char) const override;
+ ASTPtr clone() const override;
+ void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h
index 8cc4ed16f9..4cbab20024 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowPrivilegesQuery.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace DB
-{
-
-struct ASTShowPrivilegesIDAndQueryName
-{
- static constexpr auto ID = "ShowPrivilegesQuery";
- static constexpr auto Query = "SHOW PRIVILEGES";
-};
-
-using ASTShowPrivilegesQuery = ASTQueryWithOutputImpl<ASTShowPrivilegesIDAndQueryName>;
-
-}
+#pragma once
+
+#include <Parsers/ASTQueryWithOutput.h>
+
+
+namespace DB
+{
+
+struct ASTShowPrivilegesIDAndQueryName
+{
+ static constexpr auto ID = "ShowPrivilegesQuery";
+ static constexpr auto Query = "SHOW PRIVILEGES";
+};
+
+using ASTShowPrivilegesQuery = ASTQueryWithOutputImpl<ASTShowPrivilegesIDAndQueryName>;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h
index 2bf67c1951..194d25c5c3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace DB
-{
-
-struct ASTShowProcesslistIDAndQueryNames
-{
- static constexpr auto ID = "ShowProcesslistQuery";
- static constexpr auto Query = "SHOW PROCESSLIST";
-};
-
-using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslistIDAndQueryNames>;
-
-}
+#pragma once
+
+#include <Parsers/ASTQueryWithOutput.h>
+
+
+namespace DB
+{
+
+struct ASTShowProcesslistIDAndQueryNames
+{
+ static constexpr auto ID = "ShowProcesslistQuery";
+ static constexpr auto Query = "SHOW PROCESSLIST";
+};
+
+using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslistIDAndQueryNames>;
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp
index 61d68c4a27..fc7ed6b374 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.cpp
@@ -1,19 +1,19 @@
-#include <iomanip>
-#include <Parsers/ASTShowTablesQuery.h>
-#include <Common/quoteString.h>
+#include <iomanip>
+#include <Parsers/ASTShowTablesQuery.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-namespace DB
-{
-
-ASTPtr ASTShowTablesQuery::clone() const
-{
- auto res = std::make_shared<ASTShowTablesQuery>(*this);
- res->children.clear();
- cloneOutputOptions(*res);
- return res;
-}
-
+
+namespace DB
+{
+
+ASTPtr ASTShowTablesQuery::clone() const
+{
+ auto res = std::make_shared<ASTShowTablesQuery>(*this);
+ res->children.clear();
+ cloneOutputOptions(*res);
+ return res;
+}
+
void ASTShowTablesQuery::formatLike(const FormatSettings & settings) const
{
if (!like.empty())
@@ -34,52 +34,52 @@ void ASTShowTablesQuery::formatLimit(const FormatSettings & settings, FormatStat
}
}
-void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
-{
- if (databases)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : "");
+void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ if (databases)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : "");
formatLike(settings);
formatLimit(settings, state, frame);
- }
- else if (clusters)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : "");
+ }
+ else if (clusters)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : "");
formatLike(settings);
formatLimit(settings, state, frame);
-
- }
- else if (cluster)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? hilite_none : "");
- settings.ostr << " " << backQuoteIfNeed(cluster_str);
- }
+
+ }
+ else if (cluster)
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? hilite_none : "");
+ settings.ostr << " " << backQuoteIfNeed(cluster_str);
+ }
else if (m_settings)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (changed ? "CHANGED " : "") << "SETTINGS" <<
(settings.hilite ? hilite_none : "");
formatLike(settings);
}
- else
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") <<
- (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : "");
-
- if (!from.empty())
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "")
- << backQuoteIfNeed(from);
-
+ else
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") <<
+ (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : "");
+
+ if (!from.empty())
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "")
+ << backQuoteIfNeed(from);
+
formatLike(settings);
-
+
if (where_expression)
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
- where_expression->formatImpl(settings, state, frame);
- }
-
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
+ where_expression->formatImpl(settings, state, frame);
+ }
+
formatLimit(settings, state, frame);
- }
-}
-
-}
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h
index 57383dff66..f5cbfe77e7 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowTablesQuery.h
@@ -1,46 +1,46 @@
-#pragma once
-
-#include <iomanip>
-#include <Parsers/IAST.h>
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace DB
-{
-
-
-/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS
- */
-class ASTShowTablesQuery : public ASTQueryWithOutput
-{
-public:
- bool databases{false};
- bool clusters{false};
- bool cluster{false};
- bool dictionaries{false};
+#pragma once
+
+#include <iomanip>
+#include <Parsers/IAST.h>
+#include <Parsers/ASTQueryWithOutput.h>
+
+
+namespace DB
+{
+
+
+/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS
+ */
+class ASTShowTablesQuery : public ASTQueryWithOutput
+{
+public:
+ bool databases{false};
+ bool clusters{false};
+ bool cluster{false};
+ bool dictionaries{false};
bool m_settings{false};
bool changed{false};
- bool temporary{false};
-
- String cluster_str;
- String from;
- String like;
-
- bool not_like{false};
- bool case_insensitive_like{false};
-
- ASTPtr where_expression;
- ASTPtr limit_length;
-
- /** Get the text that identifies this element. */
- String getID(char) const override { return "ShowTables"; }
-
- ASTPtr clone() const override;
-
-protected:
+ bool temporary{false};
+
+ String cluster_str;
+ String from;
+ String like;
+
+ bool not_like{false};
+ bool case_insensitive_like{false};
+
+ ASTPtr where_expression;
+ ASTPtr limit_length;
+
+ /** Get the text that identifies this element. */
+ String getID(char) const override { return "ShowTables"; }
+
+ ASTPtr clone() const override;
+
+protected:
void formatLike(const FormatSettings & settings) const;
void formatLimit(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const;
- void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-
-}
+ void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp
index 5d01e124b0..3f140f4da4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.cpp
@@ -1,149 +1,149 @@
-#include <Parsers/IAST.h>
-#include <Parsers/ASTSystemQuery.h>
-#include <Common/quoteString.h>
+#include <Parsers/IAST.h>
+#include <Parsers/ASTSystemQuery.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-
-const char * ASTSystemQuery::typeToString(Type type)
-{
- switch (type)
- {
- case Type::SHUTDOWN:
- return "SHUTDOWN";
- case Type::KILL:
- return "KILL";
+
+
+namespace DB
+{
+
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+
+const char * ASTSystemQuery::typeToString(Type type)
+{
+ switch (type)
+ {
+ case Type::SHUTDOWN:
+ return "SHUTDOWN";
+ case Type::KILL:
+ return "KILL";
case Type::SUSPEND:
return "SUSPEND";
- case Type::DROP_DNS_CACHE:
- return "DROP DNS CACHE";
- case Type::DROP_MARK_CACHE:
- return "DROP MARK CACHE";
- case Type::DROP_UNCOMPRESSED_CACHE:
- return "DROP UNCOMPRESSED CACHE";
+ case Type::DROP_DNS_CACHE:
+ return "DROP DNS CACHE";
+ case Type::DROP_MARK_CACHE:
+ return "DROP MARK CACHE";
+ case Type::DROP_UNCOMPRESSED_CACHE:
+ return "DROP UNCOMPRESSED CACHE";
case Type::DROP_MMAP_CACHE:
return "DROP MMAP CACHE";
-#if USE_EMBEDDED_COMPILER
- case Type::DROP_COMPILED_EXPRESSION_CACHE:
- return "DROP COMPILED EXPRESSION CACHE";
-#endif
- case Type::STOP_LISTEN_QUERIES:
- return "STOP LISTEN QUERIES";
- case Type::START_LISTEN_QUERIES:
- return "START LISTEN QUERIES";
- case Type::RESTART_REPLICAS:
- return "RESTART REPLICAS";
- case Type::RESTART_REPLICA:
- return "RESTART REPLICA";
+#if USE_EMBEDDED_COMPILER
+ case Type::DROP_COMPILED_EXPRESSION_CACHE:
+ return "DROP COMPILED EXPRESSION CACHE";
+#endif
+ case Type::STOP_LISTEN_QUERIES:
+ return "STOP LISTEN QUERIES";
+ case Type::START_LISTEN_QUERIES:
+ return "START LISTEN QUERIES";
+ case Type::RESTART_REPLICAS:
+ return "RESTART REPLICAS";
+ case Type::RESTART_REPLICA:
+ return "RESTART REPLICA";
case Type::RESTORE_REPLICA:
return "RESTORE REPLICA";
- case Type::DROP_REPLICA:
- return "DROP REPLICA";
- case Type::SYNC_REPLICA:
- return "SYNC REPLICA";
- case Type::FLUSH_DISTRIBUTED:
- return "FLUSH DISTRIBUTED";
- case Type::RELOAD_DICTIONARY:
- return "RELOAD DICTIONARY";
- case Type::RELOAD_DICTIONARIES:
- return "RELOAD DICTIONARIES";
+ case Type::DROP_REPLICA:
+ return "DROP REPLICA";
+ case Type::SYNC_REPLICA:
+ return "SYNC REPLICA";
+ case Type::FLUSH_DISTRIBUTED:
+ return "FLUSH DISTRIBUTED";
+ case Type::RELOAD_DICTIONARY:
+ return "RELOAD DICTIONARY";
+ case Type::RELOAD_DICTIONARIES:
+ return "RELOAD DICTIONARIES";
case Type::RELOAD_MODEL:
return "RELOAD MODEL";
case Type::RELOAD_MODELS:
return "RELOAD MODELS";
- case Type::RELOAD_EMBEDDED_DICTIONARIES:
- return "RELOAD EMBEDDED DICTIONARIES";
- case Type::RELOAD_CONFIG:
- return "RELOAD CONFIG";
+ case Type::RELOAD_EMBEDDED_DICTIONARIES:
+ return "RELOAD EMBEDDED DICTIONARIES";
+ case Type::RELOAD_CONFIG:
+ return "RELOAD CONFIG";
case Type::RELOAD_SYMBOLS:
return "RELOAD SYMBOLS";
- case Type::STOP_MERGES:
- return "STOP MERGES";
- case Type::START_MERGES:
- return "START MERGES";
- case Type::STOP_TTL_MERGES:
- return "STOP TTL MERGES";
- case Type::START_TTL_MERGES:
- return "START TTL MERGES";
- case Type::STOP_MOVES:
- return "STOP MOVES";
- case Type::START_MOVES:
- return "START MOVES";
- case Type::STOP_FETCHES:
- return "STOP FETCHES";
- case Type::START_FETCHES:
- return "START FETCHES";
- case Type::STOP_REPLICATED_SENDS:
- return "STOP REPLICATED SENDS";
- case Type::START_REPLICATED_SENDS:
- return "START REPLICATED SENDS";
- case Type::STOP_REPLICATION_QUEUES:
- return "STOP REPLICATION QUEUES";
- case Type::START_REPLICATION_QUEUES:
- return "START REPLICATION QUEUES";
- case Type::STOP_DISTRIBUTED_SENDS:
- return "STOP DISTRIBUTED SENDS";
- case Type::START_DISTRIBUTED_SENDS:
- return "START DISTRIBUTED SENDS";
- case Type::FLUSH_LOGS:
- return "FLUSH LOGS";
+ case Type::STOP_MERGES:
+ return "STOP MERGES";
+ case Type::START_MERGES:
+ return "START MERGES";
+ case Type::STOP_TTL_MERGES:
+ return "STOP TTL MERGES";
+ case Type::START_TTL_MERGES:
+ return "START TTL MERGES";
+ case Type::STOP_MOVES:
+ return "STOP MOVES";
+ case Type::START_MOVES:
+ return "START MOVES";
+ case Type::STOP_FETCHES:
+ return "STOP FETCHES";
+ case Type::START_FETCHES:
+ return "START FETCHES";
+ case Type::STOP_REPLICATED_SENDS:
+ return "STOP REPLICATED SENDS";
+ case Type::START_REPLICATED_SENDS:
+ return "START REPLICATED SENDS";
+ case Type::STOP_REPLICATION_QUEUES:
+ return "STOP REPLICATION QUEUES";
+ case Type::START_REPLICATION_QUEUES:
+ return "START REPLICATION QUEUES";
+ case Type::STOP_DISTRIBUTED_SENDS:
+ return "STOP DISTRIBUTED SENDS";
+ case Type::START_DISTRIBUTED_SENDS:
+ return "START DISTRIBUTED SENDS";
+ case Type::FLUSH_LOGS:
+ return "FLUSH LOGS";
case Type::RESTART_DISK:
return "RESTART DISK";
- default:
- throw Exception("Unknown SYSTEM query command", ErrorCodes::LOGICAL_ERROR);
- }
-}
-
-
-void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM ";
- settings.ostr << typeToString(type) << (settings.hilite ? hilite_none : "");
-
- auto print_database_table = [&]
- {
- settings.ostr << " ";
- if (!database.empty())
- {
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database)
- << (settings.hilite ? hilite_none : "") << ".";
- }
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(table)
- << (settings.hilite ? hilite_none : "");
- };
-
+ default:
+ throw Exception("Unknown SYSTEM query command", ErrorCodes::LOGICAL_ERROR);
+ }
+}
+
+
+void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM ";
+ settings.ostr << typeToString(type) << (settings.hilite ? hilite_none : "");
+
+ auto print_database_table = [&]
+ {
+ settings.ostr << " ";
+ if (!database.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database)
+ << (settings.hilite ? hilite_none : "") << ".";
+ }
+ settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(table)
+ << (settings.hilite ? hilite_none : "");
+ };
+
auto print_drop_replica = [&]
{
- settings.ostr << " " << quoteString(replica);
- if (!table.empty())
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM TABLE"
- << (settings.hilite ? hilite_none : "");
- print_database_table();
- }
- else if (!replica_zk_path.empty())
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM ZKPATH "
- << (settings.hilite ? hilite_none : "") << quoteString(replica_zk_path);
- }
- else if (!database.empty())
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM DATABASE "
- << (settings.hilite ? hilite_none : "");
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database)
- << (settings.hilite ? hilite_none : "");
- }
- };
-
+ settings.ostr << " " << quoteString(replica);
+ if (!table.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM TABLE"
+ << (settings.hilite ? hilite_none : "");
+ print_database_table();
+ }
+ else if (!replica_zk_path.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM ZKPATH "
+ << (settings.hilite ? hilite_none : "") << quoteString(replica_zk_path);
+ }
+ else if (!database.empty())
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM DATABASE "
+ << (settings.hilite ? hilite_none : "");
+ settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database)
+ << (settings.hilite ? hilite_none : "");
+ }
+ };
+
auto print_on_volume = [&]
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON VOLUME "
@@ -154,40 +154,40 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
<< (settings.hilite ? hilite_none : "");
};
- if (!cluster.empty())
- formatOnCluster(settings);
-
- if ( type == Type::STOP_MERGES
- || type == Type::START_MERGES
- || type == Type::STOP_TTL_MERGES
- || type == Type::START_TTL_MERGES
- || type == Type::STOP_MOVES
- || type == Type::START_MOVES
- || type == Type::STOP_FETCHES
- || type == Type::START_FETCHES
- || type == Type::STOP_REPLICATED_SENDS
- || type == Type::START_REPLICATED_SENDS
- || type == Type::STOP_REPLICATION_QUEUES
- || type == Type::START_REPLICATION_QUEUES
- || type == Type::STOP_DISTRIBUTED_SENDS
- || type == Type::START_DISTRIBUTED_SENDS)
- {
- if (!table.empty())
- print_database_table();
+ if (!cluster.empty())
+ formatOnCluster(settings);
+
+ if ( type == Type::STOP_MERGES
+ || type == Type::START_MERGES
+ || type == Type::STOP_TTL_MERGES
+ || type == Type::START_TTL_MERGES
+ || type == Type::STOP_MOVES
+ || type == Type::START_MOVES
+ || type == Type::STOP_FETCHES
+ || type == Type::START_FETCHES
+ || type == Type::STOP_REPLICATED_SENDS
+ || type == Type::START_REPLICATED_SENDS
+ || type == Type::STOP_REPLICATION_QUEUES
+ || type == Type::START_REPLICATION_QUEUES
+ || type == Type::STOP_DISTRIBUTED_SENDS
+ || type == Type::START_DISTRIBUTED_SENDS)
+ {
+ if (!table.empty())
+ print_database_table();
else if (!volume.empty())
print_on_volume();
- }
+ }
else if ( type == Type::RESTART_REPLICA
|| type == Type::RESTORE_REPLICA
|| type == Type::SYNC_REPLICA
|| type == Type::FLUSH_DISTRIBUTED
|| type == Type::RELOAD_DICTIONARY)
+ {
+ print_database_table();
+ }
+ else if (type == Type::DROP_REPLICA)
{
- print_database_table();
- }
- else if (type == Type::DROP_REPLICA)
- {
- print_drop_replica();
+ print_drop_replica();
}
else if (type == Type::SUSPEND)
{
@@ -196,7 +196,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
<< (settings.hilite ? hilite_keyword : "") << " SECOND"
<< (settings.hilite ? hilite_none : "");
}
-}
-
-
-}
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h
index 8f9144d438..df9684cfc7 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTSystemQuery.h
@@ -1,92 +1,92 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOnCluster.h>
-#include <Parsers/IAST.h>
-
-namespace DB
-{
-
-class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
-{
-public:
-
- enum class Type
- {
- UNKNOWN,
- SHUTDOWN,
- KILL,
+#pragma once
+
+#include <Parsers/ASTQueryWithOnCluster.h>
+#include <Parsers/IAST.h>
+
+namespace DB
+{
+
+class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
+{
+public:
+
+ enum class Type
+ {
+ UNKNOWN,
+ SHUTDOWN,
+ KILL,
SUSPEND,
- DROP_DNS_CACHE,
- DROP_MARK_CACHE,
- DROP_UNCOMPRESSED_CACHE,
+ DROP_DNS_CACHE,
+ DROP_MARK_CACHE,
+ DROP_UNCOMPRESSED_CACHE,
DROP_MMAP_CACHE,
-#if USE_EMBEDDED_COMPILER
- DROP_COMPILED_EXPRESSION_CACHE,
-#endif
- STOP_LISTEN_QUERIES,
- START_LISTEN_QUERIES,
- RESTART_REPLICAS,
- RESTART_REPLICA,
+#if USE_EMBEDDED_COMPILER
+ DROP_COMPILED_EXPRESSION_CACHE,
+#endif
+ STOP_LISTEN_QUERIES,
+ START_LISTEN_QUERIES,
+ RESTART_REPLICAS,
+ RESTART_REPLICA,
RESTORE_REPLICA,
- DROP_REPLICA,
- SYNC_REPLICA,
- RELOAD_DICTIONARY,
- RELOAD_DICTIONARIES,
+ DROP_REPLICA,
+ SYNC_REPLICA,
+ RELOAD_DICTIONARY,
+ RELOAD_DICTIONARIES,
RELOAD_MODEL,
RELOAD_MODELS,
- RELOAD_EMBEDDED_DICTIONARIES,
- RELOAD_CONFIG,
+ RELOAD_EMBEDDED_DICTIONARIES,
+ RELOAD_CONFIG,
RELOAD_SYMBOLS,
RESTART_DISK,
- STOP_MERGES,
- START_MERGES,
- STOP_TTL_MERGES,
- START_TTL_MERGES,
- STOP_FETCHES,
- START_FETCHES,
- STOP_MOVES,
- START_MOVES,
- STOP_REPLICATED_SENDS,
- START_REPLICATED_SENDS,
- STOP_REPLICATION_QUEUES,
- START_REPLICATION_QUEUES,
- FLUSH_LOGS,
- FLUSH_DISTRIBUTED,
- STOP_DISTRIBUTED_SENDS,
- START_DISTRIBUTED_SENDS,
- END
- };
-
- static const char * typeToString(Type type);
-
- Type type = Type::UNKNOWN;
-
+ STOP_MERGES,
+ START_MERGES,
+ STOP_TTL_MERGES,
+ START_TTL_MERGES,
+ STOP_FETCHES,
+ START_FETCHES,
+ STOP_MOVES,
+ START_MOVES,
+ STOP_REPLICATED_SENDS,
+ START_REPLICATED_SENDS,
+ STOP_REPLICATION_QUEUES,
+ START_REPLICATION_QUEUES,
+ FLUSH_LOGS,
+ FLUSH_DISTRIBUTED,
+ STOP_DISTRIBUTED_SENDS,
+ START_DISTRIBUTED_SENDS,
+ END
+ };
+
+ static const char * typeToString(Type type);
+
+ Type type = Type::UNKNOWN;
+
String target_model;
- String database;
- String table;
- String replica;
- String replica_zk_path;
+ String database;
+ String table;
+ String replica;
+ String replica_zk_path;
bool is_drop_whole_replica{};
String storage_policy;
String volume;
String disk;
UInt64 seconds{};
-
- String getID(char) const override { return "SYSTEM query"; }
-
- ASTPtr clone() const override { return std::make_shared<ASTSystemQuery>(*this); }
-
- ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
- {
- return removeOnCluster<ASTSystemQuery>(clone(), new_database);
- }
-
+
+ String getID(char) const override { return "SYSTEM query"; }
+
+ ASTPtr clone() const override { return std::make_shared<ASTSystemQuery>(*this); }
+
+ ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override
+ {
+ return removeOnCluster<ASTSystemQuery>(clone(), new_database);
+ }
+
const char * getQueryKindString() const override { return "System"; }
-protected:
-
- void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
-};
-
-
-}
+protected:
+
+ void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+};
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h
index 4e4a13c2a7..aa181ee28e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUseQuery.h
@@ -1,32 +1,32 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-#include <Common/quoteString.h>
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-
-
-/** USE query
- */
-class ASTUseQuery : public IAST
-{
-public:
- String database;
-
- /** Get the text that identifies this element. */
- String getID(char delim) const override { return "UseQuery" + (delim + database); }
-
- ASTPtr clone() const override { return std::make_shared<ASTUseQuery>(*this); }
-
-protected:
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database);
- return;
- }
-};
-
-}
+
+
+namespace DB
+{
+
+
+/** USE query
+ */
+class ASTUseQuery : public IAST
+{
+public:
+ String database;
+
+ /** Get the text that identifies this element. */
+ String getID(char delim) const override { return "UseQuery" + (delim + database); }
+
+ ASTPtr clone() const override { return std::make_shared<ASTUseQuery>(*this); }
+
+protected:
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database);
+ return;
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp
index b99ea5ab8d..9d81727138 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.cpp
@@ -1,75 +1,75 @@
-#include <Parsers/ASTUserNameWithHost.h>
-#include <Common/quoteString.h>
+#include <Parsers/ASTUserNameWithHost.h>
+#include <Common/quoteString.h>
#include <IO/Operators.h>
-
-
-namespace DB
-{
-
-void ASTUserNameWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- settings.ostr << backQuoteIfNeed(base_name);
-
- if (!host_pattern.empty())
- settings.ostr << "@" << backQuoteIfNeed(host_pattern);
-}
-
-String ASTUserNameWithHost::toString() const
-{
- String res = base_name;
- if (!host_pattern.empty())
- res += '@' + host_pattern;
- return res;
-}
-
-void ASTUserNameWithHost::concatParts()
-{
- base_name = toString();
- host_pattern.clear();
-}
-
-
-void ASTUserNamesWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
-{
- assert(!names.empty());
- bool need_comma = false;
- for (const auto & name : names)
- {
- if (std::exchange(need_comma, true))
- settings.ostr << ", ";
- name->format(settings);
- }
-}
-
-Strings ASTUserNamesWithHost::toStrings() const
-{
- Strings res;
- res.reserve(names.size());
- for (const auto & name : names)
- res.emplace_back(name->toString());
- return res;
-}
-
-void ASTUserNamesWithHost::concatParts()
-{
- for (auto & name : names)
- name->concatParts();
-}
-
-
-bool ASTUserNamesWithHost::getHostPatternIfCommon(String & out_common_host_pattern) const
-{
- out_common_host_pattern.clear();
-
- if (names.empty())
- return true;
-
- for (size_t i = 1; i != names.size(); ++i)
- if (names[i]->host_pattern != names[0]->host_pattern)
- return false;
-
- out_common_host_pattern = names[0]->host_pattern;
- return true;
-}
-
-}
+
+
+namespace DB
+{
+
+void ASTUserNameWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ settings.ostr << backQuoteIfNeed(base_name);
+
+ if (!host_pattern.empty())
+ settings.ostr << "@" << backQuoteIfNeed(host_pattern);
+}
+
+String ASTUserNameWithHost::toString() const
+{
+ String res = base_name;
+ if (!host_pattern.empty())
+ res += '@' + host_pattern;
+ return res;
+}
+
+void ASTUserNameWithHost::concatParts()
+{
+ base_name = toString();
+ host_pattern.clear();
+}
+
+
+void ASTUserNamesWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
+{
+ assert(!names.empty());
+ bool need_comma = false;
+ for (const auto & name : names)
+ {
+ if (std::exchange(need_comma, true))
+ settings.ostr << ", ";
+ name->format(settings);
+ }
+}
+
+Strings ASTUserNamesWithHost::toStrings() const
+{
+ Strings res;
+ res.reserve(names.size());
+ for (const auto & name : names)
+ res.emplace_back(name->toString());
+ return res;
+}
+
+void ASTUserNamesWithHost::concatParts()
+{
+ for (auto & name : names)
+ name->concatParts();
+}
+
+
+bool ASTUserNamesWithHost::getHostPatternIfCommon(String & out_common_host_pattern) const
+{
+ out_common_host_pattern.clear();
+
+ if (names.empty())
+ return true;
+
+ for (size_t i = 1; i != names.size(); ++i)
+ if (names[i]->host_pattern != names[0]->host_pattern)
+ return false;
+
+ out_common_host_pattern = names[0]->host_pattern;
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h
index 00b1570e06..6d768629e3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTUserNameWithHost.h
@@ -1,53 +1,53 @@
-#pragma once
-
-#include <Parsers/IParser.h>
-
-
-namespace DB
-{
-
-/** Represents a user name.
- * It can be a simple string or identifier or something like `name@host`.
- * In the last case `host` specifies the hosts user is allowed to connect from.
- * The `host` can be an ip address, ip subnet, or a host name.
- * The % and _ wildcard characters are permitted in `host`.
- * These have the same meaning as for pattern-matching operations performed with the LIKE operator.
- */
-class ASTUserNameWithHost : public IAST
-{
-public:
- String base_name;
- String host_pattern;
-
- String toString() const;
- void concatParts();
-
- ASTUserNameWithHost() = default;
- ASTUserNameWithHost(const String & name_) : base_name(name_) {}
- String getID(char) const override { return "UserNameWithHost"; }
- ASTPtr clone() const override { return std::make_shared<ASTUserNameWithHost>(*this); }
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-
-
-class ASTUserNamesWithHost : public IAST
-{
-public:
- std::vector<std::shared_ptr<ASTUserNameWithHost>> names;
-
- size_t size() const { return names.size(); }
- auto begin() const { return names.begin(); }
- auto end() const { return names.end(); }
- auto front() const { return *begin(); }
- void push_back(const String & name_) { names.push_back(std::make_shared<ASTUserNameWithHost>(name_)); }
-
- Strings toStrings() const;
- void concatParts();
- bool getHostPatternIfCommon(String & out_common_host_pattern) const;
-
- String getID(char) const override { return "UserNamesWithHost"; }
- ASTPtr clone() const override { return std::make_shared<ASTUserNamesWithHost>(*this); }
- void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParser.h>
+
+
+namespace DB
+{
+
+/** Represents a user name.
+ * It can be a simple string or identifier or something like `name@host`.
+ * In the last case `host` specifies the hosts user is allowed to connect from.
+ * The `host` can be an ip address, ip subnet, or a host name.
+ * The % and _ wildcard characters are permitted in `host`.
+ * These have the same meaning as for pattern-matching operations performed with the LIKE operator.
+ */
+class ASTUserNameWithHost : public IAST
+{
+public:
+ String base_name;
+ String host_pattern;
+
+ String toString() const;
+ void concatParts();
+
+ ASTUserNameWithHost() = default;
+ ASTUserNameWithHost(const String & name_) : base_name(name_) {}
+ String getID(char) const override { return "UserNameWithHost"; }
+ ASTPtr clone() const override { return std::make_shared<ASTUserNameWithHost>(*this); }
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+
+
+class ASTUserNamesWithHost : public IAST
+{
+public:
+ std::vector<std::shared_ptr<ASTUserNameWithHost>> names;
+
+ size_t size() const { return names.size(); }
+ auto begin() const { return names.begin(); }
+ auto end() const { return names.end(); }
+ auto front() const { return *begin(); }
+ void push_back(const String & name_) { names.push_back(std::make_shared<ASTUserNameWithHost>(name_)); }
+
+ Strings toStrings() const;
+ void concatParts();
+ bool getHostPatternIfCommon(String & out_common_host_pattern) const;
+
+ String getID(char) const override { return "UserNamesWithHost"; }
+ ASTPtr clone() const override { return std::make_shared<ASTUserNamesWithHost>(*this); }
+ void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h
index 9285742e3c..2983198e19 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTWatchQuery.h
@@ -1,48 +1,48 @@
#pragma once
-
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Common/quoteString.h>
-
-namespace DB
-{
-
-class ASTWatchQuery : public ASTQueryWithTableAndOutput
-{
-
-public:
- ASTPtr limit_length;
- bool is_watch_events;
-
- ASTWatchQuery() = default;
- String getID(char) const override { return "WatchQuery_" + database + "_" + table; }
-
- ASTPtr clone() const override
- {
- std::shared_ptr<ASTWatchQuery> res = std::make_shared<ASTWatchQuery>(*this);
- res->children.clear();
- cloneOutputOptions(*res);
- return res;
- }
-
-protected:
- void formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
- {
- std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' ');
-
+
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Common/quoteString.h>
+
+namespace DB
+{
+
+class ASTWatchQuery : public ASTQueryWithTableAndOutput
+{
+
+public:
+ ASTPtr limit_length;
+ bool is_watch_events;
+
+ ASTWatchQuery() = default;
+ String getID(char) const override { return "WatchQuery_" + database + "_" + table; }
+
+ ASTPtr clone() const override
+ {
+ std::shared_ptr<ASTWatchQuery> res = std::make_shared<ASTWatchQuery>(*this);
+ res->children.clear();
+ cloneOutputOptions(*res);
+ return res;
+ }
+
+protected:
+ void formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
+ {
+ std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' ');
+
s.ostr << (s.hilite ? hilite_keyword : "") << "WATCH " << (s.hilite ? hilite_none : "")
- << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
-
- if (is_watch_events)
- {
- s.ostr << " " << (s.hilite ? hilite_keyword : "") << "EVENTS" << (s.hilite ? hilite_none : "");
- }
-
- if (limit_length)
- {
- s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : "");
- limit_length->formatImpl(s, state, frame);
- }
- }
-};
-
-}
+ << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
+
+ if (is_watch_events)
+ {
+ s.ostr << " " << (s.hilite ? hilite_keyword : "") << "EVENTS" << (s.hilite ? hilite_none : "");
+ }
+
+ if (limit_length)
+ {
+ s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : "");
+ limit_length->formatImpl(s, state, frame);
+ }
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp
index 24390773d1..66e39deac6 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/Lexer.cpp
@@ -1,156 +1,156 @@
-#include <Parsers/Lexer.h>
-#include <Common/StringUtils/StringUtils.h>
-#include <common/find_symbols.h>
-
-namespace DB
-{
-
-namespace
-{
-
-/// This must be consistent with functions in ReadHelpers.h
-template <char quote, TokenType success_token, TokenType error_token>
-Token quotedString(const char *& pos, const char * const token_begin, const char * const end)
-{
- ++pos;
- while (true)
- {
- pos = find_first_symbols<quote, '\\'>(pos, end);
- if (pos >= end)
- return Token(error_token, token_begin, end);
-
- if (*pos == quote)
- {
- ++pos;
- if (pos < end && *pos == quote)
- {
- ++pos;
- continue;
- }
- return Token(success_token, token_begin, pos);
- }
-
- if (*pos == '\\')
- {
- ++pos;
- if (pos >= end)
- return Token(error_token, token_begin, end);
- ++pos;
- continue;
- }
-
- __builtin_unreachable();
- }
-}
-
-}
-
-
-Token Lexer::nextToken()
-{
- Token res = nextTokenImpl();
+#include <Parsers/Lexer.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <common/find_symbols.h>
+
+namespace DB
+{
+
+namespace
+{
+
+/// This must be consistent with functions in ReadHelpers.h
+template <char quote, TokenType success_token, TokenType error_token>
+Token quotedString(const char *& pos, const char * const token_begin, const char * const end)
+{
+ ++pos;
+ while (true)
+ {
+ pos = find_first_symbols<quote, '\\'>(pos, end);
+ if (pos >= end)
+ return Token(error_token, token_begin, end);
+
+ if (*pos == quote)
+ {
+ ++pos;
+ if (pos < end && *pos == quote)
+ {
+ ++pos;
+ continue;
+ }
+ return Token(success_token, token_begin, pos);
+ }
+
+ if (*pos == '\\')
+ {
+ ++pos;
+ if (pos >= end)
+ return Token(error_token, token_begin, end);
+ ++pos;
+ continue;
+ }
+
+ __builtin_unreachable();
+ }
+}
+
+}
+
+
+Token Lexer::nextToken()
+{
+ Token res = nextTokenImpl();
if (max_query_size && res.end > begin + max_query_size)
- res.type = TokenType::ErrorMaxQuerySizeExceeded;
- if (res.isSignificant())
- prev_significant_token_type = res.type;
- return res;
-}
-
-
-Token Lexer::nextTokenImpl()
-{
- if (pos >= end)
- return Token(TokenType::EndOfStream, end, end);
-
- const char * const token_begin = pos;
-
- auto comment_until_end_of_line = [&]() mutable
- {
- pos = find_first_symbols<'\n'>(pos, end); /// This means that newline in single-line comment cannot be escaped.
- return Token(TokenType::Comment, token_begin, pos);
- };
-
- switch (*pos)
- {
- case ' ': [[fallthrough]];
- case '\t': [[fallthrough]];
- case '\n': [[fallthrough]];
- case '\r': [[fallthrough]];
- case '\f': [[fallthrough]];
- case '\v':
- {
- ++pos;
- while (pos < end && isWhitespaceASCII(*pos))
- ++pos;
- return Token(TokenType::Whitespace, token_begin, pos);
- }
-
- case '0': [[fallthrough]];
- case '1': [[fallthrough]];
- case '2': [[fallthrough]];
- case '3': [[fallthrough]];
- case '4': [[fallthrough]];
- case '5': [[fallthrough]];
- case '6': [[fallthrough]];
- case '7': [[fallthrough]];
- case '8': [[fallthrough]];
- case '9':
- {
- /// The task is not to parse a number or check correctness, but only to skip it.
-
- /// Disambiguation: if previous token was dot, then we could parse only simple integer,
- /// for chained tuple access operators (x.1.1) to work.
- // Otherwise it will be tokenized as x . 1.1, not as x . 1 . 1
- if (prev_significant_token_type == TokenType::Dot)
- {
- ++pos;
- while (pos < end && isNumericASCII(*pos))
- ++pos;
- }
- else
- {
- /// 0x, 0b
- bool hex = false;
- if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B'))
- {
- if (pos[1] == 'x' || pos[1] == 'X')
- hex = true;
- pos += 2;
- }
- else
- ++pos;
-
- while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
- ++pos;
-
- /// decimal point
- if (pos < end && *pos == '.')
- {
- ++pos;
- while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
- ++pos;
- }
-
- /// exponentiation (base 10 or base 2)
- if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E')))
- {
- ++pos;
-
- /// sign of exponent. It is always decimal.
- if (pos + 1 < end && (*pos == '-' || *pos == '+'))
- ++pos;
-
- while (pos < end && isNumericASCII(*pos))
- ++pos;
- }
- }
-
+ res.type = TokenType::ErrorMaxQuerySizeExceeded;
+ if (res.isSignificant())
+ prev_significant_token_type = res.type;
+ return res;
+}
+
+
+Token Lexer::nextTokenImpl()
+{
+ if (pos >= end)
+ return Token(TokenType::EndOfStream, end, end);
+
+ const char * const token_begin = pos;
+
+ auto comment_until_end_of_line = [&]() mutable
+ {
+ pos = find_first_symbols<'\n'>(pos, end); /// This means that newline in single-line comment cannot be escaped.
+ return Token(TokenType::Comment, token_begin, pos);
+ };
+
+ switch (*pos)
+ {
+ case ' ': [[fallthrough]];
+ case '\t': [[fallthrough]];
+ case '\n': [[fallthrough]];
+ case '\r': [[fallthrough]];
+ case '\f': [[fallthrough]];
+ case '\v':
+ {
+ ++pos;
+ while (pos < end && isWhitespaceASCII(*pos))
+ ++pos;
+ return Token(TokenType::Whitespace, token_begin, pos);
+ }
+
+ case '0': [[fallthrough]];
+ case '1': [[fallthrough]];
+ case '2': [[fallthrough]];
+ case '3': [[fallthrough]];
+ case '4': [[fallthrough]];
+ case '5': [[fallthrough]];
+ case '6': [[fallthrough]];
+ case '7': [[fallthrough]];
+ case '8': [[fallthrough]];
+ case '9':
+ {
+ /// The task is not to parse a number or check correctness, but only to skip it.
+
+ /// Disambiguation: if previous token was dot, then we could parse only simple integer,
+ /// for chained tuple access operators (x.1.1) to work.
+ // Otherwise it will be tokenized as x . 1.1, not as x . 1 . 1
+ if (prev_significant_token_type == TokenType::Dot)
+ {
+ ++pos;
+ while (pos < end && isNumericASCII(*pos))
+ ++pos;
+ }
+ else
+ {
+ /// 0x, 0b
+ bool hex = false;
+ if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B'))
+ {
+ if (pos[1] == 'x' || pos[1] == 'X')
+ hex = true;
+ pos += 2;
+ }
+ else
+ ++pos;
+
+ while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
+ ++pos;
+
+ /// decimal point
+ if (pos < end && *pos == '.')
+ {
+ ++pos;
+ while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
+ ++pos;
+ }
+
+ /// exponentiation (base 10 or base 2)
+ if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E')))
+ {
+ ++pos;
+
+ /// sign of exponent. It is always decimal.
+ if (pos + 1 < end && (*pos == '-' || *pos == '+'))
+ ++pos;
+
+ while (pos < end && isNumericASCII(*pos))
+ ++pos;
+ }
+ }
+
/// Try to parse it to a identifier(1identifier_name), otherwise it return ErrorWrongNumber
- if (pos < end && isWordCharASCII(*pos))
- {
- ++pos;
- while (pos < end && isWordCharASCII(*pos))
- ++pos;
+ if (pos < end && isWordCharASCII(*pos))
+ {
+ ++pos;
+ while (pos < end && isWordCharASCII(*pos))
+ ++pos;
for (const char * iterator = token_begin; iterator < pos; ++iterator)
{
@@ -159,110 +159,110 @@ Token Lexer::nextTokenImpl()
}
return Token(TokenType::BareWord, token_begin, pos);
- }
-
- return Token(TokenType::Number, token_begin, pos);
- }
-
- case '\'':
- return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end);
- case '"':
- return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end);
- case '`':
- return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end);
-
- case '(':
- return Token(TokenType::OpeningRoundBracket, token_begin, ++pos);
- case ')':
- return Token(TokenType::ClosingRoundBracket, token_begin, ++pos);
- case '[':
- return Token(TokenType::OpeningSquareBracket, token_begin, ++pos);
- case ']':
- return Token(TokenType::ClosingSquareBracket, token_begin, ++pos);
- case '{':
- return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos);
- case '}':
- return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos);
- case ',':
- return Token(TokenType::Comma, token_begin, ++pos);
- case ';':
- return Token(TokenType::Semicolon, token_begin, ++pos);
-
- case '.': /// qualifier, tuple access operator or start of floating point number
- {
- /// Just after identifier or complex expression or number (for chained tuple access like x.1.1 to work properly).
- if (pos > begin
- && (!(pos + 1 < end && isNumericASCII(pos[1]))
- || prev_significant_token_type == TokenType::ClosingRoundBracket
- || prev_significant_token_type == TokenType::ClosingSquareBracket
- || prev_significant_token_type == TokenType::BareWord
- || prev_significant_token_type == TokenType::QuotedIdentifier
- || prev_significant_token_type == TokenType::Number))
- return Token(TokenType::Dot, token_begin, ++pos);
-
- ++pos;
- while (pos < end && isNumericASCII(*pos))
- ++pos;
-
- /// exponentiation
- if (pos + 1 < end && (*pos == 'e' || *pos == 'E'))
- {
- ++pos;
-
- /// sign of exponent
- if (pos + 1 < end && (*pos == '-' || *pos == '+'))
- ++pos;
-
- while (pos < end && isNumericASCII(*pos))
- ++pos;
- }
-
- return Token(TokenType::Number, token_begin, pos);
- }
-
- case '+':
- return Token(TokenType::Plus, token_begin, ++pos);
- case '-': /// minus (-), arrow (->) or start of comment (--)
- {
- ++pos;
- if (pos < end && *pos == '>')
- return Token(TokenType::Arrow, token_begin, ++pos);
-
- if (pos < end && *pos == '-')
- {
- ++pos;
- return comment_until_end_of_line();
- }
-
- return Token(TokenType::Minus, token_begin, pos);
- }
- case '*':
- ++pos;
- return Token(TokenType::Asterisk, token_begin, pos);
- case '/': /// division (/) or start of comment (//, /*)
- {
- ++pos;
- if (pos < end && (*pos == '/' || *pos == '*'))
- {
- if (*pos == '/')
- {
- ++pos;
- return comment_until_end_of_line();
- }
- else
- {
- ++pos;
+ }
+
+ return Token(TokenType::Number, token_begin, pos);
+ }
+
+ case '\'':
+ return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end);
+ case '"':
+ return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end);
+ case '`':
+ return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end);
+
+ case '(':
+ return Token(TokenType::OpeningRoundBracket, token_begin, ++pos);
+ case ')':
+ return Token(TokenType::ClosingRoundBracket, token_begin, ++pos);
+ case '[':
+ return Token(TokenType::OpeningSquareBracket, token_begin, ++pos);
+ case ']':
+ return Token(TokenType::ClosingSquareBracket, token_begin, ++pos);
+ case '{':
+ return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos);
+ case '}':
+ return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos);
+ case ',':
+ return Token(TokenType::Comma, token_begin, ++pos);
+ case ';':
+ return Token(TokenType::Semicolon, token_begin, ++pos);
+
+ case '.': /// qualifier, tuple access operator or start of floating point number
+ {
+ /// Just after identifier or complex expression or number (for chained tuple access like x.1.1 to work properly).
+ if (pos > begin
+ && (!(pos + 1 < end && isNumericASCII(pos[1]))
+ || prev_significant_token_type == TokenType::ClosingRoundBracket
+ || prev_significant_token_type == TokenType::ClosingSquareBracket
+ || prev_significant_token_type == TokenType::BareWord
+ || prev_significant_token_type == TokenType::QuotedIdentifier
+ || prev_significant_token_type == TokenType::Number))
+ return Token(TokenType::Dot, token_begin, ++pos);
+
+ ++pos;
+ while (pos < end && isNumericASCII(*pos))
+ ++pos;
+
+ /// exponentiation
+ if (pos + 1 < end && (*pos == 'e' || *pos == 'E'))
+ {
+ ++pos;
+
+ /// sign of exponent
+ if (pos + 1 < end && (*pos == '-' || *pos == '+'))
+ ++pos;
+
+ while (pos < end && isNumericASCII(*pos))
+ ++pos;
+ }
+
+ return Token(TokenType::Number, token_begin, pos);
+ }
+
+ case '+':
+ return Token(TokenType::Plus, token_begin, ++pos);
+ case '-': /// minus (-), arrow (->) or start of comment (--)
+ {
+ ++pos;
+ if (pos < end && *pos == '>')
+ return Token(TokenType::Arrow, token_begin, ++pos);
+
+ if (pos < end && *pos == '-')
+ {
+ ++pos;
+ return comment_until_end_of_line();
+ }
+
+ return Token(TokenType::Minus, token_begin, pos);
+ }
+ case '*':
+ ++pos;
+ return Token(TokenType::Asterisk, token_begin, pos);
+ case '/': /// division (/) or start of comment (//, /*)
+ {
+ ++pos;
+ if (pos < end && (*pos == '/' || *pos == '*'))
+ {
+ if (*pos == '/')
+ {
+ ++pos;
+ return comment_until_end_of_line();
+ }
+ else
+ {
+ ++pos;
/// Nested multiline comments are supported according to the SQL standard.
size_t nesting_level = 1;
- while (pos + 2 <= end)
- {
+ while (pos + 2 <= end)
+ {
if (pos[0] == '/' && pos[1] == '*')
- {
- pos += 2;
+ {
+ pos += 2;
++nesting_level;
- }
+ }
else if (pos[0] == '*' && pos[1] == '/')
{
pos += 2;
@@ -273,70 +273,70 @@ Token Lexer::nextTokenImpl()
}
else
++pos;
- }
+ }
pos = end;
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, pos);
- }
- }
- return Token(TokenType::Slash, token_begin, pos);
- }
- case '%':
- return Token(TokenType::Percent, token_begin, ++pos);
- case '=': /// =, ==
- {
- ++pos;
- if (pos < end && *pos == '=')
- ++pos;
- return Token(TokenType::Equals, token_begin, pos);
- }
- case '!': /// !=
- {
- ++pos;
- if (pos < end && *pos == '=')
- return Token(TokenType::NotEquals, token_begin, ++pos);
- return Token(TokenType::ErrorSingleExclamationMark, token_begin, pos);
- }
- case '<': /// <, <=, <>
- {
- ++pos;
- if (pos < end && *pos == '=')
- return Token(TokenType::LessOrEquals, token_begin, ++pos);
- if (pos < end && *pos == '>')
- return Token(TokenType::NotEquals, token_begin, ++pos);
- return Token(TokenType::Less, token_begin, pos);
- }
- case '>': /// >, >=
- {
- ++pos;
- if (pos < end && *pos == '=')
- return Token(TokenType::GreaterOrEquals, token_begin, ++pos);
- return Token(TokenType::Greater, token_begin, pos);
- }
- case '?':
- return Token(TokenType::QuestionMark, token_begin, ++pos);
- case ':':
+ }
+ }
+ return Token(TokenType::Slash, token_begin, pos);
+ }
+ case '%':
+ return Token(TokenType::Percent, token_begin, ++pos);
+ case '=': /// =, ==
+ {
+ ++pos;
+ if (pos < end && *pos == '=')
+ ++pos;
+ return Token(TokenType::Equals, token_begin, pos);
+ }
+ case '!': /// !=
+ {
+ ++pos;
+ if (pos < end && *pos == '=')
+ return Token(TokenType::NotEquals, token_begin, ++pos);
+ return Token(TokenType::ErrorSingleExclamationMark, token_begin, pos);
+ }
+ case '<': /// <, <=, <>
+ {
+ ++pos;
+ if (pos < end && *pos == '=')
+ return Token(TokenType::LessOrEquals, token_begin, ++pos);
+ if (pos < end && *pos == '>')
+ return Token(TokenType::NotEquals, token_begin, ++pos);
+ return Token(TokenType::Less, token_begin, pos);
+ }
+ case '>': /// >, >=
+ {
+ ++pos;
+ if (pos < end && *pos == '=')
+ return Token(TokenType::GreaterOrEquals, token_begin, ++pos);
+ return Token(TokenType::Greater, token_begin, pos);
+ }
+ case '?':
+ return Token(TokenType::QuestionMark, token_begin, ++pos);
+ case ':':
{
++pos;
if (pos < end && *pos == ':')
return Token(TokenType::DoubleColon, token_begin, ++pos);
return Token(TokenType::Colon, token_begin, pos);
}
- case '|':
- {
- ++pos;
- if (pos < end && *pos == '|')
- return Token(TokenType::Concatenation, token_begin, ++pos);
- return Token(TokenType::ErrorSinglePipeMark, token_begin, pos);
- }
- case '@':
- {
- ++pos;
- if (pos < end && *pos == '@')
- return Token(TokenType::DoubleAt, token_begin, ++pos);
- return Token(TokenType::At, token_begin, pos);
- }
-
- default:
+ case '|':
+ {
+ ++pos;
+ if (pos < end && *pos == '|')
+ return Token(TokenType::Concatenation, token_begin, ++pos);
+ return Token(TokenType::ErrorSinglePipeMark, token_begin, pos);
+ }
+ case '@':
+ {
+ ++pos;
+ if (pos < end && *pos == '@')
+ return Token(TokenType::DoubleAt, token_begin, ++pos);
+ return Token(TokenType::At, token_begin, pos);
+ }
+
+ default:
if (*pos == '$')
{
/// Try to capture dollar sign as start of here doc
@@ -366,64 +366,64 @@ Token Lexer::nextTokenImpl()
}
}
if (isWordCharASCII(*pos) || *pos == '$')
- {
- ++pos;
+ {
+ ++pos;
while (pos < end && (isWordCharASCII(*pos) || *pos == '$'))
- ++pos;
- return Token(TokenType::BareWord, token_begin, pos);
- }
- else
- {
- /// We will also skip unicode whitespaces in UTF-8 to support for queries copy-pasted from MS Word and similar.
- pos = skipWhitespacesUTF8(pos, end);
- if (pos > token_begin)
- return Token(TokenType::Whitespace, token_begin, pos);
- else
- return Token(TokenType::Error, token_begin, ++pos);
- }
- }
-}
-
-
-const char * getTokenName(TokenType type)
-{
- switch (type)
- {
-#define M(TOKEN) \
- case TokenType::TOKEN: return #TOKEN;
-APPLY_FOR_TOKENS(M)
-#undef M
- }
-
- __builtin_unreachable();
-}
-
-
-const char * getErrorTokenDescription(TokenType type)
-{
- switch (type)
- {
- case TokenType::Error:
- return "Unrecognized token";
- case TokenType::ErrorMultilineCommentIsNotClosed:
- return "Multiline comment is not closed";
- case TokenType::ErrorSingleQuoteIsNotClosed:
- return "Single quoted string is not closed";
- case TokenType::ErrorDoubleQuoteIsNotClosed:
- return "Double quoted string is not closed";
- case TokenType::ErrorBackQuoteIsNotClosed:
- return "Back quoted string is not closed";
- case TokenType::ErrorSingleExclamationMark:
- return "Exclamation mark can only occur in != operator";
- case TokenType::ErrorSinglePipeMark:
- return "Pipe symbol could only occur in || operator";
- case TokenType::ErrorWrongNumber:
- return "Wrong number";
- case TokenType::ErrorMaxQuerySizeExceeded:
- return "Max query size exceeded";
- default:
- return "Not an error";
- }
-}
-
-}
+ ++pos;
+ return Token(TokenType::BareWord, token_begin, pos);
+ }
+ else
+ {
+ /// We will also skip unicode whitespaces in UTF-8 to support for queries copy-pasted from MS Word and similar.
+ pos = skipWhitespacesUTF8(pos, end);
+ if (pos > token_begin)
+ return Token(TokenType::Whitespace, token_begin, pos);
+ else
+ return Token(TokenType::Error, token_begin, ++pos);
+ }
+ }
+}
+
+
+const char * getTokenName(TokenType type)
+{
+ switch (type)
+ {
+#define M(TOKEN) \
+ case TokenType::TOKEN: return #TOKEN;
+APPLY_FOR_TOKENS(M)
+#undef M
+ }
+
+ __builtin_unreachable();
+}
+
+
+const char * getErrorTokenDescription(TokenType type)
+{
+ switch (type)
+ {
+ case TokenType::Error:
+ return "Unrecognized token";
+ case TokenType::ErrorMultilineCommentIsNotClosed:
+ return "Multiline comment is not closed";
+ case TokenType::ErrorSingleQuoteIsNotClosed:
+ return "Single quoted string is not closed";
+ case TokenType::ErrorDoubleQuoteIsNotClosed:
+ return "Double quoted string is not closed";
+ case TokenType::ErrorBackQuoteIsNotClosed:
+ return "Back quoted string is not closed";
+ case TokenType::ErrorSingleExclamationMark:
+ return "Exclamation mark can only occur in != operator";
+ case TokenType::ErrorSinglePipeMark:
+ return "Pipe symbol could only occur in || operator";
+ case TokenType::ErrorWrongNumber:
+ return "Wrong number";
+ case TokenType::ErrorMaxQuerySizeExceeded:
+ return "Max query size exceeded";
+ default:
+ return "Not an error";
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp
index f654033b29..fb842fdd80 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.cpp
@@ -1,99 +1,99 @@
-#include <Common/typeid_cast.h>
-#include <Parsers/ParserAlterQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/ParserPartition.h>
-#include <Parsers/ParserSelectWithUnionQuery.h>
-#include <Parsers/ParserSetQuery.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTIndexDeclaration.h>
-#include <Parsers/ASTAlterQuery.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/parseDatabaseAndTableName.h>
-
-
-namespace DB
-{
-
-bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- auto command = std::make_shared<ASTAlterCommand>();
- node = command;
-
- ParserKeyword s_add_column("ADD COLUMN");
- ParserKeyword s_drop_column("DROP COLUMN");
- ParserKeyword s_clear_column("CLEAR COLUMN");
- ParserKeyword s_modify_column("MODIFY COLUMN");
- ParserKeyword s_rename_column("RENAME COLUMN");
- ParserKeyword s_comment_column("COMMENT COLUMN");
+#include <Common/typeid_cast.h>
+#include <Parsers/ParserAlterQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/ParserPartition.h>
+#include <Parsers/ParserSelectWithUnionQuery.h>
+#include <Parsers/ParserSetQuery.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTIndexDeclaration.h>
+#include <Parsers/ASTAlterQuery.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/parseDatabaseAndTableName.h>
+
+
+namespace DB
+{
+
+bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ auto command = std::make_shared<ASTAlterCommand>();
+ node = command;
+
+ ParserKeyword s_add_column("ADD COLUMN");
+ ParserKeyword s_drop_column("DROP COLUMN");
+ ParserKeyword s_clear_column("CLEAR COLUMN");
+ ParserKeyword s_modify_column("MODIFY COLUMN");
+ ParserKeyword s_rename_column("RENAME COLUMN");
+ ParserKeyword s_comment_column("COMMENT COLUMN");
ParserKeyword s_materialize_column("MATERIALIZE COLUMN");
- ParserKeyword s_modify_order_by("MODIFY ORDER BY");
+ ParserKeyword s_modify_order_by("MODIFY ORDER BY");
ParserKeyword s_modify_sample_by("MODIFY SAMPLE BY");
- ParserKeyword s_modify_ttl("MODIFY TTL");
- ParserKeyword s_materialize_ttl("MATERIALIZE TTL");
- ParserKeyword s_modify_setting("MODIFY SETTING");
+ ParserKeyword s_modify_ttl("MODIFY TTL");
+ ParserKeyword s_materialize_ttl("MATERIALIZE TTL");
+ ParserKeyword s_modify_setting("MODIFY SETTING");
ParserKeyword s_reset_setting("RESET SETTING");
- ParserKeyword s_modify_query("MODIFY QUERY");
-
- ParserKeyword s_add_index("ADD INDEX");
- ParserKeyword s_drop_index("DROP INDEX");
- ParserKeyword s_clear_index("CLEAR INDEX");
- ParserKeyword s_materialize_index("MATERIALIZE INDEX");
-
- ParserKeyword s_add_constraint("ADD CONSTRAINT");
- ParserKeyword s_drop_constraint("DROP CONSTRAINT");
-
+ ParserKeyword s_modify_query("MODIFY QUERY");
+
+ ParserKeyword s_add_index("ADD INDEX");
+ ParserKeyword s_drop_index("DROP INDEX");
+ ParserKeyword s_clear_index("CLEAR INDEX");
+ ParserKeyword s_materialize_index("MATERIALIZE INDEX");
+
+ ParserKeyword s_add_constraint("ADD CONSTRAINT");
+ ParserKeyword s_drop_constraint("DROP CONSTRAINT");
+
ParserKeyword s_add_projection("ADD PROJECTION");
ParserKeyword s_drop_projection("DROP PROJECTION");
ParserKeyword s_clear_projection("CLEAR PROJECTION");
ParserKeyword s_materialize_projection("MATERIALIZE PROJECTION");
- ParserKeyword s_add("ADD");
- ParserKeyword s_drop("DROP");
- ParserKeyword s_suspend("SUSPEND");
- ParserKeyword s_resume("RESUME");
- ParserKeyword s_refresh("REFRESH");
- ParserKeyword s_modify("MODIFY");
-
- ParserKeyword s_attach_partition("ATTACH PARTITION");
+ ParserKeyword s_add("ADD");
+ ParserKeyword s_drop("DROP");
+ ParserKeyword s_suspend("SUSPEND");
+ ParserKeyword s_resume("RESUME");
+ ParserKeyword s_refresh("REFRESH");
+ ParserKeyword s_modify("MODIFY");
+
+ ParserKeyword s_attach_partition("ATTACH PARTITION");
ParserKeyword s_attach_part("ATTACH PART");
- ParserKeyword s_detach_partition("DETACH PARTITION");
+ ParserKeyword s_detach_partition("DETACH PARTITION");
ParserKeyword s_detach_part("DETACH PART");
- ParserKeyword s_drop_partition("DROP PARTITION");
+ ParserKeyword s_drop_partition("DROP PARTITION");
ParserKeyword s_drop_part("DROP PART");
- ParserKeyword s_move_partition("MOVE PARTITION");
+ ParserKeyword s_move_partition("MOVE PARTITION");
ParserKeyword s_move_part("MOVE PART");
- ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION");
- ParserKeyword s_drop_detached_part("DROP DETACHED PART");
- ParserKeyword s_fetch_partition("FETCH PARTITION");
+ ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION");
+ ParserKeyword s_drop_detached_part("DROP DETACHED PART");
+ ParserKeyword s_fetch_partition("FETCH PARTITION");
ParserKeyword s_fetch_part("FETCH PART");
- ParserKeyword s_replace_partition("REPLACE PARTITION");
- ParserKeyword s_freeze("FREEZE");
+ ParserKeyword s_replace_partition("REPLACE PARTITION");
+ ParserKeyword s_freeze("FREEZE");
ParserKeyword s_unfreeze("UNFREEZE");
- ParserKeyword s_partition("PARTITION");
-
- ParserKeyword s_first("FIRST");
- ParserKeyword s_after("AFTER");
- ParserKeyword s_if_not_exists("IF NOT EXISTS");
- ParserKeyword s_if_exists("IF EXISTS");
- ParserKeyword s_from("FROM");
- ParserKeyword s_in_partition("IN PARTITION");
- ParserKeyword s_with("WITH");
- ParserKeyword s_name("NAME");
-
- ParserKeyword s_to_disk("TO DISK");
- ParserKeyword s_to_volume("TO VOLUME");
- ParserKeyword s_to_table("TO TABLE");
+ ParserKeyword s_partition("PARTITION");
+
+ ParserKeyword s_first("FIRST");
+ ParserKeyword s_after("AFTER");
+ ParserKeyword s_if_not_exists("IF NOT EXISTS");
+ ParserKeyword s_if_exists("IF EXISTS");
+ ParserKeyword s_from("FROM");
+ ParserKeyword s_in_partition("IN PARTITION");
+ ParserKeyword s_with("WITH");
+ ParserKeyword s_name("NAME");
+
+ ParserKeyword s_to_disk("TO DISK");
+ ParserKeyword s_to_volume("TO VOLUME");
+ ParserKeyword s_to_table("TO TABLE");
ParserKeyword s_to_shard("TO SHARD");
-
+
ParserKeyword s_delete("DELETE");
- ParserKeyword s_update("UPDATE");
- ParserKeyword s_where("WHERE");
- ParserKeyword s_to("TO");
-
+ ParserKeyword s_update("UPDATE");
+ ParserKeyword s_where("WHERE");
+ ParserKeyword s_to("TO");
+
ParserKeyword s_remove("REMOVE");
ParserKeyword s_default("DEFAULT");
ParserKeyword s_materialized("MATERIALIZED");
@@ -104,72 +104,72 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_remove_ttl("REMOVE TTL");
- ParserCompoundIdentifier parser_name;
- ParserStringLiteral parser_string_literal;
+ ParserCompoundIdentifier parser_name;
+ ParserStringLiteral parser_string_literal;
ParserIdentifier parser_remove_property;
- ParserCompoundColumnDeclaration parser_col_decl;
- ParserIndexDeclaration parser_idx_decl;
- ParserConstraintDeclaration parser_constraint_decl;
+ ParserCompoundColumnDeclaration parser_col_decl;
+ ParserIndexDeclaration parser_idx_decl;
+ ParserConstraintDeclaration parser_constraint_decl;
ParserProjectionDeclaration parser_projection_decl;
ParserCompoundColumnDeclaration parser_modify_col_decl(false, false, true);
- ParserPartition parser_partition;
- ParserExpression parser_exp_elem;
- ParserList parser_assignment_list(
- std::make_unique<ParserAssignment>(), std::make_unique<ParserToken>(TokenType::Comma),
- /* allow_empty = */ false);
- ParserSetQuery parser_settings(true);
+ ParserPartition parser_partition;
+ ParserExpression parser_exp_elem;
+ ParserList parser_assignment_list(
+ std::make_unique<ParserAssignment>(), std::make_unique<ParserToken>(TokenType::Comma),
+ /* allow_empty = */ false);
+ ParserSetQuery parser_settings(true);
ParserList parser_reset_setting(
std::make_unique<ParserIdentifier>(), std::make_unique<ParserToken>(TokenType::Comma),
/* allow_empty = */ false);
- ParserNameList values_p;
- ParserSelectWithUnionQuery select_p;
- ParserTTLExpressionList parser_ttl_list;
-
- if (is_live_view)
- {
- if (s_refresh.ignore(pos, expected))
- {
- command->type = ASTAlterCommand::LIVE_VIEW_REFRESH;
- }
- else
- return false;
- }
- else
- {
- if (s_add_column.ignore(pos, expected))
- {
- if (s_if_not_exists.ignore(pos, expected))
- command->if_not_exists = true;
-
- if (!parser_col_decl.parse(pos, command->col_decl, expected))
- return false;
-
- if (s_first.ignore(pos, expected))
- command->first = true;
- else if (s_after.ignore(pos, expected))
- {
- if (!parser_name.parse(pos, command->column, expected))
- return false;
- }
-
- command->type = ASTAlterCommand::ADD_COLUMN;
- }
- else if (s_rename_column.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->column, expected))
- return false;
-
- if (!s_to.ignore(pos, expected))
- return false;
-
- if (!parser_name.parse(pos, command->rename_to, expected))
- return false;
-
- command->type = ASTAlterCommand::RENAME_COLUMN;
- }
+ ParserNameList values_p;
+ ParserSelectWithUnionQuery select_p;
+ ParserTTLExpressionList parser_ttl_list;
+
+ if (is_live_view)
+ {
+ if (s_refresh.ignore(pos, expected))
+ {
+ command->type = ASTAlterCommand::LIVE_VIEW_REFRESH;
+ }
+ else
+ return false;
+ }
+ else
+ {
+ if (s_add_column.ignore(pos, expected))
+ {
+ if (s_if_not_exists.ignore(pos, expected))
+ command->if_not_exists = true;
+
+ if (!parser_col_decl.parse(pos, command->col_decl, expected))
+ return false;
+
+ if (s_first.ignore(pos, expected))
+ command->first = true;
+ else if (s_after.ignore(pos, expected))
+ {
+ if (!parser_name.parse(pos, command->column, expected))
+ return false;
+ }
+
+ command->type = ASTAlterCommand::ADD_COLUMN;
+ }
+ else if (s_rename_column.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->column, expected))
+ return false;
+
+ if (!s_to.ignore(pos, expected))
+ return false;
+
+ if (!parser_name.parse(pos, command->rename_to, expected))
+ return false;
+
+ command->type = ASTAlterCommand::RENAME_COLUMN;
+ }
else if (s_materialize_column.ignore(pos, expected))
{
if (!parser_name.parse(pos, command->column, expected))
@@ -184,13 +184,13 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
return false;
}
}
- else if (s_drop_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_PARTITION;
- }
+ else if (s_drop_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_PARTITION;
+ }
else if (s_drop_part.ignore(pos, expected))
{
if (!parser_string_literal.parse(pos, command->partition, expected))
@@ -199,114 +199,114 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->type = ASTAlterCommand::DROP_PARTITION;
command->part = true;
}
- else if (s_drop_detached_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_DETACHED_PARTITION;
- }
- else if (s_drop_detached_part.ignore(pos, expected))
- {
- if (!parser_string_literal.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_DETACHED_PARTITION;
- command->part = true;
- }
- else if (s_drop_column.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->column, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_COLUMN;
- command->detach = false;
- }
- else if (s_clear_column.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->column, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_COLUMN;
- command->clear_column = true;
- command->detach = false;
-
- if (s_in_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
- }
- }
- else if (s_add_index.ignore(pos, expected))
- {
- if (s_if_not_exists.ignore(pos, expected))
- command->if_not_exists = true;
-
- if (!parser_idx_decl.parse(pos, command->index_decl, expected))
- return false;
-
+ else if (s_drop_detached_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_DETACHED_PARTITION;
+ }
+ else if (s_drop_detached_part.ignore(pos, expected))
+ {
+ if (!parser_string_literal.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_DETACHED_PARTITION;
+ command->part = true;
+ }
+ else if (s_drop_column.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->column, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_COLUMN;
+ command->detach = false;
+ }
+ else if (s_clear_column.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->column, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_COLUMN;
+ command->clear_column = true;
+ command->detach = false;
+
+ if (s_in_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+ }
+ }
+ else if (s_add_index.ignore(pos, expected))
+ {
+ if (s_if_not_exists.ignore(pos, expected))
+ command->if_not_exists = true;
+
+ if (!parser_idx_decl.parse(pos, command->index_decl, expected))
+ return false;
+
if (s_first.ignore(pos, expected))
command->first = true;
else if (s_after.ignore(pos, expected))
- {
- if (!parser_name.parse(pos, command->index, expected))
- return false;
- }
-
- command->type = ASTAlterCommand::ADD_INDEX;
- }
- else if (s_drop_index.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->index, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_INDEX;
- command->detach = false;
- }
- else if (s_clear_index.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->index, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_INDEX;
- command->clear_index = true;
- command->detach = false;
-
- if (s_in_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
- }
- }
- else if (s_materialize_index.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->index, expected))
- return false;
-
- command->type = ASTAlterCommand::MATERIALIZE_INDEX;
- command->detach = false;
-
+ {
+ if (!parser_name.parse(pos, command->index, expected))
+ return false;
+ }
+
+ command->type = ASTAlterCommand::ADD_INDEX;
+ }
+ else if (s_drop_index.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->index, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_INDEX;
+ command->detach = false;
+ }
+ else if (s_clear_index.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->index, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_INDEX;
+ command->clear_index = true;
+ command->detach = false;
+
if (s_in_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
}
- }
+ }
+ else if (s_materialize_index.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->index, expected))
+ return false;
+
+ command->type = ASTAlterCommand::MATERIALIZE_INDEX;
+ command->detach = false;
+
+ if (s_in_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+ }
+ }
else if (s_add_projection.ignore(pos, expected))
{
if (s_if_not_exists.ignore(pos, expected))
@@ -371,98 +371,98 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
return false;
}
}
- else if (s_move_part.ignore(pos, expected))
- {
- if (!parser_string_literal.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::MOVE_PARTITION;
- command->part = true;
-
- if (s_to_disk.ignore(pos))
- command->move_destination_type = DataDestinationType::DISK;
- else if (s_to_volume.ignore(pos))
- command->move_destination_type = DataDestinationType::VOLUME;
- else if (s_to_table.ignore(pos))
- {
- if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table))
- return false;
- command->move_destination_type = DataDestinationType::TABLE;
- }
+ else if (s_move_part.ignore(pos, expected))
+ {
+ if (!parser_string_literal.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::MOVE_PARTITION;
+ command->part = true;
+
+ if (s_to_disk.ignore(pos))
+ command->move_destination_type = DataDestinationType::DISK;
+ else if (s_to_volume.ignore(pos))
+ command->move_destination_type = DataDestinationType::VOLUME;
+ else if (s_to_table.ignore(pos))
+ {
+ if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table))
+ return false;
+ command->move_destination_type = DataDestinationType::TABLE;
+ }
else if (s_to_shard.ignore(pos))
{
command->move_destination_type = DataDestinationType::SHARD;
}
- else
- return false;
-
- if (command->move_destination_type != DataDestinationType::TABLE)
- {
- ASTPtr ast_space_name;
- if (!parser_string_literal.parse(pos, ast_space_name, expected))
- return false;
-
- command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>();
- }
- }
- else if (s_move_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::MOVE_PARTITION;
-
- if (s_to_disk.ignore(pos))
- command->move_destination_type = DataDestinationType::DISK;
- else if (s_to_volume.ignore(pos))
- command->move_destination_type = DataDestinationType::VOLUME;
- else if (s_to_table.ignore(pos))
- {
- if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table))
- return false;
- command->move_destination_type = DataDestinationType::TABLE;
- }
- else
- return false;
-
- if (command->move_destination_type != DataDestinationType::TABLE)
- {
- ASTPtr ast_space_name;
- if (!parser_string_literal.parse(pos, ast_space_name, expected))
- return false;
-
- command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>();
- }
- }
- else if (s_add_constraint.ignore(pos, expected))
- {
- if (s_if_not_exists.ignore(pos, expected))
- command->if_not_exists = true;
-
- if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected))
- return false;
-
- command->type = ASTAlterCommand::ADD_CONSTRAINT;
- }
- else if (s_drop_constraint.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->constraint, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_CONSTRAINT;
- command->detach = false;
- }
- else if (s_detach_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::DROP_PARTITION;
- command->detach = true;
- }
+ else
+ return false;
+
+ if (command->move_destination_type != DataDestinationType::TABLE)
+ {
+ ASTPtr ast_space_name;
+ if (!parser_string_literal.parse(pos, ast_space_name, expected))
+ return false;
+
+ command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>();
+ }
+ }
+ else if (s_move_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::MOVE_PARTITION;
+
+ if (s_to_disk.ignore(pos))
+ command->move_destination_type = DataDestinationType::DISK;
+ else if (s_to_volume.ignore(pos))
+ command->move_destination_type = DataDestinationType::VOLUME;
+ else if (s_to_table.ignore(pos))
+ {
+ if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table))
+ return false;
+ command->move_destination_type = DataDestinationType::TABLE;
+ }
+ else
+ return false;
+
+ if (command->move_destination_type != DataDestinationType::TABLE)
+ {
+ ASTPtr ast_space_name;
+ if (!parser_string_literal.parse(pos, ast_space_name, expected))
+ return false;
+
+ command->move_destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>();
+ }
+ }
+ else if (s_add_constraint.ignore(pos, expected))
+ {
+ if (s_if_not_exists.ignore(pos, expected))
+ command->if_not_exists = true;
+
+ if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected))
+ return false;
+
+ command->type = ASTAlterCommand::ADD_CONSTRAINT;
+ }
+ else if (s_drop_constraint.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->constraint, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_CONSTRAINT;
+ command->detach = false;
+ }
+ else if (s_detach_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DROP_PARTITION;
+ command->detach = true;
+ }
else if (s_detach_part.ignore(pos, expected))
{
if (!parser_string_literal.parse(pos, command->partition, expected))
@@ -472,61 +472,61 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->part = true;
command->detach = true;
}
- else if (s_attach_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- if (s_from.ignore(pos))
- {
- if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table))
- return false;
-
- command->replace = false;
- command->type = ASTAlterCommand::REPLACE_PARTITION;
- }
- else
- {
- command->type = ASTAlterCommand::ATTACH_PARTITION;
- }
- }
- else if (s_replace_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- if (!s_from.ignore(pos, expected))
- return false;
-
- if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table))
- return false;
-
- command->replace = true;
- command->type = ASTAlterCommand::REPLACE_PARTITION;
- }
- else if (s_attach_part.ignore(pos, expected))
- {
- if (!parser_string_literal.parse(pos, command->partition, expected))
- return false;
-
- command->part = true;
- command->type = ASTAlterCommand::ATTACH_PARTITION;
- }
- else if (s_fetch_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- if (!s_from.ignore(pos, expected))
- return false;
-
- ASTPtr ast_from;
- if (!parser_string_literal.parse(pos, ast_from, expected))
- return false;
-
- command->from = ast_from->as<ASTLiteral &>().value.get<const String &>();
- command->type = ASTAlterCommand::FETCH_PARTITION;
- }
+ else if (s_attach_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ if (s_from.ignore(pos))
+ {
+ if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table))
+ return false;
+
+ command->replace = false;
+ command->type = ASTAlterCommand::REPLACE_PARTITION;
+ }
+ else
+ {
+ command->type = ASTAlterCommand::ATTACH_PARTITION;
+ }
+ }
+ else if (s_replace_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ if (!s_from.ignore(pos, expected))
+ return false;
+
+ if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table))
+ return false;
+
+ command->replace = true;
+ command->type = ASTAlterCommand::REPLACE_PARTITION;
+ }
+ else if (s_attach_part.ignore(pos, expected))
+ {
+ if (!parser_string_literal.parse(pos, command->partition, expected))
+ return false;
+
+ command->part = true;
+ command->type = ASTAlterCommand::ATTACH_PARTITION;
+ }
+ else if (s_fetch_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ if (!s_from.ignore(pos, expected))
+ return false;
+
+ ASTPtr ast_from;
+ if (!parser_string_literal.parse(pos, ast_from, expected))
+ return false;
+
+ command->from = ast_from->as<ASTLiteral &>().value.get<const String &>();
+ command->type = ASTAlterCommand::FETCH_PARTITION;
+ }
else if (s_fetch_part.ignore(pos, expected))
{
if (!parser_string_literal.parse(pos, command->partition, expected))
@@ -542,33 +542,33 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->part = true;
command->type = ASTAlterCommand::FETCH_PARTITION;
}
- else if (s_freeze.ignore(pos, expected))
- {
- if (s_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
-
- command->type = ASTAlterCommand::FREEZE_PARTITION;
- }
- else
- {
- command->type = ASTAlterCommand::FREEZE_ALL;
- }
-
- /// WITH NAME 'name' - place local backup to directory with specified name
- if (s_with.ignore(pos, expected))
- {
- if (!s_name.ignore(pos, expected))
- return false;
-
- ASTPtr ast_with_name;
- if (!parser_string_literal.parse(pos, ast_with_name, expected))
- return false;
-
- command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
- }
- }
+ else if (s_freeze.ignore(pos, expected))
+ {
+ if (s_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+
+ command->type = ASTAlterCommand::FREEZE_PARTITION;
+ }
+ else
+ {
+ command->type = ASTAlterCommand::FREEZE_ALL;
+ }
+
+ /// WITH NAME 'name' - place local backup to directory with specified name
+ if (s_with.ignore(pos, expected))
+ {
+ if (!s_name.ignore(pos, expected))
+ return false;
+
+ ASTPtr ast_with_name;
+ if (!parser_string_literal.parse(pos, ast_with_name, expected))
+ return false;
+
+ command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
+ }
+ }
else if (s_unfreeze.ignore(pos, expected))
{
if (s_partition.ignore(pos, expected))
@@ -600,16 +600,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
return false;
}
}
- else if (s_modify_column.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_modify_col_decl.parse(pos, command->col_decl, expected))
- return false;
-
+ else if (s_modify_column.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_modify_col_decl.parse(pos, command->col_decl, expected))
+ return false;
+
if (s_remove.ignore(pos, expected))
- {
+ {
if (s_default.ignore(pos, expected))
command->remove_property = "DEFAULT";
else if (s_materialized.ignore(pos, expected))
@@ -623,8 +623,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
else if (s_ttl.ignore(pos, expected))
command->remove_property = "TTL";
else
- return false;
- }
+ return false;
+ }
else
{
if (s_first.ignore(pos, expected))
@@ -635,15 +635,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
return false;
}
}
- command->type = ASTAlterCommand::MODIFY_COLUMN;
- }
- else if (s_modify_order_by.ignore(pos, expected))
- {
- if (!parser_exp_elem.parse(pos, command->order_by, expected))
- return false;
-
- command->type = ASTAlterCommand::MODIFY_ORDER_BY;
- }
+ command->type = ASTAlterCommand::MODIFY_COLUMN;
+ }
+ else if (s_modify_order_by.ignore(pos, expected))
+ {
+ if (!parser_exp_elem.parse(pos, command->order_by, expected))
+ return false;
+
+ command->type = ASTAlterCommand::MODIFY_ORDER_BY;
+ }
else if (s_modify_sample_by.ignore(pos, expected))
{
if (!parser_exp_elem.parse(pos, command->sample_by, expected))
@@ -652,7 +652,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->type = ASTAlterCommand::MODIFY_SAMPLE_BY;
}
else if (s_delete.ignore(pos, expected))
- {
+ {
if (s_in_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
@@ -662,93 +662,93 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
if (!s_where.ignore(pos, expected))
return false;
- if (!parser_exp_elem.parse(pos, command->predicate, expected))
- return false;
-
- command->type = ASTAlterCommand::DELETE;
- }
- else if (s_update.ignore(pos, expected))
- {
- if (!parser_assignment_list.parse(pos, command->update_assignments, expected))
- return false;
-
+ if (!parser_exp_elem.parse(pos, command->predicate, expected))
+ return false;
+
+ command->type = ASTAlterCommand::DELETE;
+ }
+ else if (s_update.ignore(pos, expected))
+ {
+ if (!parser_assignment_list.parse(pos, command->update_assignments, expected))
+ return false;
+
if (s_in_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
}
- if (!s_where.ignore(pos, expected))
- return false;
-
- if (!parser_exp_elem.parse(pos, command->predicate, expected))
- return false;
-
- command->type = ASTAlterCommand::UPDATE;
- }
- else if (s_comment_column.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- command->if_exists = true;
-
- if (!parser_name.parse(pos, command->column, expected))
- return false;
-
- if (!parser_string_literal.parse(pos, command->comment, expected))
- return false;
-
- command->type = ASTAlterCommand::COMMENT_COLUMN;
- }
- else if (s_modify_ttl.ignore(pos, expected))
- {
- if (!parser_ttl_list.parse(pos, command->ttl, expected))
- return false;
- command->type = ASTAlterCommand::MODIFY_TTL;
- }
+ if (!s_where.ignore(pos, expected))
+ return false;
+
+ if (!parser_exp_elem.parse(pos, command->predicate, expected))
+ return false;
+
+ command->type = ASTAlterCommand::UPDATE;
+ }
+ else if (s_comment_column.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ command->if_exists = true;
+
+ if (!parser_name.parse(pos, command->column, expected))
+ return false;
+
+ if (!parser_string_literal.parse(pos, command->comment, expected))
+ return false;
+
+ command->type = ASTAlterCommand::COMMENT_COLUMN;
+ }
+ else if (s_modify_ttl.ignore(pos, expected))
+ {
+ if (!parser_ttl_list.parse(pos, command->ttl, expected))
+ return false;
+ command->type = ASTAlterCommand::MODIFY_TTL;
+ }
else if (s_remove_ttl.ignore(pos, expected))
{
command->type = ASTAlterCommand::REMOVE_TTL;
}
- else if (s_materialize_ttl.ignore(pos, expected))
- {
- command->type = ASTAlterCommand::MATERIALIZE_TTL;
-
- if (s_in_partition.ignore(pos, expected))
- {
- if (!parser_partition.parse(pos, command->partition, expected))
- return false;
- }
- }
- else if (s_modify_setting.ignore(pos, expected))
- {
- if (!parser_settings.parse(pos, command->settings_changes, expected))
- return false;
- command->type = ASTAlterCommand::MODIFY_SETTING;
- }
+ else if (s_materialize_ttl.ignore(pos, expected))
+ {
+ command->type = ASTAlterCommand::MATERIALIZE_TTL;
+
+ if (s_in_partition.ignore(pos, expected))
+ {
+ if (!parser_partition.parse(pos, command->partition, expected))
+ return false;
+ }
+ }
+ else if (s_modify_setting.ignore(pos, expected))
+ {
+ if (!parser_settings.parse(pos, command->settings_changes, expected))
+ return false;
+ command->type = ASTAlterCommand::MODIFY_SETTING;
+ }
else if (s_reset_setting.ignore(pos, expected))
{
if (!parser_reset_setting.parse(pos, command->settings_resets, expected))
return false;
command->type = ASTAlterCommand::RESET_SETTING;
}
- else if (s_modify_query.ignore(pos, expected))
- {
- if (!select_p.parse(pos, command->select, expected))
- return false;
- command->type = ASTAlterCommand::MODIFY_QUERY;
- }
- else
- return false;
- }
-
- if (command->col_decl)
- command->children.push_back(command->col_decl);
- if (command->column)
- command->children.push_back(command->column);
- if (command->partition)
- command->children.push_back(command->partition);
- if (command->order_by)
- command->children.push_back(command->order_by);
+ else if (s_modify_query.ignore(pos, expected))
+ {
+ if (!select_p.parse(pos, command->select, expected))
+ return false;
+ command->type = ASTAlterCommand::MODIFY_QUERY;
+ }
+ else
+ return false;
+ }
+
+ if (command->col_decl)
+ command->children.push_back(command->col_decl);
+ if (command->column)
+ command->children.push_back(command->column);
+ if (command->partition)
+ command->children.push_back(command->partition);
+ if (command->order_by)
+ command->children.push_back(command->order_by);
if (command->sample_by)
command->children.push_back(command->sample_by);
if (command->index_decl)
@@ -759,89 +759,89 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->children.push_back(command->constraint_decl);
if (command->constraint)
command->children.push_back(command->constraint);
- if (command->predicate)
- command->children.push_back(command->predicate);
- if (command->update_assignments)
- command->children.push_back(command->update_assignments);
- if (command->values)
- command->children.push_back(command->values);
- if (command->comment)
- command->children.push_back(command->comment);
- if (command->ttl)
- command->children.push_back(command->ttl);
- if (command->settings_changes)
- command->children.push_back(command->settings_changes);
+ if (command->predicate)
+ command->children.push_back(command->predicate);
+ if (command->update_assignments)
+ command->children.push_back(command->update_assignments);
+ if (command->values)
+ command->children.push_back(command->values);
+ if (command->comment)
+ command->children.push_back(command->comment);
+ if (command->ttl)
+ command->children.push_back(command->ttl);
+ if (command->settings_changes)
+ command->children.push_back(command->settings_changes);
if (command->select)
command->children.push_back(command->select);
if (command->rename_to)
command->children.push_back(command->rename_to);
-
- return true;
-}
-
-
-bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
+
+ return true;
+}
+
+
+bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
auto command_list = std::make_shared<ASTExpressionList>();
- node = command_list;
-
- ParserToken s_comma(TokenType::Comma);
- ParserAlterCommand p_command(is_live_view);
-
- do
- {
- ASTPtr command;
- if (!p_command.parse(pos, command, expected))
- return false;
-
+ node = command_list;
+
+ ParserToken s_comma(TokenType::Comma);
+ ParserAlterCommand p_command(is_live_view);
+
+ do
+ {
+ ASTPtr command;
+ if (!p_command.parse(pos, command, expected))
+ return false;
+
command_list->children.push_back(command);
- }
- while (s_comma.ignore(pos, expected));
-
- return true;
-}
-
-
-bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- auto query = std::make_shared<ASTAlterQuery>();
- node = query;
-
- ParserKeyword s_alter_table("ALTER TABLE");
- ParserKeyword s_alter_live_view("ALTER LIVE VIEW");
-
- bool is_live_view = false;
-
- if (!s_alter_table.ignore(pos, expected))
- {
- if (!s_alter_live_view.ignore(pos, expected))
- return false;
- else
- is_live_view = true;
- }
-
- if (is_live_view)
- query->is_live_view = true;
-
- if (!parseDatabaseAndTableName(pos, expected, query->database, query->table))
- return false;
-
- String cluster_str;
- if (ParserKeyword{"ON"}.ignore(pos, expected))
- {
- if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
- return false;
- }
- query->cluster = cluster_str;
-
- ParserAlterCommandList p_command_list(is_live_view);
- ASTPtr command_list;
- if (!p_command_list.parse(pos, command_list, expected))
- return false;
-
- query->set(query->command_list, command_list);
-
- return true;
-}
-
-}
+ }
+ while (s_comma.ignore(pos, expected));
+
+ return true;
+}
+
+
+bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ auto query = std::make_shared<ASTAlterQuery>();
+ node = query;
+
+ ParserKeyword s_alter_table("ALTER TABLE");
+ ParserKeyword s_alter_live_view("ALTER LIVE VIEW");
+
+ bool is_live_view = false;
+
+ if (!s_alter_table.ignore(pos, expected))
+ {
+ if (!s_alter_live_view.ignore(pos, expected))
+ return false;
+ else
+ is_live_view = true;
+ }
+
+ if (is_live_view)
+ query->is_live_view = true;
+
+ if (!parseDatabaseAndTableName(pos, expected, query->database, query->table))
+ return false;
+
+ String cluster_str;
+ if (ParserKeyword{"ON"}.ignore(pos, expected))
+ {
+ if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
+ return false;
+ }
+ query->cluster = cluster_str;
+
+ ParserAlterCommandList p_command_list(is_live_view);
+ ASTPtr command_list;
+ if (!p_command_list.parse(pos, command_list, expected))
+ return false;
+
+ query->set(query->command_list, command_list);
+
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h
index 2e54c4ddba..88e11f4c98 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserAlterQuery.h
@@ -1,67 +1,67 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-namespace DB
-{
-
-/** Query like this:
- * ALTER TABLE [db.]name [ON CLUSTER cluster]
- * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],]
- * [DROP COLUMN [IF EXISTS] col_to_drop, ...]
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+namespace DB
+{
+
+/** Query like this:
+ * ALTER TABLE [db.]name [ON CLUSTER cluster]
+ * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],]
+ * [DROP COLUMN [IF EXISTS] col_to_drop, ...]
* [CLEAR COLUMN [IF EXISTS] col_to_clear[ IN PARTITION partition],]
- * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...]
- * [RENAME COLUMN [IF EXISTS] col_name TO col_name]
- * [MODIFY PRIMARY KEY (a, b, c...)]
- * [MODIFY SETTING setting_name=setting_value, ...]
+ * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...]
+ * [RENAME COLUMN [IF EXISTS] col_name TO col_name]
+ * [MODIFY PRIMARY KEY (a, b, c...)]
+ * [MODIFY SETTING setting_name=setting_value, ...]
* [RESET SETTING setting_name, ...]
- * [COMMENT COLUMN [IF EXISTS] col_name string]
- * [DROP|DETACH|ATTACH PARTITION|PART partition, ...]
- * [FETCH PARTITION partition FROM ...]
- * [FREEZE [PARTITION] [WITH NAME name]]
+ * [COMMENT COLUMN [IF EXISTS] col_name string]
+ * [DROP|DETACH|ATTACH PARTITION|PART partition, ...]
+ * [FETCH PARTITION partition FROM ...]
+ * [FREEZE [PARTITION] [WITH NAME name]]
* [DELETE[ IN PARTITION partition] WHERE ...]
* [UPDATE col_name = expr, ...[ IN PARTITION partition] WHERE ...]
* [ADD INDEX [IF NOT EXISTS] index_name [AFTER index_name]]
* [DROP INDEX [IF EXISTS] index_name]
* [CLEAR INDEX [IF EXISTS] index_name IN PARTITION partition]
* [MATERIALIZE INDEX [IF EXISTS] index_name [IN PARTITION partition]]
- * ALTER LIVE VIEW [db.name]
- * [REFRESH]
- */
-
-class ParserAlterQuery : public IParserBase
-{
-protected:
- const char * getName() const override{ return "ALTER query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserAlterCommandList : public IParserBase
-{
-protected:
- const char * getName() const override{ return "a list of ALTER commands"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-
-public:
- bool is_live_view;
-
- ParserAlterCommandList(bool is_live_view_ = false) : is_live_view(is_live_view_) {}
-};
-
-
-class ParserAlterCommand : public IParserBase
-{
-protected:
- const char * getName() const override{ return "ALTER command"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-
-public:
- bool is_live_view;
-
- ParserAlterCommand(bool is_live_view_ = false) : is_live_view(is_live_view_) {}
-};
-
-
-}
+ * ALTER LIVE VIEW [db.name]
+ * [REFRESH]
+ */
+
+class ParserAlterQuery : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "ALTER query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+
+class ParserAlterCommandList : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "a list of ALTER commands"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+public:
+ bool is_live_view;
+
+ ParserAlterCommandList(bool is_live_view_ = false) : is_live_view(is_live_view_) {}
+};
+
+
+class ParserAlterCommand : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "ALTER command"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+public:
+ bool is_live_view;
+
+ ParserAlterCommand(bool is_live_view_ = false) : is_live_view(is_live_view_) {}
+};
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp
index c397e1c33c..db0df02772 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.cpp
@@ -1,54 +1,54 @@
-#include <Parsers/ParserCheckQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTCheckQuery.h>
-#include <Parsers/ParserPartition.h>
-
-
-namespace DB
-{
-
-bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_check_table("CHECK TABLE");
- ParserKeyword s_partition("PARTITION");
- ParserToken s_dot(TokenType::Dot);
-
- ParserIdentifier table_parser;
- ParserPartition partition_parser;
-
- ASTPtr table;
- ASTPtr database;
-
- if (!s_check_table.ignore(pos, expected))
- return false;
- if (!table_parser.parse(pos, database, expected))
- return false;
-
- auto query = std::make_shared<ASTCheckQuery>();
- if (s_dot.ignore(pos))
- {
- if (!table_parser.parse(pos, table, expected))
- return false;
-
- tryGetIdentifierNameInto(database, query->database);
- tryGetIdentifierNameInto(table, query->table);
- }
- else
- {
- table = database;
- tryGetIdentifierNameInto(table, query->table);
- }
-
- if (s_partition.ignore(pos, expected))
- {
- if (!partition_parser.parse(pos, query->partition, expected))
- return false;
- }
-
- node = query;
- return true;
-}
-
-}
+#include <Parsers/ParserCheckQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTCheckQuery.h>
+#include <Parsers/ParserPartition.h>
+
+
+namespace DB
+{
+
+bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_check_table("CHECK TABLE");
+ ParserKeyword s_partition("PARTITION");
+ ParserToken s_dot(TokenType::Dot);
+
+ ParserIdentifier table_parser;
+ ParserPartition partition_parser;
+
+ ASTPtr table;
+ ASTPtr database;
+
+ if (!s_check_table.ignore(pos, expected))
+ return false;
+ if (!table_parser.parse(pos, database, expected))
+ return false;
+
+ auto query = std::make_shared<ASTCheckQuery>();
+ if (s_dot.ignore(pos))
+ {
+ if (!table_parser.parse(pos, table, expected))
+ return false;
+
+ tryGetIdentifierNameInto(database, query->database);
+ tryGetIdentifierNameInto(table, query->table);
+ }
+ else
+ {
+ table = database;
+ tryGetIdentifierNameInto(table, query->table);
+ }
+
+ if (s_partition.ignore(pos, expected))
+ {
+ if (!partition_parser.parse(pos, query->partition, expected))
+ return false;
+ }
+
+ node = query;
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h
index fb0c390fa0..2a378c6683 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserCheckQuery.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-namespace DB
-{
-/** Query of form
- * CHECK [TABLE] [database.]table
- */
-class ParserCheckQuery : public IParserBase
-{
-protected:
- const char * getName() const override{ return "ALTER query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+namespace DB
+{
+/** Query of form
+ * CHECK [TABLE] [database.]table
+ */
+class ParserCheckQuery : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "ALTER query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp
index 0f768e2232..4bd7371636 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.cpp
@@ -1,44 +1,44 @@
-#include <Parsers/TablePropertiesQueriesASTs.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ParserDescribeTableQuery.h>
-#include <Parsers/ParserTablesInSelectQuery.h>
-
-#include <Common/typeid_cast.h>
-
-
-namespace DB
-{
-
-
-bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_describe("DESCRIBE");
- ParserKeyword s_desc("DESC");
- ParserKeyword s_table("TABLE");
- ParserToken s_dot(TokenType::Dot);
- ParserIdentifier name_p;
-
- ASTPtr database;
- ASTPtr table;
-
- if (!s_describe.ignore(pos, expected) && !s_desc.ignore(pos, expected))
- return false;
-
- auto query = std::make_shared<ASTDescribeQuery>();
-
- s_table.ignore(pos, expected);
-
- ASTPtr table_expression;
- if (!ParserTableExpression().parse(pos, table_expression, expected))
- return false;
-
- query->table_expression = table_expression;
-
- node = query;
-
- return true;
-}
-
-
-}
+#include <Parsers/TablePropertiesQueriesASTs.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ParserDescribeTableQuery.h>
+#include <Parsers/ParserTablesInSelectQuery.h>
+
+#include <Common/typeid_cast.h>
+
+
+namespace DB
+{
+
+
+bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_describe("DESCRIBE");
+ ParserKeyword s_desc("DESC");
+ ParserKeyword s_table("TABLE");
+ ParserToken s_dot(TokenType::Dot);
+ ParserIdentifier name_p;
+
+ ASTPtr database;
+ ASTPtr table;
+
+ if (!s_describe.ignore(pos, expected) && !s_desc.ignore(pos, expected))
+ return false;
+
+ auto query = std::make_shared<ASTDescribeQuery>();
+
+ s_table.ignore(pos, expected);
+
+ ASTPtr table_expression;
+ if (!ParserTableExpression().parse(pos, table_expression, expected))
+ return false;
+
+ query->table_expression = table_expression;
+
+ node = query;
+
+ return true;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h
index caf3590ed3..9878eb2d1c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDescribeTableQuery.h
@@ -1,20 +1,20 @@
-#pragma once
-
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-
-namespace DB
-{
-
-/** Query (DESCRIBE | DESC) ([TABLE] [db.]name | tableFunction) [FORMAT format]
- */
-class ParserDescribeTableQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "DESCRIBE query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+
+namespace DB
+{
+
+/** Query (DESCRIBE | DESC) ([TABLE] [db.]name | tableFunction) [FORMAT format]
+ */
+class ParserDescribeTableQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "DESCRIBE query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp
index 5400f33fbd..37206e85a1 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.cpp
@@ -1,84 +1,84 @@
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTDropQuery.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ParserDropQuery.h>
-
-
-namespace DB
-{
-
-namespace
-{
-
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTDropQuery.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ParserDropQuery.h>
+
+
+namespace DB
+{
+
+namespace
+{
+
bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, const ASTDropQuery::Kind kind)
-{
- ParserKeyword s_temporary("TEMPORARY");
- ParserKeyword s_table("TABLE");
- ParserKeyword s_dictionary("DICTIONARY");
- ParserKeyword s_view("VIEW");
- ParserKeyword s_database("DATABASE");
- ParserToken s_dot(TokenType::Dot);
- ParserKeyword s_if_exists("IF EXISTS");
- ParserIdentifier name_p;
+{
+ ParserKeyword s_temporary("TEMPORARY");
+ ParserKeyword s_table("TABLE");
+ ParserKeyword s_dictionary("DICTIONARY");
+ ParserKeyword s_view("VIEW");
+ ParserKeyword s_database("DATABASE");
+ ParserToken s_dot(TokenType::Dot);
+ ParserKeyword s_if_exists("IF EXISTS");
+ ParserIdentifier name_p;
ParserKeyword s_permanently("PERMANENTLY");
- ParserKeyword s_no_delay("NO DELAY");
+ ParserKeyword s_no_delay("NO DELAY");
ParserKeyword s_sync("SYNC");
-
- ASTPtr database;
- ASTPtr table;
- String cluster_str;
- bool if_exists = false;
- bool temporary = false;
- bool is_dictionary = false;
- bool is_view = false;
- bool no_delay = false;
+
+ ASTPtr database;
+ ASTPtr table;
+ String cluster_str;
+ bool if_exists = false;
+ bool temporary = false;
+ bool is_dictionary = false;
+ bool is_view = false;
+ bool no_delay = false;
bool permanently = false;
-
- if (s_database.ignore(pos, expected))
- {
- if (s_if_exists.ignore(pos, expected))
- if_exists = true;
-
- if (!name_p.parse(pos, database, expected))
- return false;
- }
- else
- {
- if (s_view.ignore(pos, expected))
- is_view = true;
- else if (s_dictionary.ignore(pos, expected))
- is_dictionary = true;
- else if (s_temporary.ignore(pos, expected))
- temporary = true;
-
+
+ if (s_database.ignore(pos, expected))
+ {
+ if (s_if_exists.ignore(pos, expected))
+ if_exists = true;
+
+ if (!name_p.parse(pos, database, expected))
+ return false;
+ }
+ else
+ {
+ if (s_view.ignore(pos, expected))
+ is_view = true;
+ else if (s_dictionary.ignore(pos, expected))
+ is_dictionary = true;
+ else if (s_temporary.ignore(pos, expected))
+ temporary = true;
+
/// for TRUNCATE queries TABLE keyword is assumed as default and can be skipped
if (!is_view && !is_dictionary && (!s_table.ignore(pos, expected) && kind != ASTDropQuery::Kind::Truncate))
- {
- return false;
- }
-
- if (s_if_exists.ignore(pos, expected))
- if_exists = true;
-
- if (!name_p.parse(pos, table, expected))
- return false;
-
- if (s_dot.ignore(pos, expected))
- {
- database = table;
- if (!name_p.parse(pos, table, expected))
- return false;
- }
+ {
+ return false;
+ }
+
+ if (s_if_exists.ignore(pos, expected))
+ if_exists = true;
+
+ if (!name_p.parse(pos, table, expected))
+ return false;
+
+ if (s_dot.ignore(pos, expected))
+ {
+ database = table;
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ }
}
-
+
/// common for tables / dictionaries / databases
if (ParserKeyword{"ON"}.ignore(pos, expected))
{
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
return false;
- }
-
+ }
+
if (kind == ASTDropQuery::Kind::Detach && s_permanently.ignore(pos, expected))
permanently = true;
@@ -86,41 +86,41 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
if (s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected))
no_delay = true;
- auto query = std::make_shared<ASTDropQuery>();
- node = query;
-
+ auto query = std::make_shared<ASTDropQuery>();
+ node = query;
+
query->kind = kind;
- query->if_exists = if_exists;
- query->temporary = temporary;
- query->is_dictionary = is_dictionary;
- query->is_view = is_view;
- query->no_delay = no_delay;
+ query->if_exists = if_exists;
+ query->temporary = temporary;
+ query->is_dictionary = is_dictionary;
+ query->is_view = is_view;
+ query->no_delay = no_delay;
query->permanently = permanently;
-
- tryGetIdentifierNameInto(database, query->database);
- tryGetIdentifierNameInto(table, query->table);
-
- query->cluster = cluster_str;
-
- return true;
-}
-
-}
-
-bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_drop("DROP");
- ParserKeyword s_detach("DETACH");
- ParserKeyword s_truncate("TRUNCATE");
-
- if (s_drop.ignore(pos, expected))
+
+ tryGetIdentifierNameInto(database, query->database);
+ tryGetIdentifierNameInto(table, query->table);
+
+ query->cluster = cluster_str;
+
+ return true;
+}
+
+}
+
+bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_drop("DROP");
+ ParserKeyword s_detach("DETACH");
+ ParserKeyword s_truncate("TRUNCATE");
+
+ if (s_drop.ignore(pos, expected))
return parseDropQuery(pos, node, expected, ASTDropQuery::Kind::Drop);
- else if (s_detach.ignore(pos, expected))
+ else if (s_detach.ignore(pos, expected))
return parseDropQuery(pos, node, expected, ASTDropQuery::Kind::Detach);
- else if (s_truncate.ignore(pos, expected))
+ else if (s_truncate.ignore(pos, expected))
return parseDropQuery(pos, node, expected, ASTDropQuery::Kind::Truncate);
- else
- return false;
-}
-
-}
+ else
+ return false;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h
index 39ff5b7c4f..5dea24aa43 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropQuery.h
@@ -1,26 +1,26 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-
-namespace DB
-{
-
-/** Query like this:
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+
+namespace DB
+{
+
+/** Query like this:
* DROP|DETACH|TRUNCATE TABLE [IF EXISTS] [db.]name [PERMANENTLY]
- *
- * Or:
- * DROP DATABASE [IF EXISTS] db
- *
- * Or:
- * DROP DICTIONARY [IF EXISTS] [db.]name
- */
-class ParserDropQuery : public IParserBase
-{
-protected:
- const char * getName() const override{ return "DROP query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+ *
+ * Or:
+ * DROP DATABASE [IF EXISTS] db
+ *
+ * Or:
+ * DROP DICTIONARY [IF EXISTS] [db.]name
+ */
+class ParserDropQuery : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "DROP query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp
index b4ba052323..8f0ab6d12c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.cpp
@@ -1,60 +1,60 @@
-#include <Parsers/ParserExplainQuery.h>
+#include <Parsers/ParserExplainQuery.h>
-#include <Parsers/ASTExplainQuery.h>
-#include <Parsers/CommonParsers.h>
+#include <Parsers/ASTExplainQuery.h>
+#include <Parsers/CommonParsers.h>
#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/ParserSelectWithUnionQuery.h>
-#include <Parsers/ParserSetQuery.h>
+#include <Parsers/ParserSelectWithUnionQuery.h>
+#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserQuery.h>
-
-namespace DB
-{
-
-bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ASTExplainQuery::ExplainKind kind;
-
- ParserKeyword s_ast("AST");
- ParserKeyword s_explain("EXPLAIN");
- ParserKeyword s_syntax("SYNTAX");
- ParserKeyword s_pipeline("PIPELINE");
- ParserKeyword s_plan("PLAN");
+
+namespace DB
+{
+
+bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ASTExplainQuery::ExplainKind kind;
+
+ ParserKeyword s_ast("AST");
+ ParserKeyword s_explain("EXPLAIN");
+ ParserKeyword s_syntax("SYNTAX");
+ ParserKeyword s_pipeline("PIPELINE");
+ ParserKeyword s_plan("PLAN");
ParserKeyword s_estimates("ESTIMATE");
-
+
if (s_explain.ignore(pos, expected))
- {
- kind = ASTExplainQuery::QueryPlan;
-
- if (s_ast.ignore(pos, expected))
- kind = ASTExplainQuery::ExplainKind::ParsedAST;
- else if (s_syntax.ignore(pos, expected))
- kind = ASTExplainQuery::ExplainKind::AnalyzedSyntax;
- else if (s_pipeline.ignore(pos, expected))
- kind = ASTExplainQuery::ExplainKind::QueryPipeline;
- else if (s_plan.ignore(pos, expected))
+ {
+ kind = ASTExplainQuery::QueryPlan;
+
+ if (s_ast.ignore(pos, expected))
+ kind = ASTExplainQuery::ExplainKind::ParsedAST;
+ else if (s_syntax.ignore(pos, expected))
+ kind = ASTExplainQuery::ExplainKind::AnalyzedSyntax;
+ else if (s_pipeline.ignore(pos, expected))
+ kind = ASTExplainQuery::ExplainKind::QueryPipeline;
+ else if (s_plan.ignore(pos, expected))
kind = ASTExplainQuery::ExplainKind::QueryPlan; //-V1048
else if (s_estimates.ignore(pos, expected))
kind = ASTExplainQuery::ExplainKind::QueryEstimates; //-V1048
- }
- else
- return false;
-
+ }
+ else
+ return false;
+
auto explain_query = std::make_shared<ASTExplainQuery>(kind);
-
- {
- ASTPtr settings;
- ParserSetQuery parser_settings(true);
-
- auto begin = pos;
- if (parser_settings.parse(pos, settings, expected))
- explain_query->setSettings(std::move(settings));
- else
- pos = begin;
- }
-
+
+ {
+ ASTPtr settings;
+ ParserSetQuery parser_settings(true);
+
+ auto begin = pos;
+ if (parser_settings.parse(pos, settings, expected))
+ explain_query->setSettings(std::move(settings));
+ else
+ pos = begin;
+ }
+
ParserCreateTableQuery create_p;
- ParserSelectWithUnionQuery select_p;
- ASTPtr query;
+ ParserSelectWithUnionQuery select_p;
+ ASTPtr query;
if (kind == ASTExplainQuery::ExplainKind::ParsedAST)
{
ParserQuery p(end);
@@ -67,10 +67,10 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
create_p.parse(pos, query, expected))
explain_query->setExplainedQuery(std::move(query));
else
- return false;
-
- node = std::move(explain_query);
- return true;
-}
-
-}
+ return false;
+
+ node = std::move(explain_query);
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h
index a1865e3023..77e8ab90ac 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserExplainQuery.h
@@ -1,20 +1,20 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-namespace DB
-{
-
-
-class ParserExplainQuery : public IParserBase
-{
-protected:
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+namespace DB
+{
+
+
+class ParserExplainQuery : public IParserBase
+{
+protected:
const char * end;
- const char * getName() const override { return "EXPLAIN"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+ const char * getName() const override { return "EXPLAIN"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
ParserExplainQuery(const char* end_) : end(end_) {}
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp
index 19457f027b..ac25d0b1e3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.cpp
@@ -1,107 +1,107 @@
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTSelectWithUnionQuery.h>
-#include <Parsers/ASTInsertQuery.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/ParserSelectWithUnionQuery.h>
-#include <Parsers/ParserWatchQuery.h>
-#include <Parsers/ParserInsertQuery.h>
-#include <Parsers/ParserSetQuery.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTSelectWithUnionQuery.h>
+#include <Parsers/ASTInsertQuery.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/ParserSelectWithUnionQuery.h>
+#include <Parsers/ParserWatchQuery.h>
+#include <Parsers/ParserInsertQuery.h>
+#include <Parsers/ParserSetQuery.h>
#include <Parsers/InsertQuerySettingsPushDownVisitor.h>
-#include <Common/typeid_cast.h>
+#include <Common/typeid_cast.h>
#include "Parsers/IAST_fwd.h"
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int SYNTAX_ERROR;
-}
-
-
-bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_insert_into("INSERT INTO");
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+}
+
+
+bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_insert_into("INSERT INTO");
ParserKeyword s_from_infile("FROM INFILE");
- ParserKeyword s_table("TABLE");
- ParserKeyword s_function("FUNCTION");
- ParserToken s_dot(TokenType::Dot);
- ParserKeyword s_values("VALUES");
- ParserKeyword s_format("FORMAT");
- ParserKeyword s_settings("SETTINGS");
- ParserKeyword s_select("SELECT");
- ParserKeyword s_watch("WATCH");
+ ParserKeyword s_table("TABLE");
+ ParserKeyword s_function("FUNCTION");
+ ParserToken s_dot(TokenType::Dot);
+ ParserKeyword s_values("VALUES");
+ ParserKeyword s_format("FORMAT");
+ ParserKeyword s_settings("SETTINGS");
+ ParserKeyword s_select("SELECT");
+ ParserKeyword s_watch("WATCH");
ParserKeyword s_partition_by("PARTITION BY");
- ParserKeyword s_with("WITH");
- ParserToken s_lparen(TokenType::OpeningRoundBracket);
- ParserToken s_rparen(TokenType::ClosingRoundBracket);
- ParserIdentifier name_p;
+ ParserKeyword s_with("WITH");
+ ParserToken s_lparen(TokenType::OpeningRoundBracket);
+ ParserToken s_rparen(TokenType::ClosingRoundBracket);
+ ParserIdentifier name_p;
ParserList columns_p(std::make_unique<ParserInsertElement>(), std::make_unique<ParserToken>(TokenType::Comma), false);
- ParserFunction table_function_p{false};
+ ParserFunction table_function_p{false};
ParserStringLiteral infile_name_p;
ParserExpressionWithOptionalAlias exp_elem_p(false);
-
- ASTPtr database;
- ASTPtr table;
+
+ ASTPtr database;
+ ASTPtr table;
ASTPtr infile;
- ASTPtr columns;
- ASTPtr format;
- ASTPtr select;
- ASTPtr watch;
- ASTPtr table_function;
- ASTPtr settings_ast;
+ ASTPtr columns;
+ ASTPtr format;
+ ASTPtr select;
+ ASTPtr watch;
+ ASTPtr table_function;
+ ASTPtr settings_ast;
ASTPtr partition_by_expr;
- /// Insertion data
- const char * data = nullptr;
-
- if (!s_insert_into.ignore(pos, expected))
- return false;
-
- s_table.ignore(pos, expected);
-
- if (s_function.ignore(pos, expected))
- {
- if (!table_function_p.parse(pos, table_function, expected))
- return false;
+ /// Insertion data
+ const char * data = nullptr;
+
+ if (!s_insert_into.ignore(pos, expected))
+ return false;
+
+ s_table.ignore(pos, expected);
+
+ if (s_function.ignore(pos, expected))
+ {
+ if (!table_function_p.parse(pos, table_function, expected))
+ return false;
if (s_partition_by.ignore(pos, expected))
{
if (!exp_elem_p.parse(pos, partition_by_expr, expected))
return false;
}
- }
- else
- {
- if (!name_p.parse(pos, table, expected))
- return false;
-
- if (s_dot.ignore(pos, expected))
- {
- database = table;
- if (!name_p.parse(pos, table, expected))
- return false;
- }
- }
-
- /// Is there a list of columns
- if (s_lparen.ignore(pos, expected))
- {
- if (!columns_p.parse(pos, columns, expected))
- return false;
-
- if (!s_rparen.ignore(pos, expected))
- return false;
- }
-
- Pos before_values = pos;
-
+ }
+ else
+ {
+ if (!name_p.parse(pos, table, expected))
+ return false;
+
+ if (s_dot.ignore(pos, expected))
+ {
+ database = table;
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ }
+ }
+
+ /// Is there a list of columns
+ if (s_lparen.ignore(pos, expected))
+ {
+ if (!columns_p.parse(pos, columns, expected))
+ return false;
+
+ if (!s_rparen.ignore(pos, expected))
+ return false;
+ }
+
+ Pos before_values = pos;
+
if (s_from_infile.ignore(pos, expected))
- {
+ {
if (!infile_name_p.parse(pos, infile, expected))
return false;
}
@@ -109,45 +109,45 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// VALUES or FROM INFILE or FORMAT or SELECT
if (!infile && s_values.ignore(pos, expected))
{
- data = pos->begin;
- }
- else if (s_format.ignore(pos, expected))
- {
- if (!name_p.parse(pos, format, expected))
- return false;
- }
- else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
- {
- pos = before_values;
- ParserSelectWithUnionQuery select_p;
- select_p.parse(pos, select, expected);
-
- /// FORMAT section is expected if we have input() in SELECT part
- if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
- return false;
- }
- else if (s_watch.ignore(pos, expected))
- {
- pos = before_values;
- ParserWatchQuery watch_p;
- watch_p.parse(pos, watch, expected);
-
- /// FORMAT section is expected if we have input() in SELECT part
- if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
- return false;
- }
- else
- {
- return false;
- }
-
- if (s_settings.ignore(pos, expected))
- {
- ParserSetQuery parser_settings(true);
- if (!parser_settings.parse(pos, settings_ast, expected))
- return false;
- }
-
+ data = pos->begin;
+ }
+ else if (s_format.ignore(pos, expected))
+ {
+ if (!name_p.parse(pos, format, expected))
+ return false;
+ }
+ else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
+ {
+ pos = before_values;
+ ParserSelectWithUnionQuery select_p;
+ select_p.parse(pos, select, expected);
+
+ /// FORMAT section is expected if we have input() in SELECT part
+ if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
+ return false;
+ }
+ else if (s_watch.ignore(pos, expected))
+ {
+ pos = before_values;
+ ParserWatchQuery watch_p;
+ watch_p.parse(pos, watch, expected);
+
+ /// FORMAT section is expected if we have input() in SELECT part
+ if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (s_settings.ignore(pos, expected))
+ {
+ ParserSetQuery parser_settings(true);
+ if (!parser_settings.parse(pos, settings_ast, expected))
+ return false;
+ }
+
if (select)
{
/// Copy SETTINGS from the INSERT ... SELECT ... SETTINGS
@@ -157,71 +157,71 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (format && !infile)
- {
- Pos last_token = pos;
- --last_token;
- data = last_token->end;
-
- if (data < end && *data == ';')
- throw Exception("You have excessive ';' symbol before data for INSERT.\n"
- "Example:\n\n"
- "INSERT INTO t (x, y) FORMAT TabSeparated\n"
- ";\tHello\n"
- "2\tWorld\n"
- "\n"
- "Note that there is no ';' just after format name, "
- "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
-
- while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
- ++data;
-
- /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
-
- if (data < end && *data == '\r')
- ++data;
-
- if (data < end && *data == '\n')
- ++data;
- }
-
- auto query = std::make_shared<ASTInsertQuery>();
- node = query;
-
+ {
+ Pos last_token = pos;
+ --last_token;
+ data = last_token->end;
+
+ if (data < end && *data == ';')
+ throw Exception("You have excessive ';' symbol before data for INSERT.\n"
+ "Example:\n\n"
+ "INSERT INTO t (x, y) FORMAT TabSeparated\n"
+ ";\tHello\n"
+ "2\tWorld\n"
+ "\n"
+ "Note that there is no ';' just after format name, "
+ "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
+
+ while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
+ ++data;
+
+ /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
+
+ if (data < end && *data == '\r')
+ ++data;
+
+ if (data < end && *data == '\n')
+ ++data;
+ }
+
+ auto query = std::make_shared<ASTInsertQuery>();
+ node = query;
+
if (infile)
query->infile = infile;
- if (table_function)
- {
- query->table_function = table_function;
+ if (table_function)
+ {
+ query->table_function = table_function;
query->partition_by = partition_by_expr;
- }
- else
- {
- tryGetIdentifierNameInto(database, query->table_id.database_name);
- tryGetIdentifierNameInto(table, query->table_id.table_name);
- }
-
- tryGetIdentifierNameInto(format, query->format);
-
- query->columns = columns;
- query->select = select;
- query->watch = watch;
- query->settings_ast = settings_ast;
- query->data = data != end ? data : nullptr;
- query->end = end;
-
- if (columns)
- query->children.push_back(columns);
- if (select)
- query->children.push_back(select);
- if (watch)
- query->children.push_back(watch);
- if (settings_ast)
- query->children.push_back(settings_ast);
-
- return true;
-}
-
+ }
+ else
+ {
+ tryGetIdentifierNameInto(database, query->table_id.database_name);
+ tryGetIdentifierNameInto(table, query->table_id.table_name);
+ }
+
+ tryGetIdentifierNameInto(format, query->format);
+
+ query->columns = columns;
+ query->select = select;
+ query->watch = watch;
+ query->settings_ast = settings_ast;
+ query->data = data != end ? data : nullptr;
+ query->end = end;
+
+ if (columns)
+ query->children.push_back(columns);
+ if (select)
+ query->children.push_back(select);
+ if (watch)
+ query->children.push_back(watch);
+ if (settings_ast)
+ query->children.push_back(settings_ast);
+
+ return true;
+}
+
bool ParserInsertElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
return ParserColumnsMatcher().parse(pos, node, expected)
@@ -229,5 +229,5 @@ bool ParserInsertElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
|| ParserAsterisk().parse(pos, node, expected)
|| ParserCompoundIdentifier().parse(pos, node, expected);
}
-
-}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h
index f98e433551..1f24400c4c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserInsertQuery.h
@@ -1,38 +1,38 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-
-/** Cases:
- *
- * Normal case:
- * INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ...
- * INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ...
- *
- * Insert of data in an arbitrary format.
- * The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise.
- * INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ...
- * INSERT INTO [db.]table FORMAT format \n ...
- *
- * Insert the result of the SELECT or WATCH query.
- * INSERT INTO [db.]table (c1, c2, c3) SELECT | WATCH ...
- * INSERT INTO [db.]table SELECT | WATCH ...
- */
-class ParserInsertQuery : public IParserBase
-{
-private:
- const char * end;
-
- const char * getName() const override { return "INSERT query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-public:
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+
+/** Cases:
+ *
+ * Normal case:
+ * INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ...
+ * INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ...
+ *
+ * Insert of data in an arbitrary format.
+ * The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise.
+ * INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ...
+ * INSERT INTO [db.]table FORMAT format \n ...
+ *
+ * Insert the result of the SELECT or WATCH query.
+ * INSERT INTO [db.]table (c1, c2, c3) SELECT | WATCH ...
+ * INSERT INTO [db.]table SELECT | WATCH ...
+ */
+class ParserInsertQuery : public IParserBase
+{
+private:
+ const char * end;
+
+ const char * getName() const override { return "INSERT query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+public:
explicit ParserInsertQuery(const char * end_) : end(end_) {}
-};
-
+};
+
/** Insert accepts an identifier and an asterisk with variants.
*/
class ParserInsertElement : public IParserBase
@@ -42,4 +42,4 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
-}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp
index a195a778ed..485a3acf83 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.cpp
@@ -1,56 +1,56 @@
-#include <Parsers/ParserKillQueryQuery.h>
-#include <Parsers/ASTKillQueryQuery.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-
-
-namespace DB
-{
-
-
-bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- String cluster_str;
- auto query = std::make_shared<ASTKillQueryQuery>();
-
- ParserKeyword p_kill{"KILL"};
- ParserKeyword p_query{"QUERY"};
- ParserKeyword p_mutation{"MUTATION"};
- ParserKeyword p_on{"ON"};
- ParserKeyword p_test{"TEST"};
- ParserKeyword p_sync{"SYNC"};
- ParserKeyword p_async{"ASYNC"};
- ParserKeyword p_where{"WHERE"};
- ParserExpression p_where_expression;
-
- if (!p_kill.ignore(pos, expected))
- return false;
-
- if (p_query.ignore(pos, expected))
- query->type = ASTKillQueryQuery::Type::Query;
- else if (p_mutation.ignore(pos, expected))
- query->type = ASTKillQueryQuery::Type::Mutation;
- else
- return false;
-
- if (p_on.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
- return false;
-
- if (!p_where.ignore(pos, expected) || !p_where_expression.parse(pos, query->where_expression, expected))
- return false;
-
- if (p_sync.ignore(pos, expected))
- query->sync = true;
- else if (p_async.ignore(pos, expected))
- query->sync = false;
- else if (p_test.ignore(pos, expected))
- query->test = true;
-
- query->cluster = cluster_str;
- query->children.emplace_back(query->where_expression);
- node = std::move(query);
- return true;
-}
-
-}
+#include <Parsers/ParserKillQueryQuery.h>
+#include <Parsers/ASTKillQueryQuery.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+
+
+namespace DB
+{
+
+
+bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ String cluster_str;
+ auto query = std::make_shared<ASTKillQueryQuery>();
+
+ ParserKeyword p_kill{"KILL"};
+ ParserKeyword p_query{"QUERY"};
+ ParserKeyword p_mutation{"MUTATION"};
+ ParserKeyword p_on{"ON"};
+ ParserKeyword p_test{"TEST"};
+ ParserKeyword p_sync{"SYNC"};
+ ParserKeyword p_async{"ASYNC"};
+ ParserKeyword p_where{"WHERE"};
+ ParserExpression p_where_expression;
+
+ if (!p_kill.ignore(pos, expected))
+ return false;
+
+ if (p_query.ignore(pos, expected))
+ query->type = ASTKillQueryQuery::Type::Query;
+ else if (p_mutation.ignore(pos, expected))
+ query->type = ASTKillQueryQuery::Type::Mutation;
+ else
+ return false;
+
+ if (p_on.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
+ return false;
+
+ if (!p_where.ignore(pos, expected) || !p_where_expression.parse(pos, query->where_expression, expected))
+ return false;
+
+ if (p_sync.ignore(pos, expected))
+ query->sync = true;
+ else if (p_async.ignore(pos, expected))
+ query->sync = false;
+ else if (p_test.ignore(pos, expected))
+ query->test = true;
+
+ query->cluster = cluster_str;
+ query->children.emplace_back(query->where_expression);
+ node = std::move(query);
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h
index da75317c52..7019e8a9dd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserKillQueryQuery.h
@@ -1,19 +1,19 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-/** KILL QUERY WHERE <logical expression upon system.processes fields> [SYNC|ASYNC|TEST]
- */
-class ParserKillQueryQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "KILL QUERY query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
-
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+/** KILL QUERY WHERE <logical expression upon system.processes fields> [SYNC|ASYNC|TEST]
+ */
+class ParserKillQueryQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "KILL QUERY query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp
index 441cec1465..ccff3d0282 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.cpp
@@ -1,74 +1,74 @@
-#include <Parsers/ParserOptimizeQuery.h>
-#include <Parsers/ParserPartition.h>
-#include <Parsers/CommonParsers.h>
-
-#include <Parsers/ASTOptimizeQuery.h>
-#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ParserOptimizeQuery.h>
+#include <Parsers/ParserPartition.h>
+#include <Parsers/CommonParsers.h>
+
+#include <Parsers/ASTOptimizeQuery.h>
+#include <Parsers/ASTIdentifier.h>
#include <Parsers/ExpressionListParsers.h>
-
-
-namespace DB
-{
-
+
+
+namespace DB
+{
+
bool ParserOptimizeQueryColumnsSpecification::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
// Do not allow APPLY and REPLACE transformers.
// Since we use Columns Transformers only to get list of columns,
// we can't actually modify content of the columns for deduplication.
const auto allowed_transformers = ParserColumnsTransformers::ColumnTransformers{ParserColumnsTransformers::ColumnTransformer::EXCEPT};
-
+
return ParserColumnsMatcher(allowed_transformers).parse(pos, node, expected)
|| ParserAsterisk(allowed_transformers).parse(pos, node, expected)
|| ParserIdentifier(false).parse(pos, node, expected);
}
-bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_optimize_table("OPTIMIZE TABLE");
- ParserKeyword s_partition("PARTITION");
- ParserKeyword s_final("FINAL");
- ParserKeyword s_deduplicate("DEDUPLICATE");
+bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_optimize_table("OPTIMIZE TABLE");
+ ParserKeyword s_partition("PARTITION");
+ ParserKeyword s_final("FINAL");
+ ParserKeyword s_deduplicate("DEDUPLICATE");
ParserKeyword s_by("BY");
- ParserToken s_dot(TokenType::Dot);
- ParserIdentifier name_p;
- ParserPartition partition_p;
-
- ASTPtr database;
- ASTPtr table;
- ASTPtr partition;
- bool final = false;
- bool deduplicate = false;
- String cluster_str;
-
- if (!s_optimize_table.ignore(pos, expected))
- return false;
-
- if (!name_p.parse(pos, table, expected))
- return false;
-
- if (s_dot.ignore(pos, expected))
- {
- database = table;
- if (!name_p.parse(pos, table, expected))
- return false;
- }
-
- if (ParserKeyword{"ON"}.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
- return false;
-
- if (s_partition.ignore(pos, expected))
- {
- if (!partition_p.parse(pos, partition, expected))
- return false;
- }
-
- if (s_final.ignore(pos, expected))
- final = true;
-
- if (s_deduplicate.ignore(pos, expected))
- deduplicate = true;
-
+ ParserToken s_dot(TokenType::Dot);
+ ParserIdentifier name_p;
+ ParserPartition partition_p;
+
+ ASTPtr database;
+ ASTPtr table;
+ ASTPtr partition;
+ bool final = false;
+ bool deduplicate = false;
+ String cluster_str;
+
+ if (!s_optimize_table.ignore(pos, expected))
+ return false;
+
+ if (!name_p.parse(pos, table, expected))
+ return false;
+
+ if (s_dot.ignore(pos, expected))
+ {
+ database = table;
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ }
+
+ if (ParserKeyword{"ON"}.ignore(pos, expected) && !ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
+ return false;
+
+ if (s_partition.ignore(pos, expected))
+ {
+ if (!partition_p.parse(pos, partition, expected))
+ return false;
+ }
+
+ if (s_final.ignore(pos, expected))
+ final = true;
+
+ if (s_deduplicate.ignore(pos, expected))
+ deduplicate = true;
+
ASTPtr deduplicate_by_columns;
if (deduplicate && s_by.ignore(pos, expected))
{
@@ -77,21 +77,21 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
return false;
}
- auto query = std::make_shared<ASTOptimizeQuery>();
- node = query;
-
- tryGetIdentifierNameInto(database, query->database);
- tryGetIdentifierNameInto(table, query->table);
-
- query->cluster = cluster_str;
+ auto query = std::make_shared<ASTOptimizeQuery>();
+ node = query;
+
+ tryGetIdentifierNameInto(database, query->database);
+ tryGetIdentifierNameInto(table, query->table);
+
+ query->cluster = cluster_str;
if ((query->partition = partition))
query->children.push_back(partition);
- query->final = final;
- query->deduplicate = deduplicate;
+ query->final = final;
+ query->deduplicate = deduplicate;
query->deduplicate_by_columns = deduplicate_by_columns;
-
- return true;
-}
-
-
-}
+
+ return true;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h
index c8294d9ff6..631ff8cbb4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserOptimizeQuery.h
@@ -1,12 +1,12 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-
-namespace DB
-{
-
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+
+namespace DB
+{
+
class ParserOptimizeQueryColumnsSpecification : public IParserBase
{
protected:
@@ -14,13 +14,13 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
-/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE]
- */
-class ParserOptimizeQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "OPTIMIZE query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE]
+ */
+class ParserOptimizeQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "OPTIMIZE query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp
index a3ec4943e1..6ffaf9f2dd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.cpp
@@ -1,95 +1,95 @@
-#include <Parsers/ParserPartition.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/ASTPartition.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTFunction.h>
-#include <Common/typeid_cast.h>
-
-namespace DB
-{
-
-bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_id("ID");
- ParserStringLiteral parser_string_literal;
- ParserExpression parser_expr;
-
- Pos begin = pos;
-
- auto partition = std::make_shared<ASTPartition>();
-
- if (s_id.ignore(pos, expected))
- {
- ASTPtr partition_id;
- if (!parser_string_literal.parse(pos, partition_id, expected))
- return false;
-
- partition->id = partition_id->as<ASTLiteral &>().value.get<String>();
- }
- else
- {
- ASTPtr value;
- if (!parser_expr.parse(pos, value, expected))
- return false;
-
- size_t fields_count;
- String fields_str;
-
- const auto * tuple_ast = value->as<ASTFunction>();
- bool surrounded_by_parens = false;
- if (tuple_ast && tuple_ast->name == "tuple")
- {
- surrounded_by_parens = true;
- const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
- if (arguments_ast)
- fields_count = arguments_ast->children.size();
- else
- fields_count = 0;
- }
- else if (const auto * literal = value->as<ASTLiteral>())
- {
- if (literal->value.getType() == Field::Types::Tuple)
- {
- surrounded_by_parens = true;
- fields_count = literal->value.get<const Tuple &>().size();
- }
- else
- {
- fields_count = 1;
- fields_str = String(begin->begin, pos->begin - begin->begin);
- }
- }
- else
- return false;
-
- if (surrounded_by_parens)
- {
- Pos left_paren = begin;
- Pos right_paren = pos;
-
- while (left_paren != right_paren && left_paren->type != TokenType::OpeningRoundBracket)
- ++left_paren;
- if (left_paren->type != TokenType::OpeningRoundBracket)
- return false;
-
- while (right_paren != left_paren && right_paren->type != TokenType::ClosingRoundBracket)
- --right_paren;
- if (right_paren->type != TokenType::ClosingRoundBracket)
- return false;
-
- fields_str = String(left_paren->end, right_paren->begin - left_paren->end);
- }
-
- partition->value = value;
- partition->children.push_back(value);
- partition->fields_str = std::move(fields_str);
- partition->fields_count = fields_count;
- }
-
- node = partition;
- return true;
-}
-
-}
+#include <Parsers/ParserPartition.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/ASTPartition.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTFunction.h>
+#include <Common/typeid_cast.h>
+
+namespace DB
+{
+
+bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_id("ID");
+ ParserStringLiteral parser_string_literal;
+ ParserExpression parser_expr;
+
+ Pos begin = pos;
+
+ auto partition = std::make_shared<ASTPartition>();
+
+ if (s_id.ignore(pos, expected))
+ {
+ ASTPtr partition_id;
+ if (!parser_string_literal.parse(pos, partition_id, expected))
+ return false;
+
+ partition->id = partition_id->as<ASTLiteral &>().value.get<String>();
+ }
+ else
+ {
+ ASTPtr value;
+ if (!parser_expr.parse(pos, value, expected))
+ return false;
+
+ size_t fields_count;
+ String fields_str;
+
+ const auto * tuple_ast = value->as<ASTFunction>();
+ bool surrounded_by_parens = false;
+ if (tuple_ast && tuple_ast->name == "tuple")
+ {
+ surrounded_by_parens = true;
+ const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
+ if (arguments_ast)
+ fields_count = arguments_ast->children.size();
+ else
+ fields_count = 0;
+ }
+ else if (const auto * literal = value->as<ASTLiteral>())
+ {
+ if (literal->value.getType() == Field::Types::Tuple)
+ {
+ surrounded_by_parens = true;
+ fields_count = literal->value.get<const Tuple &>().size();
+ }
+ else
+ {
+ fields_count = 1;
+ fields_str = String(begin->begin, pos->begin - begin->begin);
+ }
+ }
+ else
+ return false;
+
+ if (surrounded_by_parens)
+ {
+ Pos left_paren = begin;
+ Pos right_paren = pos;
+
+ while (left_paren != right_paren && left_paren->type != TokenType::OpeningRoundBracket)
+ ++left_paren;
+ if (left_paren->type != TokenType::OpeningRoundBracket)
+ return false;
+
+ while (right_paren != left_paren && right_paren->type != TokenType::ClosingRoundBracket)
+ --right_paren;
+ if (right_paren->type != TokenType::ClosingRoundBracket)
+ return false;
+
+ fields_str = String(left_paren->end, right_paren->begin - left_paren->end);
+ }
+
+ partition->value = value;
+ partition->children.push_back(value);
+ partition->fields_str = std::move(fields_str);
+ partition->fields_count = fields_count;
+ }
+
+ node = partition;
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h
index 2bb7048fd8..5f454438bf 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserPartition.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-namespace DB
-{
-
-/// Parse either a partition value as a (possibly compound) literal or a partition ID.
-/// Produce ASTPartition.
-class ParserPartition : public IParserBase
-{
-protected:
- const char * getName() const override { return "partition"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+namespace DB
+{
+
+/// Parse either a partition value as a (possibly compound) literal or a partition ID.
+/// Produce ASTPartition.
+class ParserPartition : public IParserBase
+{
+protected:
+ const char * getName() const override { return "partition"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp
index c42a0af88b..2893611fb0 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.cpp
@@ -1,56 +1,56 @@
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTRenameQuery.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ParserRenameQuery.h>
-
-
-namespace DB
-{
-
-
-/// Parse database.table or table.
-static bool parseDatabaseAndTable(
- ASTRenameQuery::Table & db_and_table, IParser::Pos & pos, Expected & expected)
-{
- ParserIdentifier name_p;
- ParserToken s_dot(TokenType::Dot);
-
- ASTPtr database;
- ASTPtr table;
-
- if (!name_p.parse(pos, table, expected))
- return false;
-
- if (s_dot.ignore(pos, expected))
- {
- database = table;
- if (!name_p.parse(pos, table, expected))
- return false;
- }
-
- db_and_table.database.clear();
- tryGetIdentifierNameInto(database, db_and_table.database);
- tryGetIdentifierNameInto(table, db_and_table.table);
-
- return true;
-}
-
-
-bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_rename_table("RENAME TABLE");
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTRenameQuery.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ParserRenameQuery.h>
+
+
+namespace DB
+{
+
+
+/// Parse database.table or table.
+static bool parseDatabaseAndTable(
+ ASTRenameQuery::Table & db_and_table, IParser::Pos & pos, Expected & expected)
+{
+ ParserIdentifier name_p;
+ ParserToken s_dot(TokenType::Dot);
+
+ ASTPtr database;
+ ASTPtr table;
+
+ if (!name_p.parse(pos, table, expected))
+ return false;
+
+ if (s_dot.ignore(pos, expected))
+ {
+ database = table;
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ }
+
+ db_and_table.database.clear();
+ tryGetIdentifierNameInto(database, db_and_table.database);
+ tryGetIdentifierNameInto(table, db_and_table.table);
+
+ return true;
+}
+
+
+bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_rename_table("RENAME TABLE");
ParserKeyword s_exchange_tables("EXCHANGE TABLES");
ParserKeyword s_rename_dictionary("RENAME DICTIONARY");
ParserKeyword s_exchange_dictionaries("EXCHANGE DICTIONARIES");
ParserKeyword s_rename_database("RENAME DATABASE");
- ParserKeyword s_to("TO");
- ParserKeyword s_and("AND");
- ParserToken s_comma(TokenType::Comma);
-
- bool exchange = false;
+ ParserKeyword s_to("TO");
+ ParserKeyword s_and("AND");
+ ParserToken s_comma(TokenType::Comma);
+
+ bool exchange = false;
bool dictionary = false;
-
+
if (s_rename_table.ignore(pos, expected))
;
else if (s_exchange_tables.ignore(pos, expected))
@@ -63,12 +63,12 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
dictionary = true;
}
else if (s_rename_database.ignore(pos, expected))
- {
+ {
ASTPtr from_db;
ASTPtr to_db;
ParserIdentifier db_name_p;
if (!db_name_p.parse(pos, from_db, expected))
- return false;
+ return false;
if (!s_to.ignore(pos, expected))
return false;
if (!db_name_p.parse(pos, to_db, expected))
@@ -89,43 +89,43 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->cluster = cluster_str;
node = query;
return true;
- }
+ }
else
return false;
-
- ASTRenameQuery::Elements elements;
-
+
+ ASTRenameQuery::Elements elements;
+
const auto ignore_delim = [&] { return exchange ? s_and.ignore(pos) : s_to.ignore(pos); };
-
- while (true)
- {
- if (!elements.empty() && !s_comma.ignore(pos))
- break;
-
+
+ while (true)
+ {
+ if (!elements.empty() && !s_comma.ignore(pos))
+ break;
+
ASTRenameQuery::Element& ref = elements.emplace_back();
-
+
if (!parseDatabaseAndTable(ref.from, pos, expected)
- || !ignore_delim()
+ || !ignore_delim()
|| !parseDatabaseAndTable(ref.to, pos, expected))
- return false;
- }
-
- String cluster_str;
- if (ParserKeyword{"ON"}.ignore(pos, expected))
- {
- if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
- return false;
- }
-
- auto query = std::make_shared<ASTRenameQuery>();
- query->cluster = cluster_str;
- node = query;
-
- query->elements = elements;
- query->exchange = exchange;
+ return false;
+ }
+
+ String cluster_str;
+ if (ParserKeyword{"ON"}.ignore(pos, expected))
+ {
+ if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
+ return false;
+ }
+
+ auto query = std::make_shared<ASTRenameQuery>();
+ query->cluster = cluster_str;
+ node = query;
+
+ query->elements = elements;
+ query->exchange = exchange;
query->dictionary = dictionary;
- return true;
-}
-
-
-}
+ return true;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h
index c95bc893b5..b79b7de6a5 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRenameQuery.h
@@ -1,21 +1,21 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-
-namespace DB
-{
-
-/** Query like this:
- * RENAME TABLE [db.]name TO [db.]name, [db.]name TO [db.]name, ...
- * (An arbitrary number of tables can be renamed.)
- */
-class ParserRenameQuery : public IParserBase
-{
-protected:
- const char * getName() const override{ return "RENAME query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+
+namespace DB
+{
+
+/** Query like this:
+ * RENAME TABLE [db.]name TO [db.]name, [db.]name TO [db.]name, ...
+ * (An arbitrary number of tables can be renamed.)
+ */
+class ParserRenameQuery : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "RENAME query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp
index 41e9ee6501..86195accc9 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.cpp
@@ -1,145 +1,145 @@
-#include <Parsers/ParserRolesOrUsersSet.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTRolesOrUsersSet.h>
-#include <Parsers/parseUserName.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <boost/range/algorithm/find.hpp>
-
-
-namespace DB
-{
-namespace
-{
+#include <Parsers/ParserRolesOrUsersSet.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTRolesOrUsersSet.h>
+#include <Parsers/parseUserName.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <boost/range/algorithm/find.hpp>
+
+
+namespace DB
+{
+namespace
+{
bool parseNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- if (!id_mode)
- return parseRoleName(pos, expected, res);
-
- if (!ParserKeyword{"ID"}.ignore(pos, expected))
- return false;
- if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
- return false;
- ASTPtr ast;
- if (!ParserStringLiteral{}.parse(pos, ast, expected))
- return false;
- String id = ast->as<ASTLiteral &>().value.safeGet<String>();
- if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
- return false;
-
- res = std::move(id);
- return true;
- });
- }
-
- bool parseBeforeExcept(
- IParserBase::Pos & pos,
- Expected & expected,
- bool id_mode,
- bool allow_all,
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ if (!id_mode)
+ return parseRoleName(pos, expected, res);
+
+ if (!ParserKeyword{"ID"}.ignore(pos, expected))
+ return false;
+ if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
+ return false;
+ ASTPtr ast;
+ if (!ParserStringLiteral{}.parse(pos, ast, expected))
+ return false;
+ String id = ast->as<ASTLiteral &>().value.safeGet<String>();
+ if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+ return false;
+
+ res = std::move(id);
+ return true;
+ });
+ }
+
+ bool parseBeforeExcept(
+ IParserBase::Pos & pos,
+ Expected & expected,
+ bool id_mode,
+ bool allow_all,
bool allow_any,
- bool allow_current_user,
+ bool allow_current_user,
bool & all,
- Strings & names,
- bool & current_user)
- {
- bool res_all = false;
+ Strings & names,
+ bool & current_user)
+ {
+ bool res_all = false;
Strings res_names;
- bool res_current_user = false;
+ bool res_current_user = false;
Strings res_with_roles_names;
-
- auto parse_element = [&]
- {
- if (ParserKeyword{"NONE"}.ignore(pos, expected))
- return true;
-
- if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected))
- {
- res_all = true;
- return true;
- }
-
+
+ auto parse_element = [&]
+ {
+ if (ParserKeyword{"NONE"}.ignore(pos, expected))
+ return true;
+
+ if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected))
+ {
+ res_all = true;
+ return true;
+ }
+
if (allow_any && ParserKeyword{"ANY"}.ignore(pos, expected))
{
res_all = true;
return true;
}
- if (allow_current_user && parseCurrentUserTag(pos, expected))
- {
- res_current_user = true;
- return true;
- }
-
- String name;
+ if (allow_current_user && parseCurrentUserTag(pos, expected))
+ {
+ res_current_user = true;
+ return true;
+ }
+
+ String name;
if (parseNameOrID(pos, expected, id_mode, name))
- {
- res_names.emplace_back(std::move(name));
- return true;
- }
-
- return false;
- };
-
- if (!ParserList::parseUtil(pos, expected, parse_element, false))
- return false;
-
- names = std::move(res_names);
+ {
+ res_names.emplace_back(std::move(name));
+ return true;
+ }
+
+ return false;
+ };
+
+ if (!ParserList::parseUtil(pos, expected, parse_element, false))
+ return false;
+
+ names = std::move(res_names);
current_user = res_current_user;
- all = res_all;
- return true;
- }
-
- bool parseExceptAndAfterExcept(
- IParserBase::Pos & pos,
- Expected & expected,
- bool id_mode,
- bool allow_current_user,
- Strings & except_names,
- bool & except_current_user)
- {
+ all = res_all;
+ return true;
+ }
+
+ bool parseExceptAndAfterExcept(
+ IParserBase::Pos & pos,
+ Expected & expected,
+ bool id_mode,
+ bool allow_current_user,
+ Strings & except_names,
+ bool & except_current_user)
+ {
return IParserBase::wrapParseImpl(pos, [&] {
- if (!ParserKeyword{"EXCEPT"}.ignore(pos, expected))
- return false;
-
- bool unused;
+ if (!ParserKeyword{"EXCEPT"}.ignore(pos, expected))
+ return false;
+
+ bool unused;
return parseBeforeExcept(pos, expected, id_mode, false, false, allow_current_user, unused, except_names, except_current_user);
- });
- }
-}
-
-
-bool ParserRolesOrUsersSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
+ });
+ }
+}
+
+
+bool ParserRolesOrUsersSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
bool all = false;
- Strings names;
- bool current_user = false;
- Strings except_names;
- bool except_current_user = false;
-
+ Strings names;
+ bool current_user = false;
+ Strings except_names;
+ bool except_current_user = false;
+
if (!parseBeforeExcept(pos, expected, id_mode, allow_all, allow_any, allow_current_user, all, names, current_user))
- return false;
-
- parseExceptAndAfterExcept(pos, expected, id_mode, allow_current_user, except_names, except_current_user);
-
- if (all)
- names.clear();
-
- auto result = std::make_shared<ASTRolesOrUsersSet>();
- result->names = std::move(names);
- result->current_user = current_user;
- result->all = all;
- result->except_names = std::move(except_names);
- result->except_current_user = except_current_user;
+ return false;
+
+ parseExceptAndAfterExcept(pos, expected, id_mode, allow_current_user, except_names, except_current_user);
+
+ if (all)
+ names.clear();
+
+ auto result = std::make_shared<ASTRolesOrUsersSet>();
+ result->names = std::move(names);
+ result->current_user = current_user;
+ result->all = all;
+ result->except_names = std::move(except_names);
+ result->except_current_user = except_current_user;
result->allow_users = allow_users;
result->allow_roles = allow_roles;
- result->id_mode = id_mode;
+ result->id_mode = id_mode;
result->use_keyword_any = all && allow_any && !allow_all;
- node = result;
- return true;
-}
-
-}
+ node = result;
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h
index 9ae9937e78..46cb0bbd43 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserRolesOrUsersSet.h
@@ -1,35 +1,35 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-/** Parses a string like this:
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/** Parses a string like this:
* {user_name | role_name | CURRENT_USER | ALL | NONE} [,...]
* [EXCEPT {user_name | role_name | CURRENT_USER | ALL | NONE} [,...]]
- */
-class ParserRolesOrUsersSet : public IParserBase
-{
-public:
- ParserRolesOrUsersSet & allowAll(bool allow_all_ = true) { allow_all = allow_all_; return *this; }
+ */
+class ParserRolesOrUsersSet : public IParserBase
+{
+public:
+ ParserRolesOrUsersSet & allowAll(bool allow_all_ = true) { allow_all = allow_all_; return *this; }
ParserRolesOrUsersSet & allowAny(bool allow_any_ = true) { allow_any = allow_any_; return *this; }
ParserRolesOrUsersSet & allowUsers(bool allow_users_ = true) { allow_users = allow_users_; return *this; }
- ParserRolesOrUsersSet & allowCurrentUser(bool allow_current_user_ = true) { allow_current_user = allow_current_user_; return *this; }
+ ParserRolesOrUsersSet & allowCurrentUser(bool allow_current_user_ = true) { allow_current_user = allow_current_user_; return *this; }
ParserRolesOrUsersSet & allowRoles(bool allow_roles_ = true) { allow_roles = allow_roles_; return *this; }
- ParserRolesOrUsersSet & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; }
-
-protected:
- const char * getName() const override { return "RolesOrUsersSet"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-
-private:
- bool allow_all = false;
+ ParserRolesOrUsersSet & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; }
+
+protected:
+ const char * getName() const override { return "RolesOrUsersSet"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+private:
+ bool allow_all = false;
bool allow_any = false;
bool allow_users = false;
- bool allow_current_user = false;
+ bool allow_current_user = false;
bool allow_roles = false;
- bool id_mode = false;
-};
-
-}
+ bool id_mode = false;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp
index 678474af04..2ddb27cc83 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.cpp
@@ -1,86 +1,86 @@
-#include <Parsers/ParserSetRoleQuery.h>
-#include <Parsers/ASTSetRoleQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ASTRolesOrUsersSet.h>
-#include <Parsers/ParserRolesOrUsersSet.h>
-
-
-namespace DB
-{
-namespace
-{
- bool parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & roles)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- ASTPtr ast;
- ParserRolesOrUsersSet roles_p;
+#include <Parsers/ParserSetRoleQuery.h>
+#include <Parsers/ASTSetRoleQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ASTRolesOrUsersSet.h>
+#include <Parsers/ParserRolesOrUsersSet.h>
+
+
+namespace DB
+{
+namespace
+{
+ bool parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & roles)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ ASTPtr ast;
+ ParserRolesOrUsersSet roles_p;
roles_p.allowRoles().allowAll();
- if (!roles_p.parse(pos, ast, expected))
- return false;
-
- roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast);
+ if (!roles_p.parse(pos, ast, expected))
+ return false;
+
+ roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast);
roles->allow_users = false;
- return true;
- });
- }
-
- bool parseToUsers(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & to_users)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- if (!ParserKeyword{"TO"}.ignore(pos, expected))
- return false;
-
- ASTPtr ast;
- ParserRolesOrUsersSet users_p;
+ return true;
+ });
+ }
+
+ bool parseToUsers(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTRolesOrUsersSet> & to_users)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ if (!ParserKeyword{"TO"}.ignore(pos, expected))
+ return false;
+
+ ASTPtr ast;
+ ParserRolesOrUsersSet users_p;
users_p.allowUsers().allowCurrentUser();
- if (!users_p.parse(pos, ast, expected))
- return false;
-
- to_users = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast);
+ if (!users_p.parse(pos, ast, expected))
+ return false;
+
+ to_users = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(ast);
to_users->allow_roles = false;
- return true;
- });
- }
-}
-
-
-bool ParserSetRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- using Kind = ASTSetRoleQuery::Kind;
- Kind kind;
- if (ParserKeyword{"SET ROLE DEFAULT"}.ignore(pos, expected))
- kind = Kind::SET_ROLE_DEFAULT;
- else if (ParserKeyword{"SET ROLE"}.ignore(pos, expected))
- kind = Kind::SET_ROLE;
- else if (ParserKeyword{"SET DEFAULT ROLE"}.ignore(pos, expected))
- kind = Kind::SET_DEFAULT_ROLE;
- else
- return false;
-
- std::shared_ptr<ASTRolesOrUsersSet> roles;
- std::shared_ptr<ASTRolesOrUsersSet> to_users;
-
- if ((kind == Kind::SET_ROLE) || (kind == Kind::SET_DEFAULT_ROLE))
- {
- if (!parseRoles(pos, expected, roles))
- return false;
-
- if (kind == Kind::SET_DEFAULT_ROLE)
- {
- if (!parseToUsers(pos, expected, to_users))
- return false;
- }
- }
-
- auto query = std::make_shared<ASTSetRoleQuery>();
- node = query;
-
- query->kind = kind;
- query->roles = std::move(roles);
- query->to_users = std::move(to_users);
-
- return true;
-}
-}
+ return true;
+ });
+ }
+}
+
+
+bool ParserSetRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ using Kind = ASTSetRoleQuery::Kind;
+ Kind kind;
+ if (ParserKeyword{"SET ROLE DEFAULT"}.ignore(pos, expected))
+ kind = Kind::SET_ROLE_DEFAULT;
+ else if (ParserKeyword{"SET ROLE"}.ignore(pos, expected))
+ kind = Kind::SET_ROLE;
+ else if (ParserKeyword{"SET DEFAULT ROLE"}.ignore(pos, expected))
+ kind = Kind::SET_DEFAULT_ROLE;
+ else
+ return false;
+
+ std::shared_ptr<ASTRolesOrUsersSet> roles;
+ std::shared_ptr<ASTRolesOrUsersSet> to_users;
+
+ if ((kind == Kind::SET_ROLE) || (kind == Kind::SET_DEFAULT_ROLE))
+ {
+ if (!parseRoles(pos, expected, roles))
+ return false;
+
+ if (kind == Kind::SET_DEFAULT_ROLE)
+ {
+ if (!parseToUsers(pos, expected, to_users))
+ return false;
+ }
+ }
+
+ auto query = std::make_shared<ASTSetRoleQuery>();
+ node = query;
+
+ query->kind = kind;
+ query->roles = std::move(roles);
+ query->to_users = std::move(to_users);
+
+ return true;
+}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h
index 7e59f08e7b..54ced410d3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSetRoleQuery.h
@@ -1,18 +1,18 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-/** Parses queries like
- * SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]}
- * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...]
- */
-class ParserSetRoleQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SET ROLE or SET DEFAULT ROLE query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/** Parses queries like
+ * SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]}
+ * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...]
+ */
+class ParserSetRoleQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SET ROLE or SET DEFAULT ROLE query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp
index d7d982efe2..a30092ae71 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.cpp
@@ -1,20 +1,20 @@
-#include <Parsers/ParserSettingsProfileElement.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTSettingsProfileElement.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/parseIdentifierOrStringLiteral.h>
+#include <Parsers/ParserSettingsProfileElement.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTSettingsProfileElement.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <boost/algorithm/string/predicate.hpp>
-
-
-namespace DB
-{
-namespace
-{
- bool parseProfileKeyword(IParserBase::Pos & pos, Expected & expected, bool use_inherit_keyword)
- {
+
+
+namespace DB
+{
+namespace
+{
+ bool parseProfileKeyword(IParserBase::Pos & pos, Expected & expected, bool use_inherit_keyword)
+ {
if (ParserKeyword{"PROFILE"}.ignore(pos, expected))
return true;
@@ -25,127 +25,127 @@ namespace
}
return false;
- }
-
-
- bool parseProfileNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- ASTPtr ast;
- if (!id_mode)
- return parseIdentifierOrStringLiteral(pos, expected, res);
-
- if (!ParserKeyword{"ID"}.ignore(pos, expected))
- return false;
- if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
- return false;
- if (!ParserStringLiteral{}.parse(pos, ast, expected))
- return false;
- String id = ast->as<ASTLiteral &>().value.safeGet<String>();
- if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
- return false;
-
- res = std::move(id);
- return true;
- });
- }
-
-
- bool parseValue(IParserBase::Pos & pos, Expected & expected, Field & res)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- if (!ParserToken{TokenType::Equals}.ignore(pos, expected))
- return false;
-
- ASTPtr ast;
- if (!ParserLiteral{}.parse(pos, ast, expected))
- return false;
-
- res = ast->as<ASTLiteral &>().value;
- return true;
- });
- }
-
-
- bool parseMinMaxValue(IParserBase::Pos & pos, Expected & expected, Field & min_value, Field & max_value)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- bool is_min_value = ParserKeyword{"MIN"}.ignore(pos, expected);
- bool is_max_value = !is_min_value && ParserKeyword{"MAX"}.ignore(pos, expected);
- if (!is_min_value && !is_max_value)
- return false;
-
- ParserToken{TokenType::Equals}.ignore(pos, expected);
-
- ASTPtr ast;
- if (!ParserLiteral{}.parse(pos, ast, expected))
- return false;
-
- auto min_or_max_value = ast->as<ASTLiteral &>().value;
-
- if (is_min_value)
- min_value = min_or_max_value;
- else
- max_value = min_or_max_value;
- return true;
- });
- }
-
-
- bool parseReadonlyOrWritableKeyword(IParserBase::Pos & pos, Expected & expected, std::optional<bool> & readonly)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- if (ParserKeyword{"READONLY"}.ignore(pos, expected))
- {
- readonly = true;
- return true;
- }
- else if (ParserKeyword{"WRITABLE"}.ignore(pos, expected))
- {
- readonly = false;
- return true;
- }
- else
- return false;
- });
- }
-
-
- bool parseSettingNameWithValueOrConstraints(
- IParserBase::Pos & pos,
- Expected & expected,
- String & setting_name,
- Field & value,
- Field & min_value,
- Field & max_value,
- std::optional<bool> & readonly)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- ASTPtr name_ast;
+ }
+
+
+ bool parseProfileNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ ASTPtr ast;
+ if (!id_mode)
+ return parseIdentifierOrStringLiteral(pos, expected, res);
+
+ if (!ParserKeyword{"ID"}.ignore(pos, expected))
+ return false;
+ if (!ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
+ return false;
+ if (!ParserStringLiteral{}.parse(pos, ast, expected))
+ return false;
+ String id = ast->as<ASTLiteral &>().value.safeGet<String>();
+ if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+ return false;
+
+ res = std::move(id);
+ return true;
+ });
+ }
+
+
+ bool parseValue(IParserBase::Pos & pos, Expected & expected, Field & res)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ if (!ParserToken{TokenType::Equals}.ignore(pos, expected))
+ return false;
+
+ ASTPtr ast;
+ if (!ParserLiteral{}.parse(pos, ast, expected))
+ return false;
+
+ res = ast->as<ASTLiteral &>().value;
+ return true;
+ });
+ }
+
+
+ bool parseMinMaxValue(IParserBase::Pos & pos, Expected & expected, Field & min_value, Field & max_value)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ bool is_min_value = ParserKeyword{"MIN"}.ignore(pos, expected);
+ bool is_max_value = !is_min_value && ParserKeyword{"MAX"}.ignore(pos, expected);
+ if (!is_min_value && !is_max_value)
+ return false;
+
+ ParserToken{TokenType::Equals}.ignore(pos, expected);
+
+ ASTPtr ast;
+ if (!ParserLiteral{}.parse(pos, ast, expected))
+ return false;
+
+ auto min_or_max_value = ast->as<ASTLiteral &>().value;
+
+ if (is_min_value)
+ min_value = min_or_max_value;
+ else
+ max_value = min_or_max_value;
+ return true;
+ });
+ }
+
+
+ bool parseReadonlyOrWritableKeyword(IParserBase::Pos & pos, Expected & expected, std::optional<bool> & readonly)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ if (ParserKeyword{"READONLY"}.ignore(pos, expected))
+ {
+ readonly = true;
+ return true;
+ }
+ else if (ParserKeyword{"WRITABLE"}.ignore(pos, expected))
+ {
+ readonly = false;
+ return true;
+ }
+ else
+ return false;
+ });
+ }
+
+
+ bool parseSettingNameWithValueOrConstraints(
+ IParserBase::Pos & pos,
+ Expected & expected,
+ String & setting_name,
+ Field & value,
+ Field & min_value,
+ Field & max_value,
+ std::optional<bool> & readonly)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ ASTPtr name_ast;
if (!ParserCompoundIdentifier{}.parse(pos, name_ast, expected))
- return false;
-
- String res_setting_name = getIdentifierName(name_ast);
- Field res_value;
- Field res_min_value;
- Field res_max_value;
- std::optional<bool> res_readonly;
-
- bool has_value_or_constraint = false;
- while (parseValue(pos, expected, res_value) || parseMinMaxValue(pos, expected, res_min_value, res_max_value)
- || parseReadonlyOrWritableKeyword(pos, expected, res_readonly))
- {
- has_value_or_constraint = true;
- }
-
- if (!has_value_or_constraint)
- return false;
-
+ return false;
+
+ String res_setting_name = getIdentifierName(name_ast);
+ Field res_value;
+ Field res_min_value;
+ Field res_max_value;
+ std::optional<bool> res_readonly;
+
+ bool has_value_or_constraint = false;
+ while (parseValue(pos, expected, res_value) || parseMinMaxValue(pos, expected, res_min_value, res_max_value)
+ || parseReadonlyOrWritableKeyword(pos, expected, res_readonly))
+ {
+ has_value_or_constraint = true;
+ }
+
+ if (!has_value_or_constraint)
+ return false;
+
if (boost::iequals(res_setting_name, "PROFILE") && res_value.isNull() && res_min_value.isNull() && res_max_value.isNull()
&& res_readonly)
{
@@ -155,96 +155,96 @@ namespace
return false;
}
- setting_name = std::move(res_setting_name);
- value = std::move(res_value);
- min_value = std::move(res_min_value);
- max_value = std::move(res_max_value);
- readonly = res_readonly;
- return true;
- });
- }
-
-
- bool parseSettingsProfileElement(IParserBase::Pos & pos,
- Expected & expected,
- bool id_mode,
- bool use_inherit_keyword,
- bool previous_element_was_parent_profile,
- std::shared_ptr<ASTSettingsProfileElement> & result)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- String parent_profile;
- String setting_name;
- Field value;
- Field min_value;
- Field max_value;
- std::optional<bool> readonly;
-
+ setting_name = std::move(res_setting_name);
+ value = std::move(res_value);
+ min_value = std::move(res_min_value);
+ max_value = std::move(res_max_value);
+ readonly = res_readonly;
+ return true;
+ });
+ }
+
+
+ bool parseSettingsProfileElement(IParserBase::Pos & pos,
+ Expected & expected,
+ bool id_mode,
+ bool use_inherit_keyword,
+ bool previous_element_was_parent_profile,
+ std::shared_ptr<ASTSettingsProfileElement> & result)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ String parent_profile;
+ String setting_name;
+ Field value;
+ Field min_value;
+ Field max_value;
+ std::optional<bool> readonly;
+
bool ok = parseSettingNameWithValueOrConstraints(pos, expected, setting_name, value, min_value, max_value, readonly);
if (!ok && (parseProfileKeyword(pos, expected, use_inherit_keyword) || previous_element_was_parent_profile))
ok = parseProfileNameOrID(pos, expected, id_mode, parent_profile);
if (!ok)
- return false;
-
- result = std::make_shared<ASTSettingsProfileElement>();
- result->parent_profile = std::move(parent_profile);
- result->setting_name = std::move(setting_name);
- result->value = std::move(value);
- result->min_value = std::move(min_value);
- result->max_value = std::move(max_value);
- result->readonly = readonly;
- result->id_mode = id_mode;
- result->use_inherit_keyword = use_inherit_keyword;
- return true;
- });
- }
-}
-
-
-bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- std::shared_ptr<ASTSettingsProfileElement> res;
- if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, false, res))
- return false;
-
- node = res;
- return true;
-}
-
-
-bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements;
-
- if (ParserKeyword{"NONE"}.ignore(pos, expected))
- {
- }
- else
- {
- bool previous_element_was_parent_profile = false;
-
- auto parse_element = [&]
- {
- std::shared_ptr<ASTSettingsProfileElement> element;
- if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, previous_element_was_parent_profile, element))
- return false;
-
- elements.push_back(element);
- previous_element_was_parent_profile = !element->parent_profile.empty();
- return true;
- };
-
- if (!ParserList::parseUtil(pos, expected, parse_element, false))
- return false;
- }
-
- auto result = std::make_shared<ASTSettingsProfileElements>();
- result->elements = std::move(elements);
- node = result;
- return true;
-}
-
-}
+ return false;
+
+ result = std::make_shared<ASTSettingsProfileElement>();
+ result->parent_profile = std::move(parent_profile);
+ result->setting_name = std::move(setting_name);
+ result->value = std::move(value);
+ result->min_value = std::move(min_value);
+ result->max_value = std::move(max_value);
+ result->readonly = readonly;
+ result->id_mode = id_mode;
+ result->use_inherit_keyword = use_inherit_keyword;
+ return true;
+ });
+ }
+}
+
+
+bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ std::shared_ptr<ASTSettingsProfileElement> res;
+ if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, false, res))
+ return false;
+
+ node = res;
+ return true;
+}
+
+
+bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements;
+
+ if (ParserKeyword{"NONE"}.ignore(pos, expected))
+ {
+ }
+ else
+ {
+ bool previous_element_was_parent_profile = false;
+
+ auto parse_element = [&]
+ {
+ std::shared_ptr<ASTSettingsProfileElement> element;
+ if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, previous_element_was_parent_profile, element))
+ return false;
+
+ elements.push_back(element);
+ previous_element_was_parent_profile = !element->parent_profile.empty();
+ return true;
+ };
+
+ if (!ParserList::parseUtil(pos, expected, parse_element, false))
+ return false;
+ }
+
+ auto result = std::make_shared<ASTSettingsProfileElements>();
+ result->elements = std::move(elements);
+ node = result;
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h
index 8843591a56..a54adc4f88 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSettingsProfileElement.h
@@ -1,42 +1,42 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-/** Parses a string like this:
- * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'
- */
-class ParserSettingsProfileElement : public IParserBase
-{
-public:
- ParserSettingsProfileElement & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; }
- ParserSettingsProfileElement & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; }
-
-protected:
- const char * getName() const override { return "SettingsProfileElement"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-
-private:
- bool id_mode = false;
- bool use_inherit_keyword = false;
-};
-
-
-class ParserSettingsProfileElements : public IParserBase
-{
-public:
- ParserSettingsProfileElements & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; }
- ParserSettingsProfileElements & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; }
-
-protected:
- const char * getName() const override { return "SettingsProfileElements"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-
-private:
- bool id_mode = false;
- bool use_inherit_keyword = false;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/** Parses a string like this:
+ * {variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE]} | PROFILE 'profile_name'
+ */
+class ParserSettingsProfileElement : public IParserBase
+{
+public:
+ ParserSettingsProfileElement & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; }
+ ParserSettingsProfileElement & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; }
+
+protected:
+ const char * getName() const override { return "SettingsProfileElement"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+private:
+ bool id_mode = false;
+ bool use_inherit_keyword = false;
+};
+
+
+class ParserSettingsProfileElements : public IParserBase
+{
+public:
+ ParserSettingsProfileElements & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; }
+ ParserSettingsProfileElements & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; }
+
+protected:
+ const char * getName() const override { return "SettingsProfileElements"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+private:
+ bool id_mode = false;
+ bool use_inherit_keyword = false;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h
index b6483aa3d4..fe257168dc 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h
@@ -1,32 +1,32 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTShowAccessQuery.h>
-
-
-namespace DB
-{
-
-/** Query SHOW ACCESS
- */
-class ParserShowAccessQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW ACCESS query"; }
-
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
- {
- auto query = std::make_shared<ASTShowAccessQuery>();
-
- if (!ParserKeyword("SHOW ACCESS").ignore(pos, expected))
- return false;
-
- node = query;
-
- return true;
- }
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTShowAccessQuery.h>
+
+
+namespace DB
+{
+
+/** Query SHOW ACCESS
+ */
+class ParserShowAccessQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SHOW ACCESS query"; }
+
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
+ {
+ auto query = std::make_shared<ASTShowAccessQuery>();
+
+ if (!ParserKeyword("SHOW ACCESS").ignore(pos, expected))
+ return false;
+
+ node = query;
+
+ return true;
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp
index bd9e401277..b5ef45122d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.cpp
@@ -1,40 +1,40 @@
-#include <Parsers/ParserShowGrantsQuery.h>
-#include <Parsers/ParserRolesOrUsersSet.h>
-#include <Parsers/ASTRolesOrUsersSet.h>
-#include <Parsers/ASTShowGrantsQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/parseUserName.h>
-
-
-namespace DB
-{
-bool ParserShowGrantsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- if (!ParserKeyword{"SHOW GRANTS"}.ignore(pos, expected))
- return false;
-
- std::shared_ptr<ASTRolesOrUsersSet> for_roles;
-
- if (ParserKeyword{"FOR"}.ignore(pos, expected))
- {
- ASTPtr for_roles_ast;
- ParserRolesOrUsersSet for_roles_p;
+#include <Parsers/ParserShowGrantsQuery.h>
+#include <Parsers/ParserRolesOrUsersSet.h>
+#include <Parsers/ASTRolesOrUsersSet.h>
+#include <Parsers/ASTShowGrantsQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/parseUserName.h>
+
+
+namespace DB
+{
+bool ParserShowGrantsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ if (!ParserKeyword{"SHOW GRANTS"}.ignore(pos, expected))
+ return false;
+
+ std::shared_ptr<ASTRolesOrUsersSet> for_roles;
+
+ if (ParserKeyword{"FOR"}.ignore(pos, expected))
+ {
+ ASTPtr for_roles_ast;
+ ParserRolesOrUsersSet for_roles_p;
for_roles_p.allowUsers().allowRoles().allowAll().allowCurrentUser();
- if (!for_roles_p.parse(pos, for_roles_ast, expected))
- return false;
-
- for_roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(for_roles_ast);
- }
- else
- {
- for_roles = std::make_shared<ASTRolesOrUsersSet>();
- for_roles->current_user = true;
- }
-
- auto query = std::make_shared<ASTShowGrantsQuery>();
- query->for_roles = std::move(for_roles);
- node = query;
-
- return true;
-}
-}
+ if (!for_roles_p.parse(pos, for_roles_ast, expected))
+ return false;
+
+ for_roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(for_roles_ast);
+ }
+ else
+ {
+ for_roles = std::make_shared<ASTRolesOrUsersSet>();
+ for_roles->current_user = true;
+ }
+
+ auto query = std::make_shared<ASTShowGrantsQuery>();
+ query->for_roles = std::move(for_roles);
+ node = query;
+
+ return true;
+}
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h
index 88409b5b7e..bfb1afb8ca 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowGrantsQuery.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-/** Parses queries like
- * SHOW GRANTS [FOR user_name]
- */
-class ParserShowGrantsQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW GRANTS query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/** Parses queries like
+ * SHOW GRANTS [FOR user_name]
+ */
+class ParserShowGrantsQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SHOW GRANTS query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp
index 56b4327dcc..de110f617f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.cpp
@@ -1,21 +1,21 @@
-#include <Parsers/ParserShowPrivilegesQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ASTShowPrivilegesQuery.h>
-
-
-namespace DB
-{
-
-bool ParserShowPrivilegesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- auto query = std::make_shared<ASTShowPrivilegesQuery>();
-
- if (!ParserKeyword("SHOW PRIVILEGES").ignore(pos, expected))
- return false;
-
- node = query;
-
- return true;
-}
-
-}
+#include <Parsers/ParserShowPrivilegesQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ASTShowPrivilegesQuery.h>
+
+
+namespace DB
+{
+
+bool ParserShowPrivilegesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ auto query = std::make_shared<ASTShowPrivilegesQuery>();
+
+ if (!ParserKeyword("SHOW PRIVILEGES").ignore(pos, expected))
+ return false;
+
+ node = query;
+
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h
index 2604e7f28c..38aa76e7ea 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowPrivilegesQuery.h
@@ -1,18 +1,18 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-/** Query SHOW PRIVILEGES
- */
-class ParserShowPrivilegesQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW PRIVILEGES query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+/** Query SHOW PRIVILEGES
+ */
+class ParserShowPrivilegesQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SHOW PRIVILEGES query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h
index de08894e05..0eec1d4c90 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h
@@ -1,32 +1,32 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTShowProcesslistQuery.h>
-
-
-namespace DB
-{
-
-/** Query SHOW PROCESSLIST
- */
-class ParserShowProcesslistQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW PROCESSLIST query"; }
-
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
- {
- auto query = std::make_shared<ASTShowProcesslistQuery>();
-
- if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected))
- return false;
-
- node = query;
-
- return true;
- }
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTShowProcesslistQuery.h>
+
+
+namespace DB
+{
+
+/** Query SHOW PROCESSLIST
+ */
+class ParserShowProcesslistQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SHOW PROCESSLIST query"; }
+
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
+ {
+ auto query = std::make_shared<ASTShowProcesslistQuery>();
+
+ if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected))
+ return false;
+
+ node = query;
+
+ return true;
+ }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp
index e8cf732d09..763c60f78a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.cpp
@@ -1,53 +1,53 @@
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTShowTablesQuery.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ParserShowTablesQuery.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/parseIdentifierOrStringLiteral.h>
-
-#include <Common/typeid_cast.h>
-
-
-namespace DB
-{
-
-
-bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_show("SHOW");
- ParserKeyword s_temporary("TEMPORARY");
- ParserKeyword s_tables("TABLES");
- ParserKeyword s_databases("DATABASES");
- ParserKeyword s_clusters("CLUSTERS");
- ParserKeyword s_cluster("CLUSTER");
- ParserKeyword s_dictionaries("DICTIONARIES");
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTShowTablesQuery.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ParserShowTablesQuery.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/parseIdentifierOrStringLiteral.h>
+
+#include <Common/typeid_cast.h>
+
+
+namespace DB
+{
+
+
+bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_show("SHOW");
+ ParserKeyword s_temporary("TEMPORARY");
+ ParserKeyword s_tables("TABLES");
+ ParserKeyword s_databases("DATABASES");
+ ParserKeyword s_clusters("CLUSTERS");
+ ParserKeyword s_cluster("CLUSTER");
+ ParserKeyword s_dictionaries("DICTIONARIES");
ParserKeyword s_settings("SETTINGS");
ParserKeyword s_changed("CHANGED");
- ParserKeyword s_from("FROM");
- ParserKeyword s_in("IN");
- ParserKeyword s_not("NOT");
- ParserKeyword s_like("LIKE");
- ParserKeyword s_ilike("ILIKE");
- ParserKeyword s_where("WHERE");
- ParserKeyword s_limit("LIMIT");
- ParserStringLiteral like_p;
- ParserIdentifier name_p;
- ParserExpressionWithOptionalAlias exp_elem(false);
-
- ASTPtr like;
- ASTPtr database;
-
- auto query = std::make_shared<ASTShowTablesQuery>();
-
- if (!s_show.ignore(pos, expected))
- return false;
-
+ ParserKeyword s_from("FROM");
+ ParserKeyword s_in("IN");
+ ParserKeyword s_not("NOT");
+ ParserKeyword s_like("LIKE");
+ ParserKeyword s_ilike("ILIKE");
+ ParserKeyword s_where("WHERE");
+ ParserKeyword s_limit("LIMIT");
+ ParserStringLiteral like_p;
+ ParserIdentifier name_p;
+ ParserExpressionWithOptionalAlias exp_elem(false);
+
+ ASTPtr like;
+ ASTPtr database;
+
+ auto query = std::make_shared<ASTShowTablesQuery>();
+
+ if (!s_show.ignore(pos, expected))
+ return false;
+
if (s_databases.ignore(pos, expected))
- {
- query->databases = true;
+ {
+ query->databases = true;
if (s_not.ignore(pos, expected))
query->not_like = true;
@@ -67,40 +67,40 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (!exp_elem.parse(pos, query->limit_length, expected))
return false;
}
- }
+ }
else if (s_clusters.ignore(pos, expected))
- {
- query->clusters = true;
-
- if (s_not.ignore(pos, expected))
- query->not_like = true;
-
- if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected))
- {
- if (insensitive)
- query->case_insensitive_like = true;
-
- if (!like_p.parse(pos, like, expected))
- return false;
- }
- else if (query->not_like)
- return false;
- if (s_limit.ignore(pos, expected))
- {
- if (!exp_elem.parse(pos, query->limit_length, expected))
- return false;
- }
- }
+ {
+ query->clusters = true;
+
+ if (s_not.ignore(pos, expected))
+ query->not_like = true;
+
+ if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected))
+ {
+ if (insensitive)
+ query->case_insensitive_like = true;
+
+ if (!like_p.parse(pos, like, expected))
+ return false;
+ }
+ else if (query->not_like)
+ return false;
+ if (s_limit.ignore(pos, expected))
+ {
+ if (!exp_elem.parse(pos, query->limit_length, expected))
+ return false;
+ }
+ }
else if (s_cluster.ignore(pos, expected))
- {
- query->cluster = true;
-
- String cluster_str;
- if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str))
- return false;
-
- query->cluster_str = std::move(cluster_str);
- }
+ {
+ query->cluster = true;
+
+ String cluster_str;
+ if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str))
+ return false;
+
+ query->cluster_str = std::move(cluster_str);
+ }
else if (bool changed = s_changed.ignore(pos, expected); changed || s_settings.ignore(pos, expected))
{
query->m_settings = true;
@@ -124,60 +124,60 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
else
return false;
}
- else
- {
- if (s_temporary.ignore(pos))
- query->temporary = true;
-
- if (!s_tables.ignore(pos, expected))
- {
- if (s_dictionaries.ignore(pos, expected))
- query->dictionaries = true;
- else
- return false;
- }
-
- if (s_from.ignore(pos, expected) || s_in.ignore(pos, expected))
- {
- if (!name_p.parse(pos, database, expected))
- return false;
- }
-
- if (s_not.ignore(pos, expected))
- query->not_like = true;
-
- if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected))
- {
- if (insensitive)
- query->case_insensitive_like = true;
-
- if (!like_p.parse(pos, like, expected))
- return false;
- }
- else if (query->not_like)
- return false;
- else if (s_where.ignore(pos, expected))
- {
- if (!exp_elem.parse(pos, query->where_expression, expected))
- return false;
- }
-
- if (s_limit.ignore(pos, expected))
- {
- if (!exp_elem.parse(pos, query->limit_length, expected))
- return false;
- }
- }
-
- tryGetIdentifierNameInto(database, query->from);
-
- if (like)
- query->like = safeGet<const String &>(like->as<ASTLiteral &>().value);
-
- node = query;
-
- return true;
-}
-
-
-}
+ else
+ {
+ if (s_temporary.ignore(pos))
+ query->temporary = true;
+
+ if (!s_tables.ignore(pos, expected))
+ {
+ if (s_dictionaries.ignore(pos, expected))
+ query->dictionaries = true;
+ else
+ return false;
+ }
+
+ if (s_from.ignore(pos, expected) || s_in.ignore(pos, expected))
+ {
+ if (!name_p.parse(pos, database, expected))
+ return false;
+ }
+
+ if (s_not.ignore(pos, expected))
+ query->not_like = true;
+
+ if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected))
+ {
+ if (insensitive)
+ query->case_insensitive_like = true;
+
+ if (!like_p.parse(pos, like, expected))
+ return false;
+ }
+ else if (query->not_like)
+ return false;
+ else if (s_where.ignore(pos, expected))
+ {
+ if (!exp_elem.parse(pos, query->where_expression, expected))
+ return false;
+ }
+
+ if (s_limit.ignore(pos, expected))
+ {
+ if (!exp_elem.parse(pos, query->limit_length, expected))
+ return false;
+ }
+ }
+
+ tryGetIdentifierNameInto(database, query->from);
+
+ if (like)
+ query->like = safeGet<const String &>(like->as<ASTLiteral &>().value);
+
+ node = query;
+
+ return true;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h
index 3b8bb03327..70aa18333f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowTablesQuery.h
@@ -1,21 +1,21 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-/** Query like this:
- * SHOW TABLES [FROM db] [[NOT] [I]LIKE 'str'] [LIMIT expr]
- * or
- * SHOW DATABASES.
- */
-class ParserShowTablesQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+/** Query like this:
+ * SHOW TABLES [FROM db] [[NOT] [I]LIKE 'str'] [LIMIT expr]
+ * or
+ * SHOW DATABASES.
+ */
+class ParserShowTablesQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp
index 66bd39e020..e52323ac3f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.cpp
@@ -1,20 +1,20 @@
-#include <Parsers/ParserSystemQuery.h>
-#include <Parsers/ASTSystemQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/parseDatabaseAndTableName.h>
-
-
-namespace ErrorCodes
-{
-}
-
-
-namespace DB
-{
-
+#include <Parsers/ParserSystemQuery.h>
+#include <Parsers/ASTSystemQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/parseDatabaseAndTableName.h>
+
+
+namespace ErrorCodes
+{
+}
+
+
+namespace DB
+{
+
static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery> & res, IParser::Pos & pos,
Expected & expected, bool require_table, bool allow_string_literal)
{
@@ -23,7 +23,7 @@ static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery>
/// Need to support both
String cluster;
bool parsed_on_cluster = false;
-
+
if (ParserKeyword{"ON"}.ignore(pos, expected))
{
if (!ASTQueryWithOnCluster::parse(pos, cluster, expected))
@@ -57,37 +57,37 @@ static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery>
return true;
}
-bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
-{
- if (!ParserKeyword{"SYSTEM"}.ignore(pos, expected))
- return false;
-
- using Type = ASTSystemQuery::Type;
-
- auto res = std::make_shared<ASTSystemQuery>();
-
- bool found = false;
- for (int i = static_cast<int>(Type::UNKNOWN) + 1; i < static_cast<int>(Type::END); ++i)
- {
- Type t = static_cast<Type>(i);
- if (ParserKeyword{ASTSystemQuery::typeToString(t)}.ignore(pos, expected))
- {
- res->type = t;
- found = true;
- }
- }
-
- if (!found)
- return false;
-
- switch (res->type)
- {
- case Type::RELOAD_DICTIONARY:
- {
+bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
+{
+ if (!ParserKeyword{"SYSTEM"}.ignore(pos, expected))
+ return false;
+
+ using Type = ASTSystemQuery::Type;
+
+ auto res = std::make_shared<ASTSystemQuery>();
+
+ bool found = false;
+ for (int i = static_cast<int>(Type::UNKNOWN) + 1; i < static_cast<int>(Type::END); ++i)
+ {
+ Type t = static_cast<Type>(i);
+ if (ParserKeyword{ASTSystemQuery::typeToString(t)}.ignore(pos, expected))
+ {
+ res->type = t;
+ found = true;
+ }
+ }
+
+ if (!found)
+ return false;
+
+ switch (res->type)
+ {
+ case Type::RELOAD_DICTIONARY:
+ {
if (!parseQueryWithOnClusterAndMaybeTable(res, pos, expected, /* require table = */ true, /* allow_string_literal = */ true))
- return false;
- break;
- }
+ return false;
+ break;
+ }
case Type::RELOAD_MODEL:
{
String cluster_str;
@@ -107,7 +107,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
ParserIdentifier model_parser;
ASTPtr model;
String target_model;
-
+
if (!model_parser.parse(pos, model, expected))
return false;
@@ -117,54 +117,54 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
break;
}
- case Type::DROP_REPLICA:
- {
- ASTPtr ast;
- if (!ParserStringLiteral{}.parse(pos, ast, expected))
- return false;
- res->replica = ast->as<ASTLiteral &>().value.safeGet<String>();
- if (ParserKeyword{"FROM"}.ignore(pos, expected))
- {
- // way 1. parse replica database
- // way 2. parse replica tables
- // way 3. parse replica zkpath
- if (ParserKeyword{"DATABASE"}.ignore(pos, expected))
- {
- ParserIdentifier database_parser;
- ASTPtr database;
- if (!database_parser.parse(pos, database, expected))
- return false;
- tryGetIdentifierNameInto(database, res->database);
- }
- else if (ParserKeyword{"TABLE"}.ignore(pos, expected))
- {
- parseDatabaseAndTableName(pos, expected, res->database, res->table);
- }
- else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected))
- {
- ASTPtr path_ast;
- if (!ParserStringLiteral{}.parse(pos, path_ast, expected))
- return false;
- String zk_path = path_ast->as<ASTLiteral &>().value.safeGet<String>();
- if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/')
- zk_path.pop_back();
- res->replica_zk_path = zk_path;
- }
- else
- return false;
- }
- else
- res->is_drop_whole_replica = true;
-
- break;
- }
-
- case Type::RESTART_REPLICA:
- case Type::SYNC_REPLICA:
- if (!parseDatabaseAndTableName(pos, expected, res->database, res->table))
- return false;
- break;
-
+ case Type::DROP_REPLICA:
+ {
+ ASTPtr ast;
+ if (!ParserStringLiteral{}.parse(pos, ast, expected))
+ return false;
+ res->replica = ast->as<ASTLiteral &>().value.safeGet<String>();
+ if (ParserKeyword{"FROM"}.ignore(pos, expected))
+ {
+ // way 1. parse replica database
+ // way 2. parse replica tables
+ // way 3. parse replica zkpath
+ if (ParserKeyword{"DATABASE"}.ignore(pos, expected))
+ {
+ ParserIdentifier database_parser;
+ ASTPtr database;
+ if (!database_parser.parse(pos, database, expected))
+ return false;
+ tryGetIdentifierNameInto(database, res->database);
+ }
+ else if (ParserKeyword{"TABLE"}.ignore(pos, expected))
+ {
+ parseDatabaseAndTableName(pos, expected, res->database, res->table);
+ }
+ else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected))
+ {
+ ASTPtr path_ast;
+ if (!ParserStringLiteral{}.parse(pos, path_ast, expected))
+ return false;
+ String zk_path = path_ast->as<ASTLiteral &>().value.safeGet<String>();
+ if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/')
+ zk_path.pop_back();
+ res->replica_zk_path = zk_path;
+ }
+ else
+ return false;
+ }
+ else
+ res->is_drop_whole_replica = true;
+
+ break;
+ }
+
+ case Type::RESTART_REPLICA:
+ case Type::SYNC_REPLICA:
+ if (!parseDatabaseAndTableName(pos, expected, res->database, res->table))
+ return false;
+ break;
+
case Type::RESTART_DISK:
{
ASTPtr ast;
@@ -178,24 +178,24 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
/// FLUSH DISTRIBUTED requires table
/// START/STOP DISTRIBUTED SENDS does not require table
- case Type::STOP_DISTRIBUTED_SENDS:
- case Type::START_DISTRIBUTED_SENDS:
+ case Type::STOP_DISTRIBUTED_SENDS:
+ case Type::START_DISTRIBUTED_SENDS:
{
if (!parseQueryWithOnClusterAndMaybeTable(res, pos, expected, /* require table = */ false, /* allow_string_literal = */ false))
return false;
break;
}
- case Type::FLUSH_DISTRIBUTED:
+ case Type::FLUSH_DISTRIBUTED:
case Type::RESTORE_REPLICA:
- {
+ {
if (!parseQueryWithOnClusterAndMaybeTable(res, pos, expected, /* require table = */ true, /* allow_string_literal = */ false))
return false;
- break;
- }
-
- case Type::STOP_MERGES:
- case Type::START_MERGES:
+ break;
+ }
+
+ case Type::STOP_MERGES:
+ case Type::START_MERGES:
{
String storage_policy_str;
String volume_str;
@@ -223,19 +223,19 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
break;
}
- case Type::STOP_TTL_MERGES:
- case Type::START_TTL_MERGES:
- case Type::STOP_MOVES:
- case Type::START_MOVES:
- case Type::STOP_FETCHES:
- case Type::START_FETCHES:
- case Type::STOP_REPLICATED_SENDS:
- case Type::START_REPLICATED_SENDS:
- case Type::STOP_REPLICATION_QUEUES:
- case Type::START_REPLICATION_QUEUES:
- parseDatabaseAndTableName(pos, expected, res->database, res->table);
- break;
-
+ case Type::STOP_TTL_MERGES:
+ case Type::START_TTL_MERGES:
+ case Type::STOP_MOVES:
+ case Type::START_MOVES:
+ case Type::STOP_FETCHES:
+ case Type::START_FETCHES:
+ case Type::STOP_REPLICATED_SENDS:
+ case Type::START_REPLICATED_SENDS:
+ case Type::STOP_REPLICATION_QUEUES:
+ case Type::START_REPLICATION_QUEUES:
+ parseDatabaseAndTableName(pos, expected, res->database, res->table);
+ break;
+
case Type::SUSPEND:
{
ASTPtr seconds;
@@ -250,13 +250,13 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
break;
}
- default:
- /// There are no [db.table] after COMMAND NAME
- break;
- }
-
- node = std::move(res);
- return true;
-}
-
-}
+ default:
+ /// There are no [db.table] after COMMAND NAME
+ break;
+ }
+
+ node = std::move(res);
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h
index 5f947e63b9..3c4c5e1387 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserSystemQuery.h
@@ -1,16 +1,16 @@
-#pragma once
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-
-class ParserSystemQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SYSTEM query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+
+class ParserSystemQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "SYSTEM query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp
index 30be37bc4a..f2281e8f9c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.cpp
@@ -1,42 +1,42 @@
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/TablePropertiesQueriesASTs.h>
-
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ParserTablePropertiesQuery.h>
-
-#include <Common/typeid_cast.h>
-
-
-namespace DB
-{
-
-
-bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_exists("EXISTS");
- ParserKeyword s_temporary("TEMPORARY");
- ParserKeyword s_describe("DESCRIBE");
- ParserKeyword s_desc("DESC");
- ParserKeyword s_show("SHOW");
- ParserKeyword s_create("CREATE");
- ParserKeyword s_database("DATABASE");
- ParserKeyword s_table("TABLE");
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/TablePropertiesQueriesASTs.h>
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ParserTablePropertiesQuery.h>
+
+#include <Common/typeid_cast.h>
+
+
+namespace DB
+{
+
+
+bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_exists("EXISTS");
+ ParserKeyword s_temporary("TEMPORARY");
+ ParserKeyword s_describe("DESCRIBE");
+ ParserKeyword s_desc("DESC");
+ ParserKeyword s_show("SHOW");
+ ParserKeyword s_create("CREATE");
+ ParserKeyword s_database("DATABASE");
+ ParserKeyword s_table("TABLE");
ParserKeyword s_view("VIEW");
- ParserKeyword s_dictionary("DICTIONARY");
- ParserToken s_dot(TokenType::Dot);
- ParserIdentifier name_p;
-
- ASTPtr database;
- ASTPtr table;
- std::shared_ptr<ASTQueryWithTableAndOutput> query;
-
- bool parse_only_database_name = false;
+ ParserKeyword s_dictionary("DICTIONARY");
+ ParserToken s_dot(TokenType::Dot);
+ ParserIdentifier name_p;
+
+ ASTPtr database;
+ ASTPtr table;
+ std::shared_ptr<ASTQueryWithTableAndOutput> query;
+
+ bool parse_only_database_name = false;
bool parse_show_create_view = false;
bool exists_view = false;
-
- bool temporary = false;
- if (s_exists.ignore(pos, expected))
- {
+
+ bool temporary = false;
+ if (s_exists.ignore(pos, expected))
+ {
if (s_database.ignore(pos, expected))
{
query = std::make_shared<ASTExistsDatabaseQuery>();
@@ -51,7 +51,7 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
{
if (s_temporary.ignore(pos, expected))
temporary = true;
-
+
if (s_table.checkWithoutMoving(pos, expected))
query = std::make_shared<ASTExistsTableQuery>();
else if (s_dictionary.checkWithoutMoving(pos, expected))
@@ -59,64 +59,64 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
else
query = std::make_shared<ASTExistsTableQuery>();
}
- }
- else if (s_show.ignore(pos, expected))
- {
- if (!s_create.ignore(pos, expected))
- return false;
-
- if (s_database.ignore(pos, expected))
- {
- parse_only_database_name = true;
- query = std::make_shared<ASTShowCreateDatabaseQuery>();
- }
- else if (s_dictionary.checkWithoutMoving(pos, expected))
- query = std::make_shared<ASTShowCreateDictionaryQuery>();
+ }
+ else if (s_show.ignore(pos, expected))
+ {
+ if (!s_create.ignore(pos, expected))
+ return false;
+
+ if (s_database.ignore(pos, expected))
+ {
+ parse_only_database_name = true;
+ query = std::make_shared<ASTShowCreateDatabaseQuery>();
+ }
+ else if (s_dictionary.checkWithoutMoving(pos, expected))
+ query = std::make_shared<ASTShowCreateDictionaryQuery>();
else if (s_view.ignore(pos, expected))
{
query = std::make_shared<ASTShowCreateViewQuery>();
parse_show_create_view = true;
}
- else
- query = std::make_shared<ASTShowCreateTableQuery>();
- }
- else
- {
- return false;
- }
-
- if (parse_only_database_name)
- {
- if (!name_p.parse(pos, database, expected))
- return false;
- }
- else
- {
+ else
+ query = std::make_shared<ASTShowCreateTableQuery>();
+ }
+ else
+ {
+ return false;
+ }
+
+ if (parse_only_database_name)
+ {
+ if (!name_p.parse(pos, database, expected))
+ return false;
+ }
+ else
+ {
if (!(exists_view || parse_show_create_view))
{
if (temporary || s_temporary.ignore(pos, expected))
query->temporary = true;
-
+
if (!s_table.ignore(pos, expected))
s_dictionary.ignore(pos, expected);
}
- if (!name_p.parse(pos, table, expected))
- return false;
- if (s_dot.ignore(pos, expected))
- {
- database = table;
- if (!name_p.parse(pos, table, expected))
- return false;
- }
- }
-
- tryGetIdentifierNameInto(database, query->database);
- tryGetIdentifierNameInto(table, query->table);
-
- node = query;
-
- return true;
-}
-
-
-}
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ if (s_dot.ignore(pos, expected))
+ {
+ database = table;
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ }
+ }
+
+ tryGetIdentifierNameInto(database, query->database);
+ tryGetIdentifierNameInto(table, query->table);
+
+ node = query;
+
+ return true;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h
index 8d2c26d34a..8d9513755a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserTablePropertiesQuery.h
@@ -1,19 +1,19 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-
-namespace DB
-{
-
+#pragma once
+
+#include <Parsers/IParserBase.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+
+namespace DB
+{
+
/** Query (EXISTS | SHOW CREATE) [DATABASE|TABLE|DICTIONARY] [db.]name [FORMAT format]
- */
-class ParserTablePropertiesQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "EXISTS or SHOW CREATE query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+ */
+class ParserTablePropertiesQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "EXISTS or SHOW CREATE query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp
index a71fa17ab7..e24de9942c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.cpp
@@ -1,30 +1,30 @@
-#include <Parsers/ParserUseQuery.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ASTUseQuery.h>
-
-
-namespace DB
-{
-
-bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_use("USE");
- ParserIdentifier name_p;
-
- if (!s_use.ignore(pos, expected))
- return false;
-
- ASTPtr database;
- if (!name_p.parse(pos, database, expected))
- return false;
-
- auto query = std::make_shared<ASTUseQuery>();
- tryGetIdentifierNameInto(database, query->database);
- node = query;
-
- return true;
-}
-
-}
+#include <Parsers/ParserUseQuery.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ASTUseQuery.h>
+
+
+namespace DB
+{
+
+bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_use("USE");
+ ParserIdentifier name_p;
+
+ if (!s_use.ignore(pos, expected))
+ return false;
+
+ ASTPtr database;
+ if (!name_p.parse(pos, database, expected))
+ return false;
+
+ auto query = std::make_shared<ASTUseQuery>();
+ tryGetIdentifierNameInto(database, query->database);
+ node = query;
+
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h
index f5b0be7a86..197a4e828b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUseQuery.h
@@ -1,18 +1,18 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-/** Query USE db
- */
-class ParserUseQuery : public IParserBase
-{
-protected:
- const char * getName() const override{ return "USE query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+/** Query USE db
+ */
+class ParserUseQuery : public IParserBase
+{
+protected:
+ const char * getName() const override{ return "USE query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp
index 9cb4bb6fc9..1e678f69da 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.cpp
@@ -1,77 +1,77 @@
-#include <Parsers/ParserUserNameWithHost.h>
-#include <Parsers/ASTUserNameWithHost.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/parseIdentifierOrStringLiteral.h>
-#include <boost/algorithm/string.hpp>
-
-
-namespace DB
-{
-namespace
-{
- bool parseUserNameWithHost(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTUserNameWithHost> & ast)
- {
- return IParserBase::wrapParseImpl(pos, [&]
- {
- String base_name;
- if (!parseIdentifierOrStringLiteral(pos, expected, base_name))
- return false;
-
- boost::algorithm::trim(base_name);
-
- String host_pattern;
- if (ParserToken{TokenType::At}.ignore(pos, expected))
- {
- if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern))
- return false;
-
- boost::algorithm::trim(host_pattern);
- if (host_pattern == "%")
- host_pattern.clear();
- }
-
- ast = std::make_shared<ASTUserNameWithHost>();
- ast->base_name = std::move(base_name);
- ast->host_pattern = std::move(host_pattern);
- return true;
- });
- }
-}
-
-
-bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- std::shared_ptr<ASTUserNameWithHost> res;
- if (!parseUserNameWithHost(pos, expected, res))
- return false;
-
- node = res;
- return true;
-}
-
-
-bool ParserUserNamesWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- std::vector<std::shared_ptr<ASTUserNameWithHost>> names;
-
- auto parse_single_name = [&]
- {
- std::shared_ptr<ASTUserNameWithHost> ast;
- if (!parseUserNameWithHost(pos, expected, ast))
- return false;
-
- names.emplace_back(std::move(ast));
- return true;
- };
-
- if (!ParserList::parseUtil(pos, expected, parse_single_name, false))
- return false;
-
- auto result = std::make_shared<ASTUserNamesWithHost>();
- result->names = std::move(names);
- node = result;
- return true;
-}
-
-}
+#include <Parsers/ParserUserNameWithHost.h>
+#include <Parsers/ASTUserNameWithHost.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/parseIdentifierOrStringLiteral.h>
+#include <boost/algorithm/string.hpp>
+
+
+namespace DB
+{
+namespace
+{
+ bool parseUserNameWithHost(IParserBase::Pos & pos, Expected & expected, std::shared_ptr<ASTUserNameWithHost> & ast)
+ {
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ String base_name;
+ if (!parseIdentifierOrStringLiteral(pos, expected, base_name))
+ return false;
+
+ boost::algorithm::trim(base_name);
+
+ String host_pattern;
+ if (ParserToken{TokenType::At}.ignore(pos, expected))
+ {
+ if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern))
+ return false;
+
+ boost::algorithm::trim(host_pattern);
+ if (host_pattern == "%")
+ host_pattern.clear();
+ }
+
+ ast = std::make_shared<ASTUserNameWithHost>();
+ ast->base_name = std::move(base_name);
+ ast->host_pattern = std::move(host_pattern);
+ return true;
+ });
+ }
+}
+
+
+bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ std::shared_ptr<ASTUserNameWithHost> res;
+ if (!parseUserNameWithHost(pos, expected, res))
+ return false;
+
+ node = res;
+ return true;
+}
+
+
+bool ParserUserNamesWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ std::vector<std::shared_ptr<ASTUserNameWithHost>> names;
+
+ auto parse_single_name = [&]
+ {
+ std::shared_ptr<ASTUserNameWithHost> ast;
+ if (!parseUserNameWithHost(pos, expected, ast))
+ return false;
+
+ names.emplace_back(std::move(ast));
+ return true;
+ };
+
+ if (!ParserList::parseUtil(pos, expected, parse_single_name, false))
+ return false;
+
+ auto result = std::make_shared<ASTUserNamesWithHost>();
+ result->names = std::move(names);
+ node = result;
+ return true;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h
index 453b816a98..9c4f591742 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserUserNameWithHost.h
@@ -1,26 +1,26 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-/** Parses a user name.
- * It can be a simple string or identifier or something like `name@host`.
- */
-class ParserUserNameWithHost : public IParserBase
-{
-protected:
- const char * getName() const override { return "UserNameWithHost"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserUserNamesWithHost : public IParserBase
-{
-protected:
- const char * getName() const override { return "UserNamesWithHost"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/** Parses a user name.
+ * It can be a simple string or identifier or something like `name@host`.
+ */
+class ParserUserNameWithHost : public IParserBase
+{
+protected:
+ const char * getName() const override { return "UserNameWithHost"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+
+class ParserUserNamesWithHost : public IParserBase
+{
+protected:
+ const char * getName() const override { return "UserNamesWithHost"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp
index 046a840e87..2c9b37226e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.cpp
@@ -1,66 +1,66 @@
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTWatchQuery.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ParserWatchQuery.h>
-#include <Parsers/ExpressionElementParsers.h>
-
-
-namespace DB
-{
-
-bool ParserWatchQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_watch("WATCH");
- ParserToken s_dot(TokenType::Dot);
- ParserIdentifier name_p;
- ParserKeyword s_events("EVENTS");
- ParserKeyword s_limit("LIMIT");
-
- ASTPtr database;
- ASTPtr table;
- auto query = std::make_shared<ASTWatchQuery>();
-
- if (!s_watch.ignore(pos, expected))
- {
- return false;
- }
-
- if (!name_p.parse(pos, table, expected))
- return false;
-
- if (s_dot.ignore(pos, expected))
- {
- database = table;
- if (!name_p.parse(pos, table, expected))
- return false;
- }
-
- /// EVENTS
- if (s_events.ignore(pos, expected))
- {
- query->is_watch_events = true;
- }
-
- /// LIMIT length
- if (s_limit.ignore(pos, expected))
- {
- ParserNumber num;
-
- if (!num.parse(pos, query->limit_length, expected))
- return false;
- }
-
- if (database)
- query->database = getIdentifierName(database);
-
- if (table)
- query->table = getIdentifierName(table);
-
- node = query;
-
- return true;
-}
-
-
-}
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTWatchQuery.h>
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ParserWatchQuery.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+
+namespace DB
+{
+
+bool ParserWatchQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+ ParserKeyword s_watch("WATCH");
+ ParserToken s_dot(TokenType::Dot);
+ ParserIdentifier name_p;
+ ParserKeyword s_events("EVENTS");
+ ParserKeyword s_limit("LIMIT");
+
+ ASTPtr database;
+ ASTPtr table;
+ auto query = std::make_shared<ASTWatchQuery>();
+
+ if (!s_watch.ignore(pos, expected))
+ {
+ return false;
+ }
+
+ if (!name_p.parse(pos, table, expected))
+ return false;
+
+ if (s_dot.ignore(pos, expected))
+ {
+ database = table;
+ if (!name_p.parse(pos, table, expected))
+ return false;
+ }
+
+ /// EVENTS
+ if (s_events.ignore(pos, expected))
+ {
+ query->is_watch_events = true;
+ }
+
+ /// LIMIT length
+ if (s_limit.ignore(pos, expected))
+ {
+ ParserNumber num;
+
+ if (!num.parse(pos, query->limit_length, expected))
+ return false;
+ }
+
+ if (database)
+ query->database = getIdentifierName(database);
+
+ if (table)
+ query->table = getIdentifierName(table);
+
+ node = query;
+
+ return true;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h
index 63097eba67..0918b5c65c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserWatchQuery.h
@@ -1,19 +1,19 @@
#pragma once
-
-#include <Parsers/IParserBase.h>
-
-
-namespace DB
-{
-
-/** Query like this:
- * WATCH [db.]table EVENTS
- */
-class ParserWatchQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "WATCH query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-}
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+/** Query like this:
+ * WATCH [db.]table EVENTS
+ */
+class ParserWatchQuery : public IParserBase
+{
+protected:
+ const char * getName() const override { return "WATCH query"; }
+ bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h
index 1fc285a562..b7cc68940f 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/StringRange.h
@@ -1,71 +1,71 @@
-#pragma once
-
+#pragma once
+
#include <common/types.h>
-#include <Parsers/TokenIterator.h>
-#include <map>
-#include <memory>
-#include <Common/SipHash.h>
-
-
-namespace DB
-{
-
-struct StringRange
-{
- const char * first = nullptr;
- const char * second = nullptr;
-
- StringRange() = default;
- StringRange(const char * begin, const char * end) : first(begin), second(end) {}
- explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {}
-
- StringRange(TokenIterator token_begin, TokenIterator token_end)
- {
- /// Empty range.
- if (token_begin == token_end)
- {
- first = token_begin->begin;
- second = token_begin->begin;
- return;
- }
-
- TokenIterator token_last = token_end;
- --token_last;
-
- first = token_begin->begin;
- second = token_last->end;
- }
-};
-
-using StringPtr = std::shared_ptr<String>;
-
-
-inline String toString(const StringRange & range)
-{
- return range.first ? String(range.first, range.second) : String();
-}
-
-/// Hashes only the values of pointers in StringRange. Is used with StringRangePointersEqualTo comparator.
-struct StringRangePointersHash
-{
- UInt64 operator()(const StringRange & range) const
- {
- SipHash hash;
- hash.update(range.first);
- hash.update(range.second);
- return hash.get64();
- }
-};
-
-/// Ranges are equal only when they point to the same memory region.
-/// It may be used when it's enough to compare substrings by their position in the same string.
-struct StringRangePointersEqualTo
-{
- constexpr bool operator()(const StringRange &lhs, const StringRange &rhs) const
- {
- return std::tie(lhs.first, lhs.second) == std::tie(rhs.first, rhs.second);
- }
-};
-
-}
-
+#include <Parsers/TokenIterator.h>
+#include <map>
+#include <memory>
+#include <Common/SipHash.h>
+
+
+namespace DB
+{
+
+struct StringRange
+{
+ const char * first = nullptr;
+ const char * second = nullptr;
+
+ StringRange() = default;
+ StringRange(const char * begin, const char * end) : first(begin), second(end) {}
+ explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {}
+
+ StringRange(TokenIterator token_begin, TokenIterator token_end)
+ {
+ /// Empty range.
+ if (token_begin == token_end)
+ {
+ first = token_begin->begin;
+ second = token_begin->begin;
+ return;
+ }
+
+ TokenIterator token_last = token_end;
+ --token_last;
+
+ first = token_begin->begin;
+ second = token_last->end;
+ }
+};
+
+using StringPtr = std::shared_ptr<String>;
+
+
+inline String toString(const StringRange & range)
+{
+ return range.first ? String(range.first, range.second) : String();
+}
+
+/// Hashes only the values of pointers in StringRange. Is used with StringRangePointersEqualTo comparator.
+struct StringRangePointersHash
+{
+ UInt64 operator()(const StringRange & range) const
+ {
+ SipHash hash;
+ hash.update(range.first);
+ hash.update(range.second);
+ return hash.get64();
+ }
+};
+
+/// Ranges are equal only when they point to the same memory region.
+/// It may be used when it's enough to compare substrings by their position in the same string.
+struct StringRangePointersEqualTo
+{
+ constexpr bool operator()(const StringRange &lhs, const StringRange &rhs) const
+ {
+ return std::tie(lhs.first, lhs.second) == std::tie(rhs.first, rhs.second);
+ }
+};
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h
index edb040d72d..33be3042b3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TablePropertiesQueriesASTs.h
@@ -1,12 +1,12 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Common/quoteString.h>
-
-
-namespace DB
-{
-
+#pragma once
+
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Common/quoteString.h>
+
+
+namespace DB
+{
+
struct ASTExistsDatabaseQueryIDAndQueryNames
{
static constexpr auto ID = "ExistsDatabaseQuery";
@@ -15,13 +15,13 @@ struct ASTExistsDatabaseQueryIDAndQueryNames
static constexpr auto QueryTemporary = "";
};
-struct ASTExistsTableQueryIDAndQueryNames
-{
- static constexpr auto ID = "ExistsTableQuery";
- static constexpr auto Query = "EXISTS TABLE";
- static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE";
-};
-
+struct ASTExistsTableQueryIDAndQueryNames
+{
+ static constexpr auto ID = "ExistsTableQuery";
+ static constexpr auto Query = "EXISTS TABLE";
+ static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE";
+};
+
struct ASTExistsViewQueryIDAndQueryNames
{
static constexpr auto ID = "ExistsViewQuery";
@@ -31,21 +31,21 @@ struct ASTExistsViewQueryIDAndQueryNames
};
-struct ASTExistsDictionaryQueryIDAndQueryNames
-{
- static constexpr auto ID = "ExistsDictionaryQuery";
- static constexpr auto Query = "EXISTS DICTIONARY";
- /// No temporary dictionaries are supported, just for parsing
- static constexpr auto QueryTemporary = "EXISTS TEMPORARY DICTIONARY";
-};
-
-struct ASTShowCreateTableQueryIDAndQueryNames
-{
- static constexpr auto ID = "ShowCreateTableQuery";
- static constexpr auto Query = "SHOW CREATE TABLE";
- static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE";
-};
-
+struct ASTExistsDictionaryQueryIDAndQueryNames
+{
+ static constexpr auto ID = "ExistsDictionaryQuery";
+ static constexpr auto Query = "EXISTS DICTIONARY";
+ /// No temporary dictionaries are supported, just for parsing
+ static constexpr auto QueryTemporary = "EXISTS TEMPORARY DICTIONARY";
+};
+
+struct ASTShowCreateTableQueryIDAndQueryNames
+{
+ static constexpr auto ID = "ShowCreateTableQuery";
+ static constexpr auto Query = "SHOW CREATE TABLE";
+ static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE";
+};
+
struct ASTShowCreateViewQueryIDAndQueryNames
{
static constexpr auto ID = "ShowCreateViewQuery";
@@ -54,35 +54,35 @@ struct ASTShowCreateViewQueryIDAndQueryNames
static constexpr auto QueryTemporary = "";
};
-struct ASTShowCreateDatabaseQueryIDAndQueryNames
-{
- static constexpr auto ID = "ShowCreateDatabaseQuery";
- static constexpr auto Query = "SHOW CREATE DATABASE";
- static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE";
-};
-
-struct ASTShowCreateDictionaryQueryIDAndQueryNames
-{
- static constexpr auto ID = "ShowCreateDictionaryQuery";
- static constexpr auto Query = "SHOW CREATE DICTIONARY";
- /// No temporary dictionaries are supported, just for parsing
- static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DICTIONARY";
-};
-
-struct ASTDescribeQueryExistsQueryIDAndQueryNames
-{
- static constexpr auto ID = "DescribeQuery";
- static constexpr auto Query = "DESCRIBE TABLE";
- static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE";
-};
-
-using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl<ASTExistsTableQueryIDAndQueryNames>;
+struct ASTShowCreateDatabaseQueryIDAndQueryNames
+{
+ static constexpr auto ID = "ShowCreateDatabaseQuery";
+ static constexpr auto Query = "SHOW CREATE DATABASE";
+ static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE";
+};
+
+struct ASTShowCreateDictionaryQueryIDAndQueryNames
+{
+ static constexpr auto ID = "ShowCreateDictionaryQuery";
+ static constexpr auto Query = "SHOW CREATE DICTIONARY";
+ /// No temporary dictionaries are supported, just for parsing
+ static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DICTIONARY";
+};
+
+struct ASTDescribeQueryExistsQueryIDAndQueryNames
+{
+ static constexpr auto ID = "DescribeQuery";
+ static constexpr auto Query = "DESCRIBE TABLE";
+ static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE";
+};
+
+using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl<ASTExistsTableQueryIDAndQueryNames>;
using ASTExistsViewQuery = ASTQueryWithTableAndOutputImpl<ASTExistsViewQueryIDAndQueryNames>;
-using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDictionaryQueryIDAndQueryNames>;
-using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateTableQueryIDAndQueryNames>;
+using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDictionaryQueryIDAndQueryNames>;
+using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateTableQueryIDAndQueryNames>;
using ASTShowCreateViewQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateViewQueryIDAndQueryNames>;
-using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateDictionaryQueryIDAndQueryNames>;
-
+using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateDictionaryQueryIDAndQueryNames>;
+
class ASTExistsDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTExistsDatabaseQueryIDAndQueryNames>
{
protected:
@@ -93,44 +93,44 @@ protected:
}
};
-class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTShowCreateDatabaseQueryIDAndQueryNames>
-{
-protected:
- void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "") << ASTShowCreateDatabaseQueryIDAndQueryNames::Query
- << " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database);
- }
-};
-
-class ASTDescribeQuery : public ASTQueryWithOutput
-{
-public:
- ASTPtr table_expression;
-
- String getID(char) const override { return "DescribeQuery"; }
-
- ASTPtr clone() const override
- {
- auto res = std::make_shared<ASTDescribeQuery>(*this);
- res->children.clear();
- if (table_expression)
- {
- res->table_expression = table_expression->clone();
- res->children.push_back(res->table_expression);
- }
- cloneOutputOptions(*res);
- return res;
- }
-
-protected:
- void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
- {
- settings.ostr << (settings.hilite ? hilite_keyword : "")
- << "DESCRIBE TABLE " << (settings.hilite ? hilite_none : "");
- table_expression->formatImpl(settings, state, frame);
- }
-
-};
-
-}
+class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTShowCreateDatabaseQueryIDAndQueryNames>
+{
+protected:
+ void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "") << ASTShowCreateDatabaseQueryIDAndQueryNames::Query
+ << " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database);
+ }
+};
+
+class ASTDescribeQuery : public ASTQueryWithOutput
+{
+public:
+ ASTPtr table_expression;
+
+ String getID(char) const override { return "DescribeQuery"; }
+
+ ASTPtr clone() const override
+ {
+ auto res = std::make_shared<ASTDescribeQuery>(*this);
+ res->children.clear();
+ if (table_expression)
+ {
+ res->table_expression = table_expression->clone();
+ res->children.push_back(res->table_expression);
+ }
+ cloneOutputOptions(*res);
+ return res;
+ }
+
+protected:
+ void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
+ {
+ settings.ostr << (settings.hilite ? hilite_keyword : "")
+ << "DESCRIBE TABLE " << (settings.hilite ? hilite_none : "");
+ table_expression->formatImpl(settings, state, frame);
+ }
+
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp
index 08877e0b2f..f6dc405728 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/TokenIterator.cpp
@@ -1,47 +1,47 @@
-#include <Parsers/TokenIterator.h>
-
-
-namespace DB
-{
-
+#include <Parsers/TokenIterator.h>
+
+
+namespace DB
+{
+
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin)
-{
- /// We have just two kind of parentheses: () and [].
- UnmatchedParentheses stack;
-
+{
+ /// We have just two kind of parentheses: () and [].
+ UnmatchedParentheses stack;
+
/// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error
/// when parser failed in the middle of the query.
for (TokenIterator it = begin; it.isValid(); ++it)
- {
- if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
- {
- stack.push_back(*it);
- }
- else if (it->type == TokenType::ClosingRoundBracket || it->type == TokenType::ClosingSquareBracket)
- {
- if (stack.empty())
- {
- /// Excessive closing bracket.
- stack.push_back(*it);
- return stack;
- }
- else if ((stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket)
- || (stack.back().type == TokenType::OpeningSquareBracket && it->type == TokenType::ClosingSquareBracket))
- {
- /// Valid match.
- stack.pop_back();
- }
- else
- {
- /// Closing bracket type doesn't match opening bracket type.
- stack.push_back(*it);
- return stack;
- }
- }
- }
-
- /// If stack is not empty, we have unclosed brackets.
- return stack;
-}
-
-}
+ {
+ if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
+ {
+ stack.push_back(*it);
+ }
+ else if (it->type == TokenType::ClosingRoundBracket || it->type == TokenType::ClosingSquareBracket)
+ {
+ if (stack.empty())
+ {
+ /// Excessive closing bracket.
+ stack.push_back(*it);
+ return stack;
+ }
+ else if ((stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket)
+ || (stack.back().type == TokenType::OpeningSquareBracket && it->type == TokenType::ClosingSquareBracket))
+ {
+ /// Valid match.
+ stack.pop_back();
+ }
+ else
+ {
+ /// Closing bracket type doesn't match opening bracket type.
+ stack.push_back(*it);
+ return stack;
+ }
+ }
+ }
+
+ /// If stack is not empty, we have unclosed brackets.
+ return stack;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp
index 13429df5b4..951900e9c0 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.cpp
@@ -1,114 +1,114 @@
-#include "parseDatabaseAndTableName.h"
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/CommonParsers.h>
-
-
-namespace DB
-{
-
-bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str)
-{
- ParserToken s_dot(TokenType::Dot);
- ParserIdentifier table_parser;
-
- ASTPtr database;
- ASTPtr table;
-
- database_str = "";
- table_str = "";
-
- if (!table_parser.parse(pos, database, expected))
- return false;
-
- if (s_dot.ignore(pos))
- {
- if (!table_parser.parse(pos, table, expected))
- {
- database_str = "";
- return false;
- }
-
- tryGetIdentifierNameInto(database, database_str);
- tryGetIdentifierNameInto(table, table_str);
- }
- else
- {
- database_str = "";
- tryGetIdentifierNameInto(database, table_str);
- }
-
- return true;
-}
-
-
-bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table)
-{
- return IParserBase::wrapParseImpl(pos, [&]
- {
- if (ParserToken{TokenType::Asterisk}.ignore(pos, expected))
- {
- auto pos_before_dot = pos;
- if (ParserToken{TokenType::Dot}.ignore(pos, expected)
- && ParserToken{TokenType::Asterisk}.ignore(pos, expected))
- {
- /// *.*
- any_database = true;
- database.clear();
- any_table = true;
- table.clear();
- return true;
- }
-
- /// *
- pos = pos_before_dot;
- any_database = false;
- database.clear();
- any_table = true;
- table.clear();
- return true;
- }
-
- ASTPtr ast;
- ParserIdentifier identifier_parser;
- if (identifier_parser.parse(pos, ast, expected))
- {
- String first_identifier = getIdentifierName(ast);
- auto pos_before_dot = pos;
-
- if (ParserToken{TokenType::Dot}.ignore(pos, expected))
- {
- if (ParserToken{TokenType::Asterisk}.ignore(pos, expected))
- {
- /// db.*
- any_database = false;
- database = std::move(first_identifier);
- any_table = true;
- table.clear();
- return true;
- }
- else if (identifier_parser.parse(pos, ast, expected))
- {
- /// db.table
- any_database = false;
- database = std::move(first_identifier);
- any_table = false;
- table = getIdentifierName(ast);
- return true;
- }
- }
-
- /// table
- pos = pos_before_dot;
- any_database = false;
- database.clear();
- any_table = false;
- table = std::move(first_identifier);
- return true;
- }
-
- return false;
- });
-}
-
-}
+#include "parseDatabaseAndTableName.h"
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/CommonParsers.h>
+
+
+namespace DB
+{
+
+bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str)
+{
+ ParserToken s_dot(TokenType::Dot);
+ ParserIdentifier table_parser;
+
+ ASTPtr database;
+ ASTPtr table;
+
+ database_str = "";
+ table_str = "";
+
+ if (!table_parser.parse(pos, database, expected))
+ return false;
+
+ if (s_dot.ignore(pos))
+ {
+ if (!table_parser.parse(pos, table, expected))
+ {
+ database_str = "";
+ return false;
+ }
+
+ tryGetIdentifierNameInto(database, database_str);
+ tryGetIdentifierNameInto(table, table_str);
+ }
+ else
+ {
+ database_str = "";
+ tryGetIdentifierNameInto(database, table_str);
+ }
+
+ return true;
+}
+
+
+bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table)
+{
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ if (ParserToken{TokenType::Asterisk}.ignore(pos, expected))
+ {
+ auto pos_before_dot = pos;
+ if (ParserToken{TokenType::Dot}.ignore(pos, expected)
+ && ParserToken{TokenType::Asterisk}.ignore(pos, expected))
+ {
+ /// *.*
+ any_database = true;
+ database.clear();
+ any_table = true;
+ table.clear();
+ return true;
+ }
+
+ /// *
+ pos = pos_before_dot;
+ any_database = false;
+ database.clear();
+ any_table = true;
+ table.clear();
+ return true;
+ }
+
+ ASTPtr ast;
+ ParserIdentifier identifier_parser;
+ if (identifier_parser.parse(pos, ast, expected))
+ {
+ String first_identifier = getIdentifierName(ast);
+ auto pos_before_dot = pos;
+
+ if (ParserToken{TokenType::Dot}.ignore(pos, expected))
+ {
+ if (ParserToken{TokenType::Asterisk}.ignore(pos, expected))
+ {
+ /// db.*
+ any_database = false;
+ database = std::move(first_identifier);
+ any_table = true;
+ table.clear();
+ return true;
+ }
+ else if (identifier_parser.parse(pos, ast, expected))
+ {
+ /// db.table
+ any_database = false;
+ database = std::move(first_identifier);
+ any_table = false;
+ table = getIdentifierName(ast);
+ return true;
+ }
+ }
+
+ /// table
+ pos = pos_before_dot;
+ any_database = false;
+ database.clear();
+ any_table = false;
+ table = std::move(first_identifier);
+ return true;
+ }
+
+ return false;
+ });
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h
index e4699c8ad9..e12e4f07c2 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseDatabaseAndTableName.h
@@ -1,13 +1,13 @@
-#pragma once
-#include <Parsers/IParser.h>
-
-namespace DB
-{
-
-/// Parses [db.]name
-bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str);
-
-/// Parses [db.]name or [db.]* or [*.]*
-bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table);
-
-}
+#pragma once
+#include <Parsers/IParser.h>
+
+namespace DB
+{
+
+/// Parses [db.]name
+bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str);
+
+/// Parses [db.]name or [db.]* or [*.]*
+bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table);
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp
index 1f25f51ef2..b6c6ff6466 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.cpp
@@ -1,46 +1,46 @@
-#include <Parsers/parseUserName.h>
-#include <Parsers/ParserUserNameWithHost.h>
-#include <Parsers/ASTUserNameWithHost.h>
-#include <Parsers/CommonParsers.h>
-
-
-namespace DB
-{
-
-bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name)
-{
- ASTPtr ast;
- if (!ParserUserNameWithHost{}.parse(pos, ast, expected))
- return false;
- user_name = ast->as<const ASTUserNameWithHost &>().toString();
- return true;
-}
-
-
-bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names)
-{
- ASTPtr ast;
- if (!ParserUserNamesWithHost{}.parse(pos, ast, expected))
- return false;
- user_names = ast->as<const ASTUserNamesWithHost &>().toStrings();
- return true;
-}
-
-
-bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected)
-{
- return IParserBase::wrapParseImpl(pos, [&]
- {
- if (!ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) && !ParserKeyword{"currentUser"}.ignore(pos, expected))
- return false;
-
- if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected))
- {
- if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected))
- return false;
- }
- return true;
- });
-}
-
-}
+#include <Parsers/parseUserName.h>
+#include <Parsers/ParserUserNameWithHost.h>
+#include <Parsers/ASTUserNameWithHost.h>
+#include <Parsers/CommonParsers.h>
+
+
+namespace DB
+{
+
+bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name)
+{
+ ASTPtr ast;
+ if (!ParserUserNameWithHost{}.parse(pos, ast, expected))
+ return false;
+ user_name = ast->as<const ASTUserNameWithHost &>().toString();
+ return true;
+}
+
+
+bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names)
+{
+ ASTPtr ast;
+ if (!ParserUserNamesWithHost{}.parse(pos, ast, expected))
+ return false;
+ user_names = ast->as<const ASTUserNamesWithHost &>().toStrings();
+ return true;
+}
+
+
+bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected)
+{
+ return IParserBase::wrapParseImpl(pos, [&]
+ {
+ if (!ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) && !ParserKeyword{"currentUser"}.ignore(pos, expected))
+ return false;
+
+ if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected))
+ {
+ if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected))
+ return false;
+ }
+ return true;
+ });
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h
index c1ad36c936..678e73daec 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/parseUserName.h
@@ -1,36 +1,36 @@
-#pragma once
-
-#include <Parsers/IParser.h>
-
-
-namespace DB
-{
-/// Parses a user name. It can be a simple string or identifier or something like `name@host`.
-/// In the last case `host` specifies the hosts user is allowed to connect from.
-/// The `host` can be an ip address, ip subnet, or a host name.
-/// The % and _ wildcard characters are permitted in `host`.
-/// These have the same meaning as for pattern-matching operations performed with the LIKE operator.
-bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name);
-
-/// Parses a comma-separated list of user names.
-bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names);
-
-
-/// Parses either the 'CURRENT_USER' keyword (or some of its aliases).
-bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected);
-
-
-/// Parses a role name. It follows the same rules as a user name, but allowed hosts are never checked
-/// (because roles are not used to connect to server).
-inline bool parseRoleName(IParser::Pos & pos, Expected & expected, String & role_name)
-{
- return parseUserName(pos, expected, role_name);
-}
-
-/// Parses a comma-separated list of role names.
-inline bool parseRoleNames(IParser::Pos & pos, Expected & expected, Strings & role_names)
-{
- return parseUserNames(pos, expected, role_names);
-}
-
-}
+#pragma once
+
+#include <Parsers/IParser.h>
+
+
+namespace DB
+{
+/// Parses a user name. It can be a simple string or identifier or something like `name@host`.
+/// In the last case `host` specifies the hosts user is allowed to connect from.
+/// The `host` can be an ip address, ip subnet, or a host name.
+/// The % and _ wildcard characters are permitted in `host`.
+/// These have the same meaning as for pattern-matching operations performed with the LIKE operator.
+bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name);
+
+/// Parses a comma-separated list of user names.
+bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names);
+
+
+/// Parses either the 'CURRENT_USER' keyword (or some of its aliases).
+bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected);
+
+
+/// Parses a role name. It follows the same rules as a user name, but allowed hosts are never checked
+/// (because roles are not used to connect to server).
+inline bool parseRoleName(IParser::Pos & pos, Expected & expected, String & role_name)
+{
+ return parseUserName(pos, expected, role_name);
+}
+
+/// Parses a comma-separated list of role names.
+inline bool parseRoleNames(IParser::Pos & pos, Expected & expected, Strings & role_names)
+{
+ return parseUserNames(pos, expected, role_names);
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp
index 4800bfca2c..65e3998168 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Chunk.cpp
@@ -1,170 +1,170 @@
-#include <Processors/Chunk.h>
-#include <IO/WriteHelpers.h>
-#include <IO/Operators.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int POSITION_OUT_OF_BOUND;
-}
-
-Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_)
-{
- checkNumRowsIsConsistent();
-}
-
-Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
- : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_))
-{
- checkNumRowsIsConsistent();
-}
-
-static Columns unmuteColumns(MutableColumns && mut_columns)
-{
- Columns columns;
- columns.reserve(mut_columns.size());
- for (auto & col : mut_columns)
- columns.emplace_back(std::move(col));
-
- return columns;
-}
-
-Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
- : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_)
-{
- checkNumRowsIsConsistent();
-}
-
-Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
- : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_))
-{
- checkNumRowsIsConsistent();
-}
-
-Chunk Chunk::clone() const
-{
- return Chunk(getColumns(), getNumRows(), chunk_info);
-}
-
-void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
-{
- columns = std::move(columns_);
- num_rows = num_rows_;
- checkNumRowsIsConsistent();
-}
-
-void Chunk::setColumns(MutableColumns columns_, UInt64 num_rows_)
-{
- columns = unmuteColumns(std::move(columns_));
- num_rows = num_rows_;
- checkNumRowsIsConsistent();
-}
-
-void Chunk::checkNumRowsIsConsistent()
-{
+#include <Processors/Chunk.h>
+#include <IO/WriteHelpers.h>
+#include <IO/Operators.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int POSITION_OUT_OF_BOUND;
+}
+
+Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_)
+{
+ checkNumRowsIsConsistent();
+}
+
+Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
+ : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_))
+{
+ checkNumRowsIsConsistent();
+}
+
+static Columns unmuteColumns(MutableColumns && mut_columns)
+{
+ Columns columns;
+ columns.reserve(mut_columns.size());
+ for (auto & col : mut_columns)
+ columns.emplace_back(std::move(col));
+
+ return columns;
+}
+
+Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
+ : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_)
+{
+ checkNumRowsIsConsistent();
+}
+
+Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
+ : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_))
+{
+ checkNumRowsIsConsistent();
+}
+
+Chunk Chunk::clone() const
+{
+ return Chunk(getColumns(), getNumRows(), chunk_info);
+}
+
+void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
+{
+ columns = std::move(columns_);
+ num_rows = num_rows_;
+ checkNumRowsIsConsistent();
+}
+
+void Chunk::setColumns(MutableColumns columns_, UInt64 num_rows_)
+{
+ columns = unmuteColumns(std::move(columns_));
+ num_rows = num_rows_;
+ checkNumRowsIsConsistent();
+}
+
+void Chunk::checkNumRowsIsConsistent()
+{
for (size_t i = 0; i < columns.size(); ++i)
{
auto & column = columns[i];
- if (column->size() != num_rows)
+ if (column->size() != num_rows)
throw Exception("Invalid number of rows in Chunk column " + column->getName()+ " position " + toString(i) + ": expected " +
- toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
+ toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
}
-}
-
-MutableColumns Chunk::mutateColumns()
-{
- size_t num_columns = columns.size();
- MutableColumns mut_columns(num_columns);
- for (size_t i = 0; i < num_columns; ++i)
- mut_columns[i] = IColumn::mutate(std::move(columns[i]));
-
- columns.clear();
- num_rows = 0;
-
- return mut_columns;
-}
-
-MutableColumns Chunk::cloneEmptyColumns() const
-{
- size_t num_columns = columns.size();
- MutableColumns mut_columns(num_columns);
- for (size_t i = 0; i < num_columns; ++i)
- mut_columns[i] = columns[i]->cloneEmpty();
- return mut_columns;
-}
-
-Columns Chunk::detachColumns()
-{
- num_rows = 0;
- return std::move(columns);
-}
-
-void Chunk::addColumn(ColumnPtr column)
-{
- if (column->size() != num_rows)
- throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " +
- toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
-
- columns.emplace_back(std::move(column));
-}
-
-void Chunk::erase(size_t position)
-{
- if (columns.empty())
- throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
-
- if (position >= columns.size())
- throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = "
- + toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
-
- columns.erase(columns.begin() + position);
-}
-
-UInt64 Chunk::bytes() const
-{
- UInt64 res = 0;
- for (const auto & column : columns)
- res += column->byteSize();
-
- return res;
-}
-
-UInt64 Chunk::allocatedBytes() const
-{
- UInt64 res = 0;
- for (const auto & column : columns)
- res += column->allocatedBytes();
-
- return res;
-}
-
-std::string Chunk::dumpStructure() const
-{
- WriteBufferFromOwnString out;
- for (const auto & column : columns)
- out << ' ' << column->dumpStructure();
-
- return out.str();
-}
-
-
-void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx)
-{
- RowsBitMask & mask = rows_mask_by_column_id[column_idx];
- mask.resize(row_idx + 1);
- mask[row_idx] = true;
-}
-
-const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(size_t column_idx) const
-{
- static RowsBitMask none;
- auto it = rows_mask_by_column_id.find(column_idx);
- if (it != rows_mask_by_column_id.end())
- return it->second;
- return none;
-}
-
-}
+}
+
+MutableColumns Chunk::mutateColumns()
+{
+ size_t num_columns = columns.size();
+ MutableColumns mut_columns(num_columns);
+ for (size_t i = 0; i < num_columns; ++i)
+ mut_columns[i] = IColumn::mutate(std::move(columns[i]));
+
+ columns.clear();
+ num_rows = 0;
+
+ return mut_columns;
+}
+
+MutableColumns Chunk::cloneEmptyColumns() const
+{
+ size_t num_columns = columns.size();
+ MutableColumns mut_columns(num_columns);
+ for (size_t i = 0; i < num_columns; ++i)
+ mut_columns[i] = columns[i]->cloneEmpty();
+ return mut_columns;
+}
+
+Columns Chunk::detachColumns()
+{
+ num_rows = 0;
+ return std::move(columns);
+}
+
+void Chunk::addColumn(ColumnPtr column)
+{
+ if (column->size() != num_rows)
+ throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " +
+ toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
+
+ columns.emplace_back(std::move(column));
+}
+
+void Chunk::erase(size_t position)
+{
+ if (columns.empty())
+ throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
+
+ if (position >= columns.size())
+ throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = "
+ + toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
+
+ columns.erase(columns.begin() + position);
+}
+
+UInt64 Chunk::bytes() const
+{
+ UInt64 res = 0;
+ for (const auto & column : columns)
+ res += column->byteSize();
+
+ return res;
+}
+
+UInt64 Chunk::allocatedBytes() const
+{
+ UInt64 res = 0;
+ for (const auto & column : columns)
+ res += column->allocatedBytes();
+
+ return res;
+}
+
+std::string Chunk::dumpStructure() const
+{
+ WriteBufferFromOwnString out;
+ for (const auto & column : columns)
+ out << ' ' << column->dumpStructure();
+
+ return out.str();
+}
+
+
+void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx)
+{
+ RowsBitMask & mask = rows_mask_by_column_id[column_idx];
+ mask.resize(row_idx + 1);
+ mask[row_idx] = true;
+}
+
+const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(size_t column_idx) const
+{
+ static RowsBitMask none;
+ auto it = rows_mask_by_column_id.find(column_idx);
+ if (it != rows_mask_by_column_id.end())
+ return it->second;
+ return none;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp
index f4648caf0f..5eb4504027 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.cpp
@@ -1,64 +1,64 @@
-#include <Processors/ConcatProcessor.h>
-
-
-namespace DB
-{
-
-ConcatProcessor::ConcatProcessor(const Block & header, size_t num_inputs)
- : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin())
-{
-}
-
-ConcatProcessor::Status ConcatProcessor::prepare()
-{
- auto & output = outputs.front();
-
- /// Check can output.
-
- if (output.isFinished())
- {
- for (; current_input != inputs.end(); ++current_input)
- current_input->close();
-
- return Status::Finished;
- }
-
- if (!output.isNeeded())
- {
- if (current_input != inputs.end())
- current_input->setNotNeeded();
-
- return Status::PortFull;
- }
-
- if (!output.canPush())
- return Status::PortFull;
-
- /// Check can input.
-
- while (current_input != inputs.end() && current_input->isFinished())
- ++current_input;
-
- if (current_input == inputs.end())
- {
- output.finish();
- return Status::Finished;
- }
-
- auto & input = *current_input;
-
- input.setNeeded();
-
- if (!input.hasData())
- return Status::NeedData;
-
- /// Move data.
- output.push(input.pull());
-
- /// Now, we pushed to output, and it must be full.
- return Status::PortFull;
-}
-
-}
-
-
+#include <Processors/ConcatProcessor.h>
+
+
+namespace DB
+{
+
+ConcatProcessor::ConcatProcessor(const Block & header, size_t num_inputs)
+ : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin())
+{
+}
+
+ConcatProcessor::Status ConcatProcessor::prepare()
+{
+ auto & output = outputs.front();
+
+ /// Check can output.
+
+ if (output.isFinished())
+ {
+ for (; current_input != inputs.end(); ++current_input)
+ current_input->close();
+
+ return Status::Finished;
+ }
+
+ if (!output.isNeeded())
+ {
+ if (current_input != inputs.end())
+ current_input->setNotNeeded();
+
+ return Status::PortFull;
+ }
+
+ if (!output.canPush())
+ return Status::PortFull;
+
+ /// Check can input.
+
+ while (current_input != inputs.end() && current_input->isFinished())
+ ++current_input;
+
+ if (current_input == inputs.end())
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ auto & input = *current_input;
+
+ input.setNeeded();
+
+ if (!input.hasData())
+ return Status::NeedData;
+
+ /// Move data.
+ output.push(input.pull());
+
+ /// Now, we pushed to output, and it must be full.
+ return Status::PortFull;
+}
+
+}
+
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h
index 4a1fc58041..b60d07e516 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ConcatProcessor.h
@@ -1,32 +1,32 @@
-#pragma once
-
-#include <Processors/IProcessor.h>
-
-
-namespace DB
-{
-
+#pragma once
+
+#include <Processors/IProcessor.h>
+
+
+namespace DB
+{
+
/** Has arbitrary non zero number of inputs and one output.
- * All of them have the same structure.
- *
- * Pulls all data from first input, then all data from second input, etc...
- * Doesn't do any heavy calculations.
- * Preserves an order of data.
- */
-class ConcatProcessor : public IProcessor
-{
-public:
- ConcatProcessor(const Block & header, size_t num_inputs);
-
- String getName() const override { return "Concat"; }
-
- Status prepare() override;
-
- OutputPort & getOutputPort() { return outputs.front(); }
-
-private:
- InputPorts::iterator current_input;
-};
-
-}
-
+ * All of them have the same structure.
+ *
+ * Pulls all data from first input, then all data from second input, etc...
+ * Doesn't do any heavy calculations.
+ * Preserves an order of data.
+ */
+class ConcatProcessor : public IProcessor
+{
+public:
+ ConcatProcessor(const Block & header, size_t num_inputs);
+
+ String getName() const override { return "Concat"; }
+
+ Status prepare() override;
+
+ OutputPort & getOutputPort() { return outputs.front(); }
+
+private:
+ InputPorts::iterator current_input;
+};
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index d62cc112d1..19eb0f030b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -1,508 +1,508 @@
-#include <IO/ReadHelpers.h>
-#include <IO/Operators.h>
-
-#include <Formats/verbosePrintString.h>
-#include <Processors/Formats/Impl/CSVRowInputFormat.h>
-#include <Formats/FormatFactory.h>
+#include <IO/ReadHelpers.h>
+#include <IO/Operators.h>
+
+#include <Formats/verbosePrintString.h>
+#include <Processors/Formats/Impl/CSVRowInputFormat.h>
+#include <Formats/FormatFactory.h>
#include <DataTypes/Serializations/SerializationNullable.h>
-#include <DataTypes/DataTypeNothing.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int BAD_ARGUMENTS;
- extern const int INCORRECT_DATA;
+#include <DataTypes/DataTypeNothing.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+ extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
-}
-
-
-CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
- bool with_names_, const FormatSettings & format_settings_)
- : RowInputFormatWithDiagnosticInfo(header_, in_, params_)
- , with_names(with_names_)
- , format_settings(format_settings_)
-{
-
- const String bad_delimiters = " \t\"'.UL";
- if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos)
- throw Exception(String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter +
- "'. Try use CustomSeparated format instead.", ErrorCodes::BAD_ARGUMENTS);
-
- const auto & sample = getPort().getHeader();
- size_t num_columns = sample.columns();
-
- data_types.resize(num_columns);
- column_indexes_by_names.reserve(num_columns);
-
- for (size_t i = 0; i < num_columns; ++i)
- {
- const auto & column_info = sample.getByPosition(i);
-
- data_types[i] = column_info.type;
- column_indexes_by_names.emplace(column_info.name, i);
- }
-}
-
-
-/// Map an input file column to a table column, based on its name.
-void CSVRowInputFormat::addInputColumn(const String & column_name)
-{
- const auto column_it = column_indexes_by_names.find(column_name);
- if (column_it == column_indexes_by_names.end())
- {
- if (format_settings.skip_unknown_fields)
- {
+}
+
+
+CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
+ bool with_names_, const FormatSettings & format_settings_)
+ : RowInputFormatWithDiagnosticInfo(header_, in_, params_)
+ , with_names(with_names_)
+ , format_settings(format_settings_)
+{
+
+ const String bad_delimiters = " \t\"'.UL";
+ if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos)
+ throw Exception(String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter +
+ "'. Try use CustomSeparated format instead.", ErrorCodes::BAD_ARGUMENTS);
+
+ const auto & sample = getPort().getHeader();
+ size_t num_columns = sample.columns();
+
+ data_types.resize(num_columns);
+ column_indexes_by_names.reserve(num_columns);
+
+ for (size_t i = 0; i < num_columns; ++i)
+ {
+ const auto & column_info = sample.getByPosition(i);
+
+ data_types[i] = column_info.type;
+ column_indexes_by_names.emplace(column_info.name, i);
+ }
+}
+
+
+/// Map an input file column to a table column, based on its name.
+void CSVRowInputFormat::addInputColumn(const String & column_name)
+{
+ const auto column_it = column_indexes_by_names.find(column_name);
+ if (column_it == column_indexes_by_names.end())
+ {
+ if (format_settings.skip_unknown_fields)
+ {
column_mapping->column_indexes_for_input_fields.push_back(std::nullopt);
- return;
- }
-
- throw Exception(
- "Unknown field found in CSV header: '" + column_name + "' " +
+ return;
+ }
+
+ throw Exception(
+ "Unknown field found in CSV header: '" + column_name + "' " +
"at position " + std::to_string(column_mapping->column_indexes_for_input_fields.size()) +
- "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed",
- ErrorCodes::INCORRECT_DATA
- );
- }
-
- const auto column_index = column_it->second;
-
+ "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed",
+ ErrorCodes::INCORRECT_DATA
+ );
+ }
+
+ const auto column_index = column_it->second;
+
if (column_mapping->read_columns[column_index])
- throw Exception("Duplicate field found while parsing CSV header: " + column_name, ErrorCodes::INCORRECT_DATA);
-
+ throw Exception("Duplicate field found while parsing CSV header: " + column_name, ErrorCodes::INCORRECT_DATA);
+
column_mapping->read_columns[column_index] = true;
column_mapping->column_indexes_for_input_fields.emplace_back(column_index);
-}
-
-static void skipEndOfLine(ReadBuffer & in)
-{
- /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
-
- if (*in.position() == '\n')
- {
- ++in.position();
- if (!in.eof() && *in.position() == '\r')
- ++in.position();
- }
- else if (*in.position() == '\r')
- {
- ++in.position();
- if (!in.eof() && *in.position() == '\n')
- ++in.position();
- else
- throw Exception("Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)."
- " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", ErrorCodes::INCORRECT_DATA);
- }
- else if (!in.eof())
- throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA);
-}
-
-
-static void skipDelimiter(ReadBuffer & in, const char delimiter, bool is_last_column)
-{
- if (is_last_column)
- {
- if (in.eof())
- return;
-
- /// we support the extra delimiter at the end of the line
- if (*in.position() == delimiter)
- {
- ++in.position();
- if (in.eof())
- return;
- }
-
- skipEndOfLine(in);
- }
- else
- assertChar(delimiter, in);
-}
-
-
-/// Skip `whitespace` symbols allowed in CSV.
-static inline void skipWhitespacesAndTabs(ReadBuffer & in)
-{
- while (!in.eof()
- && (*in.position() == ' '
- || *in.position() == '\t'))
- ++in.position();
-}
-
-
-static void skipRow(ReadBuffer & in, const FormatSettings::CSV & settings, size_t num_columns)
-{
- String tmp;
- for (size_t i = 0; i < num_columns; ++i)
- {
- skipWhitespacesAndTabs(in);
- readCSVString(tmp, in, settings);
- skipWhitespacesAndTabs(in);
-
- skipDelimiter(in, settings.delimiter, i + 1 == num_columns);
- }
-}
-
+}
+
+static void skipEndOfLine(ReadBuffer & in)
+{
+ /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
+
+ if (*in.position() == '\n')
+ {
+ ++in.position();
+ if (!in.eof() && *in.position() == '\r')
+ ++in.position();
+ }
+ else if (*in.position() == '\r')
+ {
+ ++in.position();
+ if (!in.eof() && *in.position() == '\n')
+ ++in.position();
+ else
+ throw Exception("Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)."
+ " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", ErrorCodes::INCORRECT_DATA);
+ }
+ else if (!in.eof())
+ throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA);
+}
+
+
+static void skipDelimiter(ReadBuffer & in, const char delimiter, bool is_last_column)
+{
+ if (is_last_column)
+ {
+ if (in.eof())
+ return;
+
+ /// we support the extra delimiter at the end of the line
+ if (*in.position() == delimiter)
+ {
+ ++in.position();
+ if (in.eof())
+ return;
+ }
+
+ skipEndOfLine(in);
+ }
+ else
+ assertChar(delimiter, in);
+}
+
+
+/// Skip `whitespace` symbols allowed in CSV.
+static inline void skipWhitespacesAndTabs(ReadBuffer & in)
+{
+ while (!in.eof()
+ && (*in.position() == ' '
+ || *in.position() == '\t'))
+ ++in.position();
+}
+
+
+static void skipRow(ReadBuffer & in, const FormatSettings::CSV & settings, size_t num_columns)
+{
+ String tmp;
+ for (size_t i = 0; i < num_columns; ++i)
+ {
+ skipWhitespacesAndTabs(in);
+ readCSVString(tmp, in, settings);
+ skipWhitespacesAndTabs(in);
+
+ skipDelimiter(in, settings.delimiter, i + 1 == num_columns);
+ }
+}
+
void CSVRowInputFormat::setupAllColumnsByTableSchema()
{
const auto & header = getPort().getHeader();
column_mapping->read_columns.assign(header.columns(), true);
column_mapping->column_indexes_for_input_fields.resize(header.columns());
-
+
for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i)
column_mapping->column_indexes_for_input_fields[i] = i;
}
-void CSVRowInputFormat::readPrefix()
-{
- /// In this format, we assume, that if first string field contain BOM as value, it will be written in quotes,
- /// so BOM at beginning of stream cannot be confused with BOM in first string value, and it is safe to skip it.
- skipBOMIfExists(in);
-
- size_t num_columns = data_types.size();
- const auto & header = getPort().getHeader();
-
+void CSVRowInputFormat::readPrefix()
+{
+ /// In this format, we assume, that if first string field contain BOM as value, it will be written in quotes,
+ /// so BOM at beginning of stream cannot be confused with BOM in first string value, and it is safe to skip it.
+ skipBOMIfExists(in);
+
+ size_t num_columns = data_types.size();
+ const auto & header = getPort().getHeader();
+
/// This is a bit of abstraction leakage, but we have almost the same code in other places.
/// Thus, we check if this InputFormat is working with the "real" beginning of the data in case of parallel parsing.
if (with_names && getCurrentUnitNumber() == 0)
- {
- /// This CSV file has a header row with column names. Depending on the
- /// settings, use it or skip it.
- if (format_settings.with_names_use_header)
- {
- /// Look at the file header to see which columns we have there.
- /// The missing columns are filled with defaults.
+ {
+ /// This CSV file has a header row with column names. Depending on the
+ /// settings, use it or skip it.
+ if (format_settings.with_names_use_header)
+ {
+ /// Look at the file header to see which columns we have there.
+ /// The missing columns are filled with defaults.
column_mapping->read_columns.assign(header.columns(), false);
- do
- {
- String column_name;
- skipWhitespacesAndTabs(in);
- readCSVString(column_name, in, format_settings.csv);
- skipWhitespacesAndTabs(in);
-
- addInputColumn(column_name);
- }
- while (checkChar(format_settings.csv.delimiter, in));
-
- skipDelimiter(in, format_settings.csv.delimiter, true);
-
+ do
+ {
+ String column_name;
+ skipWhitespacesAndTabs(in);
+ readCSVString(column_name, in, format_settings.csv);
+ skipWhitespacesAndTabs(in);
+
+ addInputColumn(column_name);
+ }
+ while (checkChar(format_settings.csv.delimiter, in));
+
+ skipDelimiter(in, format_settings.csv.delimiter, true);
+
for (auto read_column : column_mapping->read_columns)
- {
- if (!read_column)
- {
+ {
+ if (!read_column)
+ {
column_mapping->have_always_default_columns = true;
- break;
- }
- }
-
- return;
- }
- else
+ break;
+ }
+ }
+
+ return;
+ }
+ else
{
- skipRow(in, format_settings.csv, num_columns);
+ skipRow(in, format_settings.csv, num_columns);
setupAllColumnsByTableSchema();
}
- }
+ }
else if (!column_mapping->is_set)
setupAllColumnsByTableSchema();
-}
-
-
-bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
-{
- if (in.eof())
- return false;
-
- updateDiagnosticInfo();
-
- /// Track whether we have to fill any columns in this row with default
- /// values. If not, we return an empty column mask to the caller, so that
- /// it doesn't have to check it.
+}
+
+
+bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
+{
+ if (in.eof())
+ return false;
+
+ updateDiagnosticInfo();
+
+ /// Track whether we have to fill any columns in this row with default
+ /// values. If not, we return an empty column mask to the caller, so that
+ /// it doesn't have to check it.
bool have_default_columns = column_mapping->have_always_default_columns;
-
+
ext.read_columns.assign(column_mapping->read_columns.size(), true);
- const auto delimiter = format_settings.csv.delimiter;
+ const auto delimiter = format_settings.csv.delimiter;
for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
- {
+ {
const auto & table_column = column_mapping->column_indexes_for_input_fields[file_column];
const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
-
- if (table_column)
- {
- skipWhitespacesAndTabs(in);
+
+ if (table_column)
+ {
+ skipWhitespacesAndTabs(in);
ext.read_columns[*table_column] = readField(*columns[*table_column], data_types[*table_column],
serializations[*table_column], is_last_file_column);
- if (!ext.read_columns[*table_column])
- have_default_columns = true;
- skipWhitespacesAndTabs(in);
- }
- else
- {
- /// We never read this column from the file, just skip it.
- String tmp;
- readCSVString(tmp, in, format_settings.csv);
- }
-
- skipDelimiter(in, delimiter, is_last_file_column);
- }
-
- if (have_default_columns)
- {
+ if (!ext.read_columns[*table_column])
+ have_default_columns = true;
+ skipWhitespacesAndTabs(in);
+ }
+ else
+ {
+ /// We never read this column from the file, just skip it.
+ String tmp;
+ readCSVString(tmp, in, format_settings.csv);
+ }
+
+ skipDelimiter(in, delimiter, is_last_file_column);
+ }
+
+ if (have_default_columns)
+ {
for (size_t i = 0; i < column_mapping->read_columns.size(); i++)
- {
+ {
if (!column_mapping->read_columns[i])
- {
- /// The column value for this row is going to be overwritten
- /// with default by the caller, but the general assumption is
- /// that the column size increases for each row, so we have
- /// to insert something. Since we do not care about the exact
- /// value, we do not have to use the default value specified by
- /// the data type, and can just use IColumn::insertDefault().
- columns[i]->insertDefault();
- ext.read_columns[i] = false;
- }
- }
- }
-
- return true;
-}
-
-bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
-{
- const char delimiter = format_settings.csv.delimiter;
-
+ {
+ /// The column value for this row is going to be overwritten
+ /// with default by the caller, but the general assumption is
+ /// that the column size increases for each row, so we have
+ /// to insert something. Since we do not care about the exact
+ /// value, we do not have to use the default value specified by
+ /// the data type, and can just use IColumn::insertDefault().
+ columns[i]->insertDefault();
+ ext.read_columns[i] = false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
+{
+ const char delimiter = format_settings.csv.delimiter;
+
for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
- {
- if (file_column == 0 && in.eof())
- {
- out << "<End of stream>\n";
- return false;
- }
-
- skipWhitespacesAndTabs(in);
+ {
+ if (file_column == 0 && in.eof())
+ {
+ out << "<End of stream>\n";
+ return false;
+ }
+
+ skipWhitespacesAndTabs(in);
if (column_mapping->column_indexes_for_input_fields[file_column].has_value())
- {
- const auto & header = getPort().getHeader();
+ {
+ const auto & header = getPort().getHeader();
size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value();
- if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx],
- out, file_column))
- return false;
- }
- else
- {
- static const String skipped_column_str = "<SKIPPED COLUMN>";
- static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>();
- static const MutableColumnPtr skipped_column = skipped_column_type->createColumn();
- if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column))
- return false;
- }
- skipWhitespacesAndTabs(in);
-
- /// Delimiters
+ if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx],
+ out, file_column))
+ return false;
+ }
+ else
+ {
+ static const String skipped_column_str = "<SKIPPED COLUMN>";
+ static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>();
+ static const MutableColumnPtr skipped_column = skipped_column_type->createColumn();
+ if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column))
+ return false;
+ }
+ skipWhitespacesAndTabs(in);
+
+ /// Delimiters
if (file_column + 1 == column_mapping->column_indexes_for_input_fields.size())
- {
- if (in.eof())
- return false;
-
- /// we support the extra delimiter at the end of the line
- if (*in.position() == delimiter)
- {
- ++in.position();
- if (in.eof())
- break;
- }
-
- if (!in.eof() && *in.position() != '\n' && *in.position() != '\r')
- {
- out << "ERROR: There is no line feed. ";
- verbosePrintString(in.position(), in.position() + 1, out);
- out << " found instead.\n"
- " It's like your file has more columns than expected.\n"
+ {
+ if (in.eof())
+ return false;
+
+ /// we support the extra delimiter at the end of the line
+ if (*in.position() == delimiter)
+ {
+ ++in.position();
+ if (in.eof())
+ break;
+ }
+
+ if (!in.eof() && *in.position() != '\n' && *in.position() != '\r')
+ {
+ out << "ERROR: There is no line feed. ";
+ verbosePrintString(in.position(), in.position() + 1, out);
+ out << " found instead.\n"
+ " It's like your file has more columns than expected.\n"
"And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n";
-
- return false;
- }
-
- skipEndOfLine(in);
- }
- else
- {
- try
- {
- assertChar(delimiter, in);
- }
- catch (const DB::Exception &)
- {
- if (*in.position() == '\n' || *in.position() == '\r')
- {
- out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected."
- " It's like your file has less columns than expected.\n"
+
+ return false;
+ }
+
+ skipEndOfLine(in);
+ }
+ else
+ {
+ try
+ {
+ assertChar(delimiter, in);
+ }
+ catch (const DB::Exception &)
+ {
+ if (*in.position() == '\n' || *in.position() == '\r')
+ {
+ out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected."
+ " It's like your file has less columns than expected.\n"
"And if your file has the right number of columns, maybe it has unescaped quotes in values.\n";
- }
- else
- {
- out << "ERROR: There is no delimiter (" << delimiter << "). ";
- verbosePrintString(in.position(), in.position() + 1, out);
- out << " found instead.\n";
- }
- return false;
- }
- }
- }
-
- return true;
-}
-
-
-void CSVRowInputFormat::syncAfterError()
-{
- skipToNextLineOrEOF(in);
-}
-
-void CSVRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
-{
+ }
+ else
+ {
+ out << "ERROR: There is no delimiter (" << delimiter << "). ";
+ verbosePrintString(in.position(), in.position() + 1, out);
+ out << " found instead.\n";
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+void CSVRowInputFormat::syncAfterError()
+{
+ skipToNextLineOrEOF(in);
+}
+
+void CSVRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
+{
const auto & index = column_mapping->column_indexes_for_input_fields[file_column];
if (index)
- {
+ {
const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
readField(column, type, serializations[*index], is_last_file_column);
- }
- else
- {
- String tmp;
- readCSVString(tmp, in, format_settings.csv);
- }
-}
-
+ }
+ else
+ {
+ String tmp;
+ readCSVString(tmp, in, format_settings.csv);
+ }
+}
+
bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column)
-{
- const bool at_delimiter = !in.eof() && *in.position() == format_settings.csv.delimiter;
- const bool at_last_column_line_end = is_last_file_column
- && (in.eof() || *in.position() == '\n' || *in.position() == '\r');
-
- /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default
- /// only one empty or NULL column will be expected
- if (format_settings.csv.empty_as_default
- && (at_delimiter || at_last_column_line_end))
- {
- /// Treat empty unquoted column value as default value, if
- /// specified in the settings. Tuple columns might seem
- /// problematic, because they are never quoted but still contain
- /// commas, which might be also used as delimiters. However,
- /// they do not contain empty unquoted fields, so this check
- /// works for tuples as well.
- column.insertDefault();
- return false;
- }
- else if (format_settings.null_as_default && !type->isNullable())
- {
- /// If value is null but type is not nullable then use default value instead.
+{
+ const bool at_delimiter = !in.eof() && *in.position() == format_settings.csv.delimiter;
+ const bool at_last_column_line_end = is_last_file_column
+ && (in.eof() || *in.position() == '\n' || *in.position() == '\r');
+
+ /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default
+ /// only one empty or NULL column will be expected
+ if (format_settings.csv.empty_as_default
+ && (at_delimiter || at_last_column_line_end))
+ {
+ /// Treat empty unquoted column value as default value, if
+ /// specified in the settings. Tuple columns might seem
+ /// problematic, because they are never quoted but still contain
+ /// commas, which might be also used as delimiters. However,
+ /// they do not contain empty unquoted fields, so this check
+ /// works for tuples as well.
+ column.insertDefault();
+ return false;
+ }
+ else if (format_settings.null_as_default && !type->isNullable())
+ {
+ /// If value is null but type is not nullable then use default value instead.
return SerializationNullable::deserializeTextCSVImpl(column, in, format_settings, serialization);
- }
- else
- {
- /// Read the column normally.
+ }
+ else
+ {
+ /// Read the column normally.
serialization->deserializeTextCSV(column, in, format_settings);
- return true;
- }
-}
-
-void CSVRowInputFormat::resetParser()
-{
- RowInputFormatWithDiagnosticInfo::resetParser();
+ return true;
+ }
+}
+
+void CSVRowInputFormat::resetParser()
+{
+ RowInputFormatWithDiagnosticInfo::resetParser();
column_mapping->column_indexes_for_input_fields.clear();
column_mapping->read_columns.clear();
column_mapping->have_always_default_columns = false;
-}
-
-
-void registerInputFormatProcessorCSV(FormatFactory & factory)
-{
- for (bool with_names : {false, true})
- {
- factory.registerInputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=](
- ReadBuffer & buf,
- const Block & sample,
- IRowInputFormat::Params params,
- const FormatSettings & settings)
- {
- return std::make_shared<CSVRowInputFormat>(sample, buf, params, with_names, settings);
- });
- }
-}
-
+}
+
+
+void registerInputFormatProcessorCSV(FormatFactory & factory)
+{
+ for (bool with_names : {false, true})
+ {
+ factory.registerInputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=](
+ ReadBuffer & buf,
+ const Block & sample,
+ IRowInputFormat::Params params,
+ const FormatSettings & settings)
+ {
+ return std::make_shared<CSVRowInputFormat>(sample, buf, params, with_names, settings);
+ });
+ }
+}
+
static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
-{
- char * pos = in.position();
- bool quotes = false;
- bool need_more_data = true;
+{
+ char * pos = in.position();
+ bool quotes = false;
+ bool need_more_data = true;
size_t number_of_rows = 0;
-
- while (loadAtPosition(in, memory, pos) && need_more_data)
- {
- if (quotes)
- {
- pos = find_first_symbols<'"'>(pos, in.buffer().end());
+
+ while (loadAtPosition(in, memory, pos) && need_more_data)
+ {
+ if (quotes)
+ {
+ pos = find_first_symbols<'"'>(pos, in.buffer().end());
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
- continue;
+ continue;
else if (*pos == '"')
- {
- ++pos;
- if (loadAtPosition(in, memory, pos) && *pos == '"')
- ++pos;
- else
- quotes = false;
- }
- }
- else
- {
- pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end());
+ {
+ ++pos;
+ if (loadAtPosition(in, memory, pos) && *pos == '"')
+ ++pos;
+ else
+ quotes = false;
+ }
+ }
+ else
+ {
+ pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end());
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
- continue;
+ continue;
else if (*pos == '"')
- {
- quotes = true;
- ++pos;
- }
- else if (*pos == '\n')
- {
+ {
+ quotes = true;
+ ++pos;
+ }
+ else if (*pos == '\n')
+ {
++number_of_rows;
- if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
- need_more_data = false;
- ++pos;
- if (loadAtPosition(in, memory, pos) && *pos == '\r')
- ++pos;
- }
- else if (*pos == '\r')
- {
- if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
- need_more_data = false;
- ++pos;
- if (loadAtPosition(in, memory, pos) && *pos == '\n')
+ if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
+ need_more_data = false;
+ ++pos;
+ if (loadAtPosition(in, memory, pos) && *pos == '\r')
+ ++pos;
+ }
+ else if (*pos == '\r')
+ {
+ if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
+ need_more_data = false;
+ ++pos;
+ if (loadAtPosition(in, memory, pos) && *pos == '\n')
{
- ++pos;
+ ++pos;
++number_of_rows;
}
- }
- }
- }
-
- saveUpToPosition(in, memory, pos);
+ }
+ }
+ }
+
+ saveUpToPosition(in, memory, pos);
return {loadAtPosition(in, memory, pos), number_of_rows};
-}
-
-void registerFileSegmentationEngineCSV(FormatFactory & factory)
-{
- factory.registerFileSegmentationEngine("CSV", &fileSegmentationEngineCSVImpl);
+}
+
+void registerFileSegmentationEngineCSV(FormatFactory & factory)
+{
+ factory.registerFileSegmentationEngine("CSV", &fileSegmentationEngineCSVImpl);
factory.registerFileSegmentationEngine("CSVWithNames", &fileSegmentationEngineCSVImpl);
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h
index b6075745b3..69f8d85559 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CSVRowInputFormat.h
@@ -1,54 +1,54 @@
-#pragma once
-
-#include <optional>
-#include <unordered_map>
-
-#include <Core/Block.h>
-#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
-#include <Formats/FormatSettings.h>
-
-
-namespace DB
-{
-
-/** A stream for inputting data in csv format.
- * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
- */
-class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo
-{
-public:
- /** with_names - in the first line the header with column names
- */
- CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
- bool with_names_, const FormatSettings & format_settings_);
-
- String getName() const override { return "CSVRowInputFormat"; }
-
- bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
- void readPrefix() override;
- bool allowSyncAfterError() const override { return true; }
- void syncAfterError() override;
- void resetParser() override;
-
-private:
+#pragma once
+
+#include <optional>
+#include <unordered_map>
+
+#include <Core/Block.h>
+#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
+#include <Formats/FormatSettings.h>
+
+
+namespace DB
+{
+
+/** A stream for inputting data in csv format.
+ * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
+ */
+class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo
+{
+public:
+ /** with_names - in the first line the header with column names
+ */
+ CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
+ bool with_names_, const FormatSettings & format_settings_);
+
+ String getName() const override { return "CSVRowInputFormat"; }
+
+ bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
+ void readPrefix() override;
+ bool allowSyncAfterError() const override { return true; }
+ void syncAfterError() override;
+ void resetParser() override;
+
+private:
/// There fields are computed in constructor.
- bool with_names;
- const FormatSettings format_settings;
- DataTypes data_types;
- using IndexesMap = std::unordered_map<String, size_t>;
- IndexesMap column_indexes_by_names;
-
- void addInputColumn(const String & column_name);
-
+ bool with_names;
+ const FormatSettings format_settings;
+ DataTypes data_types;
+ using IndexesMap = std::unordered_map<String, size_t>;
+ IndexesMap column_indexes_by_names;
+
+ void addInputColumn(const String & column_name);
+
void setupAllColumnsByTableSchema();
- bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
- void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
- bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override
- {
- return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter && *pos != ' ' && *pos != '\t';
- }
-
+ bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
+ void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
+ bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override
+ {
+ return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter && *pos != ' ' && *pos != '\t';
+ }
+
bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column);
-};
-
-}
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
index 85937935f1..775a93cbbe 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@@ -1,31 +1,31 @@
-#include <IO/ReadHelpers.h>
-#include <Processors/Formats/Impl/TSKVRowInputFormat.h>
-#include <Formats/FormatFactory.h>
+#include <IO/ReadHelpers.h>
+#include <Processors/Formats/Impl/TSKVRowInputFormat.h>
+#include <Formats/FormatFactory.h>
#include <DataTypes/Serializations/SerializationNullable.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int INCORRECT_DATA;
- extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
- extern const int CANNOT_READ_ALL_DATA;
- extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
-}
-
-
-TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_)
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int INCORRECT_DATA;
+ extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
+ extern const int CANNOT_READ_ALL_DATA;
+ extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
+}
+
+
+TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_)
: IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_)
-{
- const auto & sample_block = getPort().getHeader();
- size_t num_columns = sample_block.columns();
- for (size_t i = 0; i < num_columns; ++i)
- name_map[sample_block.getByPosition(i).name] = i; /// NOTE You could place names more cache-locally.
-}
-
-
+{
+ const auto & sample_block = getPort().getHeader();
+ size_t num_columns = sample_block.columns();
+ for (size_t i = 0; i < num_columns; ++i)
+ name_map[sample_block.getByPosition(i).name] = i; /// NOTE You could place names more cache-locally.
+}
+
+
void TSKVRowInputFormat::readPrefix()
{
/// In this format, we assume that column name cannot contain BOM,
@@ -34,193 +34,193 @@ void TSKVRowInputFormat::readPrefix()
}
-/** Read the field name in the `tskv` format.
- * Return true if the field is followed by an equal sign,
- * otherwise (field with no value) return false.
- * The reference to the field name will be written to `ref`.
- * A temporary `tmp` buffer can also be used to copy the field name to it.
- * When reading, skips the name and the equal sign after it.
- */
-static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp)
-{
- tmp.clear();
-
- while (!buf.eof())
- {
- const char * next_pos = find_first_symbols<'\t', '\n', '\\', '='>(buf.position(), buf.buffer().end());
-
- if (next_pos == buf.buffer().end())
- {
- tmp.append(buf.position(), next_pos - buf.position());
+/** Read the field name in the `tskv` format.
+ * Return true if the field is followed by an equal sign,
+ * otherwise (field with no value) return false.
+ * The reference to the field name will be written to `ref`.
+ * A temporary `tmp` buffer can also be used to copy the field name to it.
+ * When reading, skips the name and the equal sign after it.
+ */
+static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp)
+{
+ tmp.clear();
+
+ while (!buf.eof())
+ {
+ const char * next_pos = find_first_symbols<'\t', '\n', '\\', '='>(buf.position(), buf.buffer().end());
+
+ if (next_pos == buf.buffer().end())
+ {
+ tmp.append(buf.position(), next_pos - buf.position());
buf.position() = buf.buffer().end();
- buf.next();
- continue;
- }
-
- /// Came to the end of the name.
- if (*next_pos != '\\')
- {
- bool have_value = *next_pos == '=';
- if (tmp.empty())
- {
- /// No need to copy data, you can refer directly to the `buf`.
- ref = StringRef(buf.position(), next_pos - buf.position());
- buf.position() += next_pos + have_value - buf.position();
- }
- else
- {
- /// Copy the data to a temporary string and return a reference to it.
- tmp.append(buf.position(), next_pos - buf.position());
- buf.position() += next_pos + have_value - buf.position();
- ref = StringRef(tmp);
- }
- return have_value;
- }
- /// The name has an escape sequence.
- else
- {
- tmp.append(buf.position(), next_pos - buf.position());
- buf.position() += next_pos + 1 - buf.position();
- if (buf.eof())
- throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
-
- tmp.push_back(parseEscapeSequence(*buf.position()));
- ++buf.position();
- continue;
- }
- }
-
+ buf.next();
+ continue;
+ }
+
+ /// Came to the end of the name.
+ if (*next_pos != '\\')
+ {
+ bool have_value = *next_pos == '=';
+ if (tmp.empty())
+ {
+ /// No need to copy data, you can refer directly to the `buf`.
+ ref = StringRef(buf.position(), next_pos - buf.position());
+ buf.position() += next_pos + have_value - buf.position();
+ }
+ else
+ {
+ /// Copy the data to a temporary string and return a reference to it.
+ tmp.append(buf.position(), next_pos - buf.position());
+ buf.position() += next_pos + have_value - buf.position();
+ ref = StringRef(tmp);
+ }
+ return have_value;
+ }
+ /// The name has an escape sequence.
+ else
+ {
+ tmp.append(buf.position(), next_pos - buf.position());
+ buf.position() += next_pos + 1 - buf.position();
+ if (buf.eof())
+ throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
+
+ tmp.push_back(parseEscapeSequence(*buf.position()));
+ ++buf.position();
+ continue;
+ }
+ }
+
throw ParsingException("Unexpected end of stream while reading key name from TSKV format", ErrorCodes::CANNOT_READ_ALL_DATA);
-}
-
-
-bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
-{
- if (in.eof())
- return false;
-
- const auto & header = getPort().getHeader();
- size_t num_columns = columns.size();
-
- /// Set of columns for which the values were read. The rest will be filled with default values.
- read_columns.assign(num_columns, false);
- seen_columns.assign(num_columns, false);
-
- if (unlikely(*in.position() == '\n'))
- {
- /// An empty string. It is permissible, but it is unclear why.
- ++in.position();
- }
- else
- {
- while (true)
- {
- StringRef name_ref;
- bool has_value = readName(in, name_ref, name_buf);
- ssize_t index = -1;
-
- if (has_value)
- {
- /// NOTE Optimization is possible by caching the order of fields (which is almost always the same)
- /// and quickly checking for the next expected field, instead of searching the hash table.
-
- auto * it = name_map.find(name_ref);
- if (!it)
- {
- if (!format_settings.skip_unknown_fields)
- throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
-
- /// If the key is not found, skip the value.
+}
+
+
+bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
+{
+ if (in.eof())
+ return false;
+
+ const auto & header = getPort().getHeader();
+ size_t num_columns = columns.size();
+
+ /// Set of columns for which the values were read. The rest will be filled with default values.
+ read_columns.assign(num_columns, false);
+ seen_columns.assign(num_columns, false);
+
+ if (unlikely(*in.position() == '\n'))
+ {
+ /// An empty string. It is permissible, but it is unclear why.
+ ++in.position();
+ }
+ else
+ {
+ while (true)
+ {
+ StringRef name_ref;
+ bool has_value = readName(in, name_ref, name_buf);
+ ssize_t index = -1;
+
+ if (has_value)
+ {
+ /// NOTE Optimization is possible by caching the order of fields (which is almost always the same)
+ /// and quickly checking for the next expected field, instead of searching the hash table.
+
+ auto * it = name_map.find(name_ref);
+ if (!it)
+ {
+ if (!format_settings.skip_unknown_fields)
+ throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
+
+ /// If the key is not found, skip the value.
NullOutput sink;
- readEscapedStringInto(sink, in);
- }
- else
- {
- index = it->getMapped();
-
- if (seen_columns[index])
- throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
-
- seen_columns[index] = read_columns[index] = true;
- const auto & type = getPort().getHeader().getByPosition(index).type;
+ readEscapedStringInto(sink, in);
+ }
+ else
+ {
+ index = it->getMapped();
+
+ if (seen_columns[index])
+ throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
+
+ seen_columns[index] = read_columns[index] = true;
+ const auto & type = getPort().getHeader().getByPosition(index).type;
const auto & serialization = serializations[index];
- if (format_settings.null_as_default && !type->isNullable())
+ if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = SerializationNullable::deserializeTextEscapedImpl(*columns[index], in, format_settings, serialization);
- else
+ else
serialization->deserializeTextEscaped(*columns[index], in, format_settings);
- }
- }
- else
- {
- /// The only thing that can go without value is `tskv` fragment that is ignored.
- if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4)))
- throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
- }
-
- if (in.eof())
- {
+ }
+ }
+ else
+ {
+ /// The only thing that can go without value is `tskv` fragment that is ignored.
+ if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4)))
+ throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
+ }
+
+ if (in.eof())
+ {
throw ParsingException("Unexpected end of stream after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_READ_ALL_DATA);
- }
- else if (*in.position() == '\t')
- {
- ++in.position();
- continue;
- }
- else if (*in.position() == '\n')
- {
- ++in.position();
- break;
- }
- else
- {
- /// Possibly a garbage was written into column, remove it
- if (index >= 0)
- {
- columns[index]->popBack(1);
- seen_columns[index] = read_columns[index] = false;
- }
-
- throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
- }
- }
- }
-
- /// Fill in the not met columns with default values.
- for (size_t i = 0; i < num_columns; ++i)
- if (!seen_columns[i])
- header.getByPosition(i).type->insertDefaultInto(*columns[i]);
-
- /// return info about defaults set
- ext.read_columns = read_columns;
-
- return true;
-}
-
-
-void TSKVRowInputFormat::syncAfterError()
-{
- skipToUnescapedNextLineOrEOF(in);
-}
-
-
-void TSKVRowInputFormat::resetParser()
-{
- IRowInputFormat::resetParser();
- read_columns.clear();
- seen_columns.clear();
- name_buf.clear();
-}
-
-void registerInputFormatProcessorTSKV(FormatFactory & factory)
-{
- factory.registerInputFormatProcessor("TSKV", [](
- ReadBuffer & buf,
- const Block & sample,
- IRowInputFormat::Params params,
- const FormatSettings & settings)
- {
- return std::make_shared<TSKVRowInputFormat>(buf, sample, std::move(params), settings);
- });
-}
-
-}
+ }
+ else if (*in.position() == '\t')
+ {
+ ++in.position();
+ continue;
+ }
+ else if (*in.position() == '\n')
+ {
+ ++in.position();
+ break;
+ }
+ else
+ {
+ /// Possibly a garbage was written into column, remove it
+ if (index >= 0)
+ {
+ columns[index]->popBack(1);
+ seen_columns[index] = read_columns[index] = false;
+ }
+
+ throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+ }
+ }
+ }
+
+ /// Fill in the not met columns with default values.
+ for (size_t i = 0; i < num_columns; ++i)
+ if (!seen_columns[i])
+ header.getByPosition(i).type->insertDefaultInto(*columns[i]);
+
+ /// return info about defaults set
+ ext.read_columns = read_columns;
+
+ return true;
+}
+
+
+void TSKVRowInputFormat::syncAfterError()
+{
+ skipToUnescapedNextLineOrEOF(in);
+}
+
+
+void TSKVRowInputFormat::resetParser()
+{
+ IRowInputFormat::resetParser();
+ read_columns.clear();
+ seen_columns.clear();
+ name_buf.clear();
+}
+
+void registerInputFormatProcessorTSKV(FormatFactory & factory)
+{
+ factory.registerInputFormatProcessor("TSKV", [](
+ ReadBuffer & buf,
+ const Block & sample,
+ IRowInputFormat::Params params,
+ const FormatSettings & settings)
+ {
+ return std::make_shared<TSKVRowInputFormat>(buf, sample, std::move(params), settings);
+ });
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h
index bc537158d9..15fe077e41 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TSKVRowInputFormat.h
@@ -1,55 +1,55 @@
-#pragma once
-
-#include <Core/Block.h>
-#include <Processors/Formats/IRowInputFormat.h>
-#include <Formats/FormatSettings.h>
-#include <Common/HashTable/HashMap.h>
-
-
-namespace DB
-{
-
-class ReadBuffer;
-
-
-/** Stream for reading data in TSKV format.
- * TSKV is a very inefficient data format.
- * Similar to TSV, but each field is written as key=value.
- * Fields can be listed in any order (including, in different lines there may be different order),
- * and some fields may be missing.
- * An equal sign can be escaped in the field name.
- * Also, as an additional element there may be a useless tskv fragment - it needs to be ignored.
- */
-class TSKVRowInputFormat : public IRowInputFormat
-{
-public:
- TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_);
-
- String getName() const override { return "TSKVRowInputFormat"; }
-
+#pragma once
+
+#include <Core/Block.h>
+#include <Processors/Formats/IRowInputFormat.h>
+#include <Formats/FormatSettings.h>
+#include <Common/HashTable/HashMap.h>
+
+
+namespace DB
+{
+
+class ReadBuffer;
+
+
+/** Stream for reading data in TSKV format.
+ * TSKV is a very inefficient data format.
+ * Similar to TSV, but each field is written as key=value.
+ * Fields can be listed in any order (including, in different lines there may be different order),
+ * and some fields may be missing.
+ * An equal sign can be escaped in the field name.
+ * Also, as an additional element there may be a useless tskv fragment - it needs to be ignored.
+ */
+class TSKVRowInputFormat : public IRowInputFormat
+{
+public:
+ TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_);
+
+ String getName() const override { return "TSKVRowInputFormat"; }
+
void readPrefix() override;
- bool readRow(MutableColumns & columns, RowReadExtension &) override;
- bool allowSyncAfterError() const override { return true; }
- void syncAfterError() override;
- void resetParser() override;
-
-
-private:
- const FormatSettings format_settings;
-
- /// Buffer for the read from the stream the field name. Used when you have to copy it.
- String name_buf;
-
- /// Hash table matching `field name -> position in the block`. NOTE You can use perfect hash map.
- using NameMap = HashMap<StringRef, size_t, StringRefHash>;
- NameMap name_map;
-
- /// Set of columns for which the values were read. The rest will be filled with default values.
- std::vector<UInt8> read_columns;
- /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
- std::vector<UInt8> seen_columns;
- /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true
- /// for row like ..., non-nullable column name=\N, ...
-};
-
-}
+ bool readRow(MutableColumns & columns, RowReadExtension &) override;
+ bool allowSyncAfterError() const override { return true; }
+ void syncAfterError() override;
+ void resetParser() override;
+
+
+private:
+ const FormatSettings format_settings;
+
+ /// Buffer for the read from the stream the field name. Used when you have to copy it.
+ String name_buf;
+
+ /// Hash table matching `field name -> position in the block`. NOTE You can use perfect hash map.
+ using NameMap = HashMap<StringRef, size_t, StringRefHash>;
+ NameMap name_map;
+
+ /// Set of columns for which the values were read. The rest will be filled with default values.
+ std::vector<UInt8> read_columns;
+ /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
+ std::vector<UInt8> seen_columns;
+ /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true
+ /// for row like ..., non-nullable column name=\N, ...
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index 5d56ed1327..c3e6bcb47b 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -1,152 +1,152 @@
-#include <IO/ReadHelpers.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/Operators.h>
-
-#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
#include <Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h>
-#include <Formats/verbosePrintString.h>
-#include <Formats/FormatFactory.h>
-#include <DataTypes/DataTypeNothing.h>
+#include <Formats/verbosePrintString.h>
+#include <Formats/FormatFactory.h>
+#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/Serializations/SerializationNullable.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int INCORRECT_DATA;
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
-}
-
-
-static void skipTSVRow(ReadBuffer & in, const size_t num_columns)
-{
+}
+
+
+static void skipTSVRow(ReadBuffer & in, const size_t num_columns)
+{
NullOutput null_sink;
-
- for (size_t i = 0; i < num_columns; ++i)
- {
- readEscapedStringInto(null_sink, in);
- assertChar(i == num_columns - 1 ? '\n' : '\t', in);
- }
-}
-
-
-/** Check for a common error case - usage of Windows line feed.
- */
-static void checkForCarriageReturn(ReadBuffer & in)
-{
- if (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))
- throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
- "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format."
- " You must transform your file to Unix format."
- "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.",
- ErrorCodes::INCORRECT_DATA);
-}
-
-
-TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
- bool with_names_, bool with_types_, const FormatSettings & format_settings_)
- : RowInputFormatWithDiagnosticInfo(header_, in_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_)
-{
- const auto & sample = getPort().getHeader();
- size_t num_columns = sample.columns();
-
- data_types.resize(num_columns);
- column_indexes_by_names.reserve(num_columns);
-
- for (size_t i = 0; i < num_columns; ++i)
- {
- const auto & column_info = sample.getByPosition(i);
-
- data_types[i] = column_info.type;
- column_indexes_by_names.emplace(column_info.name, i);
- }
-
+
+ for (size_t i = 0; i < num_columns; ++i)
+ {
+ readEscapedStringInto(null_sink, in);
+ assertChar(i == num_columns - 1 ? '\n' : '\t', in);
+ }
+}
+
+
+/** Check for a common error case - usage of Windows line feed.
+ */
+static void checkForCarriageReturn(ReadBuffer & in)
+{
+ if (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))
+ throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
+ "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format."
+ " You must transform your file to Unix format."
+ "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.",
+ ErrorCodes::INCORRECT_DATA);
+}
+
+
+TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
+ bool with_names_, bool with_types_, const FormatSettings & format_settings_)
+ : RowInputFormatWithDiagnosticInfo(header_, in_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_)
+{
+ const auto & sample = getPort().getHeader();
+ size_t num_columns = sample.columns();
+
+ data_types.resize(num_columns);
+ column_indexes_by_names.reserve(num_columns);
+
+ for (size_t i = 0; i < num_columns; ++i)
+ {
+ const auto & column_info = sample.getByPosition(i);
+
+ data_types[i] = column_info.type;
+ column_indexes_by_names.emplace(column_info.name, i);
+ }
+
column_mapping->column_indexes_for_input_fields.reserve(num_columns);
column_mapping->read_columns.assign(num_columns, false);
-}
-
-
-void TabSeparatedRowInputFormat::setupAllColumnsByTableSchema()
-{
- const auto & header = getPort().getHeader();
+}
+
+
+void TabSeparatedRowInputFormat::setupAllColumnsByTableSchema()
+{
+ const auto & header = getPort().getHeader();
column_mapping->read_columns.assign(header.columns(), true);
column_mapping->column_indexes_for_input_fields.resize(header.columns());
-
+
for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i)
column_mapping->column_indexes_for_input_fields[i] = i;
-}
-
-
-void TabSeparatedRowInputFormat::addInputColumn(const String & column_name)
-{
- const auto column_it = column_indexes_by_names.find(column_name);
- if (column_it == column_indexes_by_names.end())
- {
- if (format_settings.skip_unknown_fields)
- {
+}
+
+
+void TabSeparatedRowInputFormat::addInputColumn(const String & column_name)
+{
+ const auto column_it = column_indexes_by_names.find(column_name);
+ if (column_it == column_indexes_by_names.end())
+ {
+ if (format_settings.skip_unknown_fields)
+ {
column_mapping->column_indexes_for_input_fields.push_back(std::nullopt);
- return;
- }
-
- throw Exception(
- "Unknown field found in TSV header: '" + column_name + "' " +
+ return;
+ }
+
+ throw Exception(
+ "Unknown field found in TSV header: '" + column_name + "' " +
"at position " + std::to_string(column_mapping->column_indexes_for_input_fields.size()) +
- "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed",
- ErrorCodes::INCORRECT_DATA
- );
- }
-
- const auto column_index = column_it->second;
-
+ "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed",
+ ErrorCodes::INCORRECT_DATA
+ );
+ }
+
+ const auto column_index = column_it->second;
+
if (column_mapping->read_columns[column_index])
- throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA);
-
+ throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA);
+
column_mapping->read_columns[column_index] = true;
column_mapping->column_indexes_for_input_fields.emplace_back(column_index);
-}
-
-
-void TabSeparatedRowInputFormat::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension)
-{
- /// It is safe to memorize this on the first run - the format guarantees this does not change
- if (unlikely(row_num == 1))
- {
- columns_to_fill_with_default_values.clear();
+}
+
+
+void TabSeparatedRowInputFormat::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension)
+{
+ /// It is safe to memorize this on the first run - the format guarantees this does not change
+ if (unlikely(row_num == 1))
+ {
+ columns_to_fill_with_default_values.clear();
for (size_t index = 0; index < column_mapping->read_columns.size(); ++index)
if (column_mapping->read_columns[index] == 0)
- columns_to_fill_with_default_values.push_back(index);
- }
-
- for (const auto column_index : columns_to_fill_with_default_values)
- {
- data_types[column_index]->insertDefaultInto(*columns[column_index]);
- row_read_extension.read_columns[column_index] = false;
- }
-}
-
-
-void TabSeparatedRowInputFormat::readPrefix()
-{
- if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8())
- {
- /// In this format, we assume that column name or type cannot contain BOM,
- /// so, if format has header,
- /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
- skipBOMIfExists(in);
- }
-
+ columns_to_fill_with_default_values.push_back(index);
+ }
+
+ for (const auto column_index : columns_to_fill_with_default_values)
+ {
+ data_types[column_index]->insertDefaultInto(*columns[column_index]);
+ row_read_extension.read_columns[column_index] = false;
+ }
+}
+
+
+void TabSeparatedRowInputFormat::readPrefix()
+{
+ if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8())
+ {
+ /// In this format, we assume that column name or type cannot contain BOM,
+ /// so, if format has header,
+ /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
+ skipBOMIfExists(in);
+ }
+
/// This is a bit of abstraction leakage, but we have almost the same code in other places.
/// Thus, we check if this InputFormat is working with the "real" beginning of the data in case of parallel parsing.
if (with_names && getCurrentUnitNumber() == 0)
- {
- if (format_settings.with_names_use_header)
- {
- String column_name;
+ {
+ if (format_settings.with_names_use_header)
+ {
+ String column_name;
for (;;)
- {
- readEscapedString(column_name, in);
+ {
+ readEscapedString(column_name, in);
if (!checkChar('\t', in))
{
/// Check last column for \r before adding it, otherwise an error will be:
@@ -157,188 +157,188 @@ void TabSeparatedRowInputFormat::readPrefix()
}
else
addInputColumn(column_name);
- }
-
-
- if (!in.eof())
- {
- assertChar('\n', in);
- }
- }
- else
- {
- setupAllColumnsByTableSchema();
+ }
+
+
+ if (!in.eof())
+ {
+ assertChar('\n', in);
+ }
+ }
+ else
+ {
+ setupAllColumnsByTableSchema();
skipTSVRow(in, column_mapping->column_indexes_for_input_fields.size());
- }
- }
+ }
+ }
else if (!column_mapping->is_set)
- setupAllColumnsByTableSchema();
-
- if (with_types)
- {
+ setupAllColumnsByTableSchema();
+
+ if (with_types)
+ {
skipTSVRow(in, column_mapping->column_indexes_for_input_fields.size());
- }
-}
-
-
-bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
-{
- if (in.eof())
- return false;
-
- updateDiagnosticInfo();
-
+ }
+}
+
+
+bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext)
+{
+ if (in.eof())
+ return false;
+
+ updateDiagnosticInfo();
+
ext.read_columns.assign(column_mapping->read_columns.size(), true);
for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
- {
+ {
const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column];
const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
- if (column_index)
- {
- const auto & type = data_types[*column_index];
+ if (column_index)
+ {
+ const auto & type = data_types[*column_index];
ext.read_columns[*column_index] = readField(*columns[*column_index], type, serializations[*column_index], is_last_file_column);
- }
- else
- {
+ }
+ else
+ {
NullOutput null_sink;
- readEscapedStringInto(null_sink, in);
- }
-
- /// skip separators
+ readEscapedStringInto(null_sink, in);
+ }
+
+ /// skip separators
if (file_column + 1 < column_mapping->column_indexes_for_input_fields.size())
- {
- assertChar('\t', in);
- }
- else if (!in.eof())
- {
- if (unlikely(row_num == 1))
- checkForCarriageReturn(in);
-
- assertChar('\n', in);
- }
- }
-
- fillUnreadColumnsWithDefaults(columns, ext);
-
- return true;
-}
-
-
+ {
+ assertChar('\t', in);
+ }
+ else if (!in.eof())
+ {
+ if (unlikely(row_num == 1))
+ checkForCarriageReturn(in);
+
+ assertChar('\n', in);
+ }
+ }
+
+ fillUnreadColumnsWithDefaults(columns, ext);
+
+ return true;
+}
+
+
bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type,
const SerializationPtr & serialization, bool is_last_file_column)
-{
- const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t';
- const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n');
-
- if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end))
- {
- column.insertDefault();
- return false;
- }
- else if (format_settings.null_as_default && !type->isNullable())
+{
+ const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t';
+ const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n');
+
+ if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end))
+ {
+ column.insertDefault();
+ return false;
+ }
+ else if (format_settings.null_as_default && !type->isNullable())
return SerializationNullable::deserializeTextEscapedImpl(column, in, format_settings, serialization);
serialization->deserializeTextEscaped(column, in, format_settings);
- return true;
-}
-
-bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
-{
+ return true;
+}
+
+bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
+{
for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
- {
- if (file_column == 0 && in.eof())
- {
- out << "<End of stream>\n";
- return false;
- }
-
+ {
+ if (file_column == 0 && in.eof())
+ {
+ out << "<End of stream>\n";
+ return false;
+ }
+
if (column_mapping->column_indexes_for_input_fields[file_column].has_value())
- {
- const auto & header = getPort().getHeader();
+ {
+ const auto & header = getPort().getHeader();
size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value();
- if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx],
- out, file_column))
- return false;
- }
- else
- {
- static const String skipped_column_str = "<SKIPPED COLUMN>";
- static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>();
- static const MutableColumnPtr skipped_column = skipped_column_type->createColumn();
- if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column))
- return false;
- }
-
- /// Delimiters
+ if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx],
+ out, file_column))
+ return false;
+ }
+ else
+ {
+ static const String skipped_column_str = "<SKIPPED COLUMN>";
+ static const DataTypePtr skipped_column_type = std::make_shared<DataTypeNothing>();
+ static const MutableColumnPtr skipped_column = skipped_column_type->createColumn();
+ if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column))
+ return false;
+ }
+
+ /// Delimiters
if (file_column + 1 == column_mapping->column_indexes_for_input_fields.size())
- {
- if (!in.eof())
- {
- try
- {
- assertChar('\n', in);
- }
- catch (const DB::Exception &)
- {
- if (*in.position() == '\t')
- {
- out << "ERROR: Tab found where line feed is expected."
- " It's like your file has more columns than expected.\n"
+ {
+ if (!in.eof())
+ {
+ try
+ {
+ assertChar('\n', in);
+ }
+ catch (const DB::Exception &)
+ {
+ if (*in.position() == '\t')
+ {
+ out << "ERROR: Tab found where line feed is expected."
+ " It's like your file has more columns than expected.\n"
"And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n";
- }
- else if (*in.position() == '\r')
- {
- out << "ERROR: Carriage return found where line feed is expected."
- " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n";
- }
- else
- {
- out << "ERROR: There is no line feed. ";
- verbosePrintString(in.position(), in.position() + 1, out);
- out << " found instead.\n";
- }
- return false;
- }
- }
- }
- else
- {
- try
- {
- assertChar('\t', in);
- }
- catch (const DB::Exception &)
- {
- if (*in.position() == '\n')
- {
- out << "ERROR: Line feed found where tab is expected."
- " It's like your file has less columns than expected.\n"
+ }
+ else if (*in.position() == '\r')
+ {
+ out << "ERROR: Carriage return found where line feed is expected."
+ " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n";
+ }
+ else
+ {
+ out << "ERROR: There is no line feed. ";
+ verbosePrintString(in.position(), in.position() + 1, out);
+ out << " found instead.\n";
+ }
+ return false;
+ }
+ }
+ }
+ else
+ {
+ try
+ {
+ assertChar('\t', in);
+ }
+ catch (const DB::Exception &)
+ {
+ if (*in.position() == '\n')
+ {
+ out << "ERROR: Line feed found where tab is expected."
+ " It's like your file has less columns than expected.\n"
"And if your file has the right number of columns, "
"maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n";
- }
- else if (*in.position() == '\r')
- {
- out << "ERROR: Carriage return found where tab is expected.\n";
- }
- else
- {
- out << "ERROR: There is no tab. ";
- verbosePrintString(in.position(), in.position() + 1, out);
- out << " found instead.\n";
- }
- return false;
- }
- }
- }
-
- return true;
-}
-
-void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
-{
+ }
+ else if (*in.position() == '\r')
+ {
+ out << "ERROR: Carriage return found where tab is expected.\n";
+ }
+ else
+ {
+ out << "ERROR: There is no tab. ";
+ verbosePrintString(in.position(), in.position() + 1, out);
+ out << " found instead.\n";
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
+{
const auto & index = column_mapping->column_indexes_for_input_fields[file_column];
if (index)
- {
+ {
bool can_be_parsed_as_null = removeLowCardinality(type)->isNullable();
// check null value for type is not nullable. don't cross buffer bound for simplicity, so maybe missing some case
@@ -361,42 +361,42 @@ void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, I
const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
readField(column, type, serializations[*index], is_last_file_column);
- }
- else
- {
+ }
+ else
+ {
NullOutput null_sink;
- readEscapedStringInto(null_sink, in);
- }
-}
-
-void TabSeparatedRowInputFormat::syncAfterError()
-{
- skipToUnescapedNextLineOrEOF(in);
-}
-
-void TabSeparatedRowInputFormat::resetParser()
-{
- RowInputFormatWithDiagnosticInfo::resetParser();
- const auto & sample = getPort().getHeader();
+ readEscapedStringInto(null_sink, in);
+ }
+}
+
+void TabSeparatedRowInputFormat::syncAfterError()
+{
+ skipToUnescapedNextLineOrEOF(in);
+}
+
+void TabSeparatedRowInputFormat::resetParser()
+{
+ RowInputFormatWithDiagnosticInfo::resetParser();
+ const auto & sample = getPort().getHeader();
column_mapping->read_columns.assign(sample.columns(), false);
column_mapping->column_indexes_for_input_fields.clear();
- columns_to_fill_with_default_values.clear();
-}
-
-void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
-{
- for (const auto * name : {"TabSeparated", "TSV"})
- {
- factory.registerInputFormatProcessor(name, [](
- ReadBuffer & buf,
- const Block & sample,
- IRowInputFormat::Params params,
- const FormatSettings & settings)
- {
- return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, false, false, settings);
- });
- }
-
+ columns_to_fill_with_default_values.clear();
+}
+
+void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
+{
+ for (const auto * name : {"TabSeparated", "TSV"})
+ {
+ factory.registerInputFormatProcessor(name, [](
+ ReadBuffer & buf,
+ const Block & sample,
+ IRowInputFormat::Params params,
+ const FormatSettings & settings)
+ {
+ return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, false, false, settings);
+ });
+ }
+
for (const auto * name : {"TabSeparatedRaw", "TSVRaw"})
{
factory.registerInputFormatProcessor(name, [](
@@ -409,74 +409,74 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
});
}
- for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
- {
- factory.registerInputFormatProcessor(name, [](
- ReadBuffer & buf,
- const Block & sample,
- IRowInputFormat::Params params,
- const FormatSettings & settings)
- {
- return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, false, settings);
- });
- }
-
- for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"})
- {
- factory.registerInputFormatProcessor(name, [](
- ReadBuffer & buf,
- const Block & sample,
- IRowInputFormat::Params params,
- const FormatSettings & settings)
- {
- return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, true, settings);
- });
- }
-}
-
+ for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
+ {
+ factory.registerInputFormatProcessor(name, [](
+ ReadBuffer & buf,
+ const Block & sample,
+ IRowInputFormat::Params params,
+ const FormatSettings & settings)
+ {
+ return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, false, settings);
+ });
+ }
+
+ for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"})
+ {
+ factory.registerInputFormatProcessor(name, [](
+ ReadBuffer & buf,
+ const Block & sample,
+ IRowInputFormat::Params params,
+ const FormatSettings & settings)
+ {
+ return std::make_shared<TabSeparatedRowInputFormat>(sample, buf, params, true, true, settings);
+ });
+ }
+}
+
static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
-{
- bool need_more_data = true;
- char * pos = in.position();
+{
+ bool need_more_data = true;
+ char * pos = in.position();
size_t number_of_rows = 0;
-
- while (loadAtPosition(in, memory, pos) && need_more_data)
- {
- pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end());
-
+
+ while (loadAtPosition(in, memory, pos) && need_more_data)
+ {
+ pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end());
+
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
- continue;
+ continue;
else if (*pos == '\\')
- {
- ++pos;
- if (loadAtPosition(in, memory, pos))
- ++pos;
- }
- else if (*pos == '\n' || *pos == '\r')
- {
+ {
+ ++pos;
+ if (loadAtPosition(in, memory, pos))
+ ++pos;
+ }
+ else if (*pos == '\n' || *pos == '\r')
+ {
if (*pos == '\n')
++number_of_rows;
- if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
- need_more_data = false;
- ++pos;
- }
- }
-
- saveUpToPosition(in, memory, pos);
-
+ if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
+ need_more_data = false;
+ ++pos;
+ }
+ }
+
+ saveUpToPosition(in, memory, pos);
+
return {loadAtPosition(in, memory, pos), number_of_rows};
-}
-
-void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
-{
- // We can use the same segmentation engine for TSKV.
+}
+
+void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
+{
+ // We can use the same segmentation engine for TSKV.
for (const auto & name : {"TabSeparated", "TSV", "TSKV", "TabSeparatedWithNames", "TSVWithNames"})
- {
- factory.registerFileSegmentationEngine(name, &fileSegmentationEngineTabSeparatedImpl);
- }
-}
-
-}
+ {
+ factory.registerFileSegmentationEngine(name, &fileSegmentationEngineTabSeparatedImpl);
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
index 8127b5ceba..f56665da86 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
@@ -1,56 +1,56 @@
-#pragma once
-
-#include <Core/Block.h>
-#include <Formats/FormatSettings.h>
-#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
-
-
-namespace DB
-{
-
-/** A stream to input data in tsv format.
- */
-class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo
-{
-public:
- /** with_names - the first line is the header with the names of the columns
- * with_types - on the next line header with type names
- */
- TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
- bool with_names_, bool with_types_, const FormatSettings & format_settings_);
-
- String getName() const override { return "TabSeparatedRowInputFormat"; }
-
- bool readRow(MutableColumns & columns, RowReadExtension &) override;
- void readPrefix() override;
- bool allowSyncAfterError() const override { return true; }
- void syncAfterError() override;
-
- void resetParser() override;
-
+#pragma once
+
+#include <Core/Block.h>
+#include <Formats/FormatSettings.h>
+#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
+
+
+namespace DB
+{
+
+/** A stream to input data in tsv format.
+ */
+class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo
+{
+public:
+ /** with_names - the first line is the header with the names of the columns
+ * with_types - on the next line header with type names
+ */
+ TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
+ bool with_names_, bool with_types_, const FormatSettings & format_settings_);
+
+ String getName() const override { return "TabSeparatedRowInputFormat"; }
+
+ bool readRow(MutableColumns & columns, RowReadExtension &) override;
+ void readPrefix() override;
+ bool allowSyncAfterError() const override { return true; }
+ void syncAfterError() override;
+
+ void resetParser() override;
+
protected:
- bool with_names;
- bool with_types;
- const FormatSettings format_settings;
+ bool with_names;
+ bool with_types;
+ const FormatSettings format_settings;
virtual bool readField(IColumn & column, const DataTypePtr & type,
const SerializationPtr & serialization, bool is_last_file_column);
private:
- DataTypes data_types;
-
- using IndexesMap = std::unordered_map<String, size_t>;
- IndexesMap column_indexes_by_names;
-
- std::vector<size_t> columns_to_fill_with_default_values;
-
- void addInputColumn(const String & column_name);
- void setupAllColumnsByTableSchema();
- void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension);
-
- bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
- void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
- bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; }
-};
-
-}
+ DataTypes data_types;
+
+ using IndexesMap = std::unordered_map<String, size_t>;
+ IndexesMap column_indexes_by_names;
+
+ std::vector<size_t> columns_to_fill_with_default_values;
+
+ void addInputColumn(const String & column_name);
+ void setupAllColumnsByTableSchema();
+ void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension);
+
+ bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
+ void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
+ bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp
index 4dddc9fff9..f617c2baa3 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp
@@ -1,183 +1,183 @@
-#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
-#include <Formats/verbosePrintString.h>
-#include <IO/Operators.h>
-#include <IO/WriteBufferFromString.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-static String alignedName(const String & name, size_t max_length)
-{
- size_t spaces_count = max_length >= name.size() ? max_length - name.size() : 0;
- return name + ", " + std::string(spaces_count, ' ');
-}
-
-
-RowInputFormatWithDiagnosticInfo::RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_)
- : IRowInputFormat(header_, in_, params_)
-{
-}
-
-void RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo()
-{
- ++row_num;
-
- bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row;
- bytes_read_at_start_of_buffer_on_current_row = in.count() - in.offset();
-
- offset_of_prev_row = offset_of_current_row;
- offset_of_current_row = in.offset();
-}
-
-String RowInputFormatWithDiagnosticInfo::getDiagnosticInfo()
-{
- if (in.eof())
- return "Buffer has gone, cannot extract information about what has been parsed.";
-
- WriteBufferFromOwnString out;
-
- const auto & header = getPort().getHeader();
- MutableColumns columns = header.cloneEmptyColumns();
-
- /// It is possible to display detailed diagnostics only if the last and next to last rows are still in the read buffer.
- size_t bytes_read_at_start_of_buffer = in.count() - in.offset();
- if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row)
- {
- out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n";
- return out.str();
- }
-
- max_length_of_column_name = 0;
- for (size_t i = 0; i < header.columns(); ++i)
- if (header.safeGetByPosition(i).name.size() > max_length_of_column_name)
- max_length_of_column_name = header.safeGetByPosition(i).name.size();
-
- max_length_of_data_type_name = 0;
- for (size_t i = 0; i < header.columns(); ++i)
- if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name)
- max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size();
-
- /// Roll back the cursor to the beginning of the previous or current row and parse all over again. But now we derive detailed information.
-
- if (offset_of_prev_row <= in.buffer().size())
- {
- in.position() = in.buffer().begin() + offset_of_prev_row;
-
- out << "\nRow " << (row_num - 1) << ":\n";
- if (!parseRowAndPrintDiagnosticInfo(columns, out))
- return out.str();
- }
- else
- {
- if (in.buffer().size() < offset_of_current_row)
- {
- out << "Could not print diagnostic info because parsing of data hasn't started.\n";
- return out.str();
- }
-
- in.position() = in.buffer().begin() + offset_of_current_row;
- }
-
- out << "\nRow " << row_num << ":\n";
- parseRowAndPrintDiagnosticInfo(columns, out);
- out << "\n";
-
- return out.str();
-}
-
-bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(const String & col_name,
- const DataTypePtr & type,
- IColumn & column,
- WriteBuffer & out,
- size_t file_column)
-{
- out << "Column " << file_column << ", " << std::string((file_column < 10 ? 2 : file_column < 100 ? 1 : 0), ' ')
- << "name: " << alignedName(col_name, max_length_of_column_name)
- << "type: " << alignedName(type->getName(), max_length_of_data_type_name);
-
- auto * prev_position = in.position();
- std::exception_ptr exception;
-
- try
- {
- tryDeserializeField(type, column, file_column);
- }
- catch (...)
- {
- exception = std::current_exception();
- }
- auto * curr_position = in.position();
-
- if (curr_position < prev_position)
- throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
-
+#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
+#include <Formats/verbosePrintString.h>
+#include <IO/Operators.h>
+#include <IO/WriteBufferFromString.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+static String alignedName(const String & name, size_t max_length)
+{
+ size_t spaces_count = max_length >= name.size() ? max_length - name.size() : 0;
+ return name + ", " + std::string(spaces_count, ' ');
+}
+
+
+RowInputFormatWithDiagnosticInfo::RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_)
+ : IRowInputFormat(header_, in_, params_)
+{
+}
+
+void RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo()
+{
+ ++row_num;
+
+ bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row;
+ bytes_read_at_start_of_buffer_on_current_row = in.count() - in.offset();
+
+ offset_of_prev_row = offset_of_current_row;
+ offset_of_current_row = in.offset();
+}
+
+String RowInputFormatWithDiagnosticInfo::getDiagnosticInfo()
+{
+ if (in.eof())
+ return "Buffer has gone, cannot extract information about what has been parsed.";
+
+ WriteBufferFromOwnString out;
+
+ const auto & header = getPort().getHeader();
+ MutableColumns columns = header.cloneEmptyColumns();
+
+ /// It is possible to display detailed diagnostics only if the last and next to last rows are still in the read buffer.
+ size_t bytes_read_at_start_of_buffer = in.count() - in.offset();
+ if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row)
+ {
+ out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n";
+ return out.str();
+ }
+
+ max_length_of_column_name = 0;
+ for (size_t i = 0; i < header.columns(); ++i)
+ if (header.safeGetByPosition(i).name.size() > max_length_of_column_name)
+ max_length_of_column_name = header.safeGetByPosition(i).name.size();
+
+ max_length_of_data_type_name = 0;
+ for (size_t i = 0; i < header.columns(); ++i)
+ if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name)
+ max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size();
+
+ /// Roll back the cursor to the beginning of the previous or current row and parse all over again. But now we derive detailed information.
+
+ if (offset_of_prev_row <= in.buffer().size())
+ {
+ in.position() = in.buffer().begin() + offset_of_prev_row;
+
+ out << "\nRow " << (row_num - 1) << ":\n";
+ if (!parseRowAndPrintDiagnosticInfo(columns, out))
+ return out.str();
+ }
+ else
+ {
+ if (in.buffer().size() < offset_of_current_row)
+ {
+ out << "Could not print diagnostic info because parsing of data hasn't started.\n";
+ return out.str();
+ }
+
+ in.position() = in.buffer().begin() + offset_of_current_row;
+ }
+
+ out << "\nRow " << row_num << ":\n";
+ parseRowAndPrintDiagnosticInfo(columns, out);
+ out << "\n";
+
+ return out.str();
+}
+
+bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(const String & col_name,
+ const DataTypePtr & type,
+ IColumn & column,
+ WriteBuffer & out,
+ size_t file_column)
+{
+ out << "Column " << file_column << ", " << std::string((file_column < 10 ? 2 : file_column < 100 ? 1 : 0), ' ')
+ << "name: " << alignedName(col_name, max_length_of_column_name)
+ << "type: " << alignedName(type->getName(), max_length_of_data_type_name);
+
+ auto * prev_position = in.position();
+ std::exception_ptr exception;
+
+ try
+ {
+ tryDeserializeField(type, column, file_column);
+ }
+ catch (...)
+ {
+ exception = std::current_exception();
+ }
+ auto * curr_position = in.position();
+
+ if (curr_position < prev_position)
+ throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
+
if (isNativeNumber(type) || isDate(type) || isDateTime(type) || isDateTime64(type))
- {
- /// An empty string instead of a value.
- if (curr_position == prev_position)
- {
- out << "ERROR: text ";
- verbosePrintString(prev_position, std::min(prev_position + 10, in.buffer().end()), out);
- out << " is not like " << type->getName() << "\n";
- return false;
- }
- }
-
- out << "parsed text: ";
- verbosePrintString(prev_position, curr_position, out);
-
- if (exception)
- {
- if (type->getName() == "DateTime")
- out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
- else if (type->getName() == "Date")
- out << "ERROR: Date must be in YYYY-MM-DD format.\n";
- else
- out << "ERROR\n";
+ {
+ /// An empty string instead of a value.
+ if (curr_position == prev_position)
+ {
+ out << "ERROR: text ";
+ verbosePrintString(prev_position, std::min(prev_position + 10, in.buffer().end()), out);
+ out << " is not like " << type->getName() << "\n";
+ return false;
+ }
+ }
+
+ out << "parsed text: ";
+ verbosePrintString(prev_position, curr_position, out);
+
+ if (exception)
+ {
+ if (type->getName() == "DateTime")
+ out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
+ else if (type->getName() == "Date")
+ out << "ERROR: Date must be in YYYY-MM-DD format.\n";
+ else
+ out << "ERROR\n";
// Print exception message
out << getExceptionMessage(exception, false) << '\n';
- return false;
- }
-
- out << "\n";
-
- if (type->haveMaximumSizeOfValue())
- {
- if (isGarbageAfterField(file_column, curr_position))
- {
- out << "ERROR: garbage after " << type->getName() << ": ";
- verbosePrintString(curr_position, std::min(curr_position + 10, in.buffer().end()), out);
- out << "\n";
-
- if (type->getName() == "DateTime")
- out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
- else if (type->getName() == "Date")
- out << "ERROR: Date must be in YYYY-MM-DD format.\n";
-
- return false;
- }
- }
-
- return true;
-}
-
-void RowInputFormatWithDiagnosticInfo::resetParser()
-{
- IRowInputFormat::resetParser();
- row_num = 0;
- bytes_read_at_start_of_buffer_on_current_row = 0;
- bytes_read_at_start_of_buffer_on_prev_row = 0;
- offset_of_current_row = std::numeric_limits<size_t>::max();
- offset_of_prev_row = std::numeric_limits<size_t>::max();
- max_length_of_column_name = 0;
- max_length_of_data_type_name = 0;
-}
-
-
-}
+ return false;
+ }
+
+ out << "\n";
+
+ if (type->haveMaximumSizeOfValue())
+ {
+ if (isGarbageAfterField(file_column, curr_position))
+ {
+ out << "ERROR: garbage after " << type->getName() << ": ";
+ verbosePrintString(curr_position, std::min(curr_position + 10, in.buffer().end()), out);
+ out << "\n";
+
+ if (type->getName() == "DateTime")
+ out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
+ else if (type->getName() == "Date")
+ out << "ERROR: Date must be in YYYY-MM-DD format.\n";
+
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void RowInputFormatWithDiagnosticInfo::resetParser()
+{
+ IRowInputFormat::resetParser();
+ row_num = 0;
+ bytes_read_at_start_of_buffer_on_current_row = 0;
+ bytes_read_at_start_of_buffer_on_prev_row = 0;
+ offset_of_current_row = std::numeric_limits<size_t>::max();
+ offset_of_prev_row = std::numeric_limits<size_t>::max();
+ max_length_of_column_name = 0;
+ max_length_of_data_type_name = 0;
+}
+
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h
index 5bad24cd48..3ad737dd63 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h
@@ -1,46 +1,46 @@
-#pragma once
-
-#include <Core/Block.h>
-#include <Processors/Formats/IRowInputFormat.h>
-#include <IO/ReadBuffer.h>
-#include <limits>
-
-
-namespace DB
-{
-
-class RowInputFormatWithDiagnosticInfo : public IRowInputFormat
-{
-public:
- RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_);
-
- String getDiagnosticInfo() override;
-
- void resetParser() override;
-
-protected:
- void updateDiagnosticInfo();
- bool deserializeFieldAndPrintDiagnosticInfo(const String & col_name, const DataTypePtr & type, IColumn & column,
- WriteBuffer & out, size_t file_column);
-
- virtual bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) = 0;
- virtual void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) = 0;
- virtual bool isGarbageAfterField(size_t after_input_pos_idx, ReadBuffer::Position pos) = 0;
-
- /// For convenient diagnostics in case of an error.
- size_t row_num = 0;
-
-private:
- /// How many bytes were read, not counting those still in the buffer.
- size_t bytes_read_at_start_of_buffer_on_current_row = 0;
- size_t bytes_read_at_start_of_buffer_on_prev_row = 0;
-
- size_t offset_of_current_row = std::numeric_limits<size_t>::max();
- size_t offset_of_prev_row = std::numeric_limits<size_t>::max();
-
- /// For alignment of diagnostic info.
- size_t max_length_of_column_name = 0;
- size_t max_length_of_data_type_name = 0;
-};
-
-}
+#pragma once
+
+#include <Core/Block.h>
+#include <Processors/Formats/IRowInputFormat.h>
+#include <IO/ReadBuffer.h>
+#include <limits>
+
+
+namespace DB
+{
+
+class RowInputFormatWithDiagnosticInfo : public IRowInputFormat
+{
+public:
+ RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_);
+
+ String getDiagnosticInfo() override;
+
+ void resetParser() override;
+
+protected:
+ void updateDiagnosticInfo();
+ bool deserializeFieldAndPrintDiagnosticInfo(const String & col_name, const DataTypePtr & type, IColumn & column,
+ WriteBuffer & out, size_t file_column);
+
+ virtual bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) = 0;
+ virtual void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) = 0;
+ virtual bool isGarbageAfterField(size_t after_input_pos_idx, ReadBuffer::Position pos) = 0;
+
+ /// For convenient diagnostics in case of an error.
+ size_t row_num = 0;
+
+private:
+ /// How many bytes were read, not counting those still in the buffer.
+ size_t bytes_read_at_start_of_buffer_on_current_row = 0;
+ size_t bytes_read_at_start_of_buffer_on_prev_row = 0;
+
+ size_t offset_of_current_row = std::numeric_limits<size_t>::max();
+ size_t offset_of_prev_row = std::numeric_limits<size_t>::max();
+
+ /// For alignment of diagnostic info.
+ size_t max_length_of_column_name = 0;
+ size_t max_length_of_data_type_name = 0;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp
index 64bdbe2410..a591c7d000 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.cpp
@@ -1,19 +1,19 @@
-#include <Processors/IAccumulatingTransform.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
- : IProcessor({std::move(input_header)}, {std::move(output_header)}),
- input(inputs.front()), output(outputs.front())
-{
-}
-
+#include <Processors/IAccumulatingTransform.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
+ : IProcessor({std::move(input_header)}, {std::move(output_header)}),
+ input(inputs.front()), output(outputs.front())
+{
+}
+
InputPort * IAccumulatingTransform::addTotalsPort()
{
if (inputs.size() > 1)
@@ -22,33 +22,33 @@ InputPort * IAccumulatingTransform::addTotalsPort()
return &inputs.emplace_back(getInputPort().getHeader(), this);
}
-IAccumulatingTransform::Status IAccumulatingTransform::prepare()
-{
- /// Check can output.
- if (output.isFinished())
- {
+IAccumulatingTransform::Status IAccumulatingTransform::prepare()
+{
+ /// Check can output.
+ if (output.isFinished())
+ {
for (auto & in : inputs)
in.close();
- return Status::Finished;
- }
-
- if (!output.canPush())
- {
- input.setNotNeeded();
- return Status::PortFull;
- }
-
- /// Output if has data.
- if (current_output_chunk)
- output.push(std::move(current_output_chunk));
-
- if (finished_generate)
- {
- output.finish();
- return Status::Finished;
- }
-
+ return Status::Finished;
+ }
+
+ if (!output.canPush())
+ {
+ input.setNotNeeded();
+ return Status::PortFull;
+ }
+
+ /// Output if has data.
+ if (current_output_chunk)
+ output.push(std::move(current_output_chunk));
+
+ if (finished_generate)
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
if (input.isFinished())
finished_input = true;
@@ -71,48 +71,48 @@ IAccumulatingTransform::Status IAccumulatingTransform::prepare()
totals_input.close();
}
}
-
+
/// Generate output block.
- return Status::Ready;
- }
-
- /// Check can input.
- if (!has_input)
- {
- input.setNeeded();
- if (!input.hasData())
- return Status::NeedData;
-
- current_input_chunk = input.pull();
- has_input = true;
- }
-
- return Status::Ready;
-}
-
-void IAccumulatingTransform::work()
-{
- if (!finished_input)
- {
- consume(std::move(current_input_chunk));
- has_input = false;
- }
- else
- {
- current_output_chunk = generate();
- if (!current_output_chunk)
- finished_generate = true;
- }
-}
-
-void IAccumulatingTransform::setReadyChunk(Chunk chunk)
-{
- if (current_output_chunk)
- throw Exception("IAccumulatingTransform already has input. Cannot set another chunk. "
- "Probably, setReadyChunk method was called twice per consume().", ErrorCodes::LOGICAL_ERROR);
-
- current_output_chunk = std::move(chunk);
-}
-
-}
-
+ return Status::Ready;
+ }
+
+ /// Check can input.
+ if (!has_input)
+ {
+ input.setNeeded();
+ if (!input.hasData())
+ return Status::NeedData;
+
+ current_input_chunk = input.pull();
+ has_input = true;
+ }
+
+ return Status::Ready;
+}
+
+void IAccumulatingTransform::work()
+{
+ if (!finished_input)
+ {
+ consume(std::move(current_input_chunk));
+ has_input = false;
+ }
+ else
+ {
+ current_output_chunk = generate();
+ if (!current_output_chunk)
+ finished_generate = true;
+ }
+}
+
+void IAccumulatingTransform::setReadyChunk(Chunk chunk)
+{
+ if (current_output_chunk)
+ throw Exception("IAccumulatingTransform already has input. Cannot set another chunk. "
+ "Probably, setReadyChunk method was called twice per consume().", ErrorCodes::LOGICAL_ERROR);
+
+ current_output_chunk = std::move(chunk);
+}
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h
index b51753199c..ba9727d9ba 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IAccumulatingTransform.h
@@ -1,47 +1,47 @@
-#pragma once
-
-#include <Processors/IProcessor.h>
-
-
-namespace DB
-{
-
-/** Has one input and one output.
- * Pulls all blocks from input, and only then produce output.
- * Examples: ORDER BY, GROUP BY.
- */
-class IAccumulatingTransform : public IProcessor
-{
-protected:
- InputPort & input;
- OutputPort & output;
-
- Chunk current_input_chunk;
- Chunk current_output_chunk;
+#pragma once
+
+#include <Processors/IProcessor.h>
+
+
+namespace DB
+{
+
+/** Has one input and one output.
+ * Pulls all blocks from input, and only then produce output.
+ * Examples: ORDER BY, GROUP BY.
+ */
+class IAccumulatingTransform : public IProcessor
+{
+protected:
+ InputPort & input;
+ OutputPort & output;
+
+ Chunk current_input_chunk;
+ Chunk current_output_chunk;
Chunk totals;
- bool has_input = false;
- bool finished_input = false;
- bool finished_generate = false;
-
- virtual void consume(Chunk chunk) = 0;
- virtual Chunk generate() = 0;
-
- /// This method can be called once per consume call. In case if some chunks are ready.
- void setReadyChunk(Chunk chunk);
- void finishConsume() { finished_input = true; }
-
-public:
- IAccumulatingTransform(Block input_header, Block output_header);
-
- Status prepare() override;
- void work() override;
-
+ bool has_input = false;
+ bool finished_input = false;
+ bool finished_generate = false;
+
+ virtual void consume(Chunk chunk) = 0;
+ virtual Chunk generate() = 0;
+
+ /// This method can be called once per consume call. In case if some chunks are ready.
+ void setReadyChunk(Chunk chunk);
+ void finishConsume() { finished_input = true; }
+
+public:
+ IAccumulatingTransform(Block input_header, Block output_header);
+
+ Status prepare() override;
+ void work() override;
+
/// Adds additional port for totals.
/// If added, totals will have been ready by the first generate() call (in totals chunk).
InputPort * addTotalsPort();
- InputPort & getInputPort() { return input; }
- OutputPort & getOutputPort() { return output; }
-};
-
-}
+ InputPort & getInputPort() { return input; }
+ OutputPort & getOutputPort() { return output; }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp
index a2533ee4c8..8f52bd6a4d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/IProcessor.cpp
@@ -1,44 +1,44 @@
-#include <iostream>
-#include <Processors/IProcessor.h>
-
-
-namespace DB
-{
-
-void IProcessor::dump() const
-{
- std::cerr << getName() << "\n";
-
- std::cerr << "inputs:\n";
- for (const auto & port : inputs)
- std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n";
-
- std::cerr << "outputs:\n";
- for (const auto & port : outputs)
- std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n";
-}
-
-
-std::string IProcessor::statusToName(Status status)
-{
- switch (status)
- {
- case Status::NeedData:
- return "NeedData";
- case Status::PortFull:
- return "PortFull";
- case Status::Finished:
- return "Finished";
- case Status::Ready:
- return "Ready";
- case Status::Async:
- return "Async";
- case Status::ExpandPipeline:
- return "ExpandPipeline";
- }
-
- __builtin_unreachable();
-}
-
-}
-
+#include <iostream>
+#include <Processors/IProcessor.h>
+
+
+namespace DB
+{
+
+void IProcessor::dump() const
+{
+ std::cerr << getName() << "\n";
+
+ std::cerr << "inputs:\n";
+ for (const auto & port : inputs)
+ std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n";
+
+ std::cerr << "outputs:\n";
+ for (const auto & port : outputs)
+ std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n";
+}
+
+
+std::string IProcessor::statusToName(Status status)
+{
+ switch (status)
+ {
+ case Status::NeedData:
+ return "NeedData";
+ case Status::PortFull:
+ return "PortFull";
+ case Status::Finished:
+ return "Finished";
+ case Status::Ready:
+ return "Ready";
+ case Status::Async:
+ return "Async";
+ case Status::ExpandPipeline:
+ return "ExpandPipeline";
+ }
+
+ __builtin_unreachable();
+}
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp
index ac8f2f8b7a..905b6a48b9 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.cpp
@@ -1,110 +1,110 @@
-#include <Processors/ISimpleTransform.h>
-
-
-namespace DB
-{
-
-ISimpleTransform::ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_)
- : IProcessor({std::move(input_header_)}, {std::move(output_header_)})
- , input(inputs.front())
- , output(outputs.front())
- , skip_empty_chunks(skip_empty_chunks_)
-{
-}
-
-ISimpleTransform::Status ISimpleTransform::prepare()
-{
- /// Check can output.
-
- if (output.isFinished())
- {
- input.close();
- return Status::Finished;
- }
-
- if (!output.canPush())
- {
- input.setNotNeeded();
- return Status::PortFull;
- }
-
- /// Output if has data.
- if (has_output)
- {
- output.pushData(std::move(output_data));
- has_output = false;
-
- if (!no_more_data_needed)
- return Status::PortFull;
-
- }
-
- /// Stop if don't need more data.
- if (no_more_data_needed)
- {
- input.close();
- output.finish();
- return Status::Finished;
- }
-
- /// Check can input.
- if (!has_input)
- {
- if (input.isFinished())
- {
- output.finish();
- return Status::Finished;
- }
-
- input.setNeeded();
-
- if (!input.hasData())
- return Status::NeedData;
-
- input_data = input.pullData(set_input_not_needed_after_read);
- has_input = true;
-
- if (input_data.exception)
- /// No more data needed. Exception will be thrown (or swallowed) later.
- input.setNotNeeded();
- }
-
- /// Now transform.
- return Status::Ready;
-}
-
-void ISimpleTransform::work()
-{
- if (input_data.exception)
- {
- /// Skip transform in case of exception.
- output_data = std::move(input_data);
- has_input = false;
- has_output = true;
- return;
- }
-
- try
- {
- transform(input_data.chunk, output_data.chunk);
- }
- catch (DB::Exception &)
- {
- output_data.exception = std::current_exception();
- has_output = true;
- has_input = false;
- return;
- }
-
- has_input = !needInputData();
-
- if (!skip_empty_chunks || output_data.chunk)
- has_output = true;
-
- if (has_output && !output_data.chunk && getOutputPort().getHeader())
- /// Support invariant that chunks must have the same number of columns as header.
- output_data.chunk = Chunk(getOutputPort().getHeader().cloneEmpty().getColumns(), 0);
-}
-
-}
-
+#include <Processors/ISimpleTransform.h>
+
+
+namespace DB
+{
+
+ISimpleTransform::ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_)
+ : IProcessor({std::move(input_header_)}, {std::move(output_header_)})
+ , input(inputs.front())
+ , output(outputs.front())
+ , skip_empty_chunks(skip_empty_chunks_)
+{
+}
+
+ISimpleTransform::Status ISimpleTransform::prepare()
+{
+ /// Check can output.
+
+ if (output.isFinished())
+ {
+ input.close();
+ return Status::Finished;
+ }
+
+ if (!output.canPush())
+ {
+ input.setNotNeeded();
+ return Status::PortFull;
+ }
+
+ /// Output if has data.
+ if (has_output)
+ {
+ output.pushData(std::move(output_data));
+ has_output = false;
+
+ if (!no_more_data_needed)
+ return Status::PortFull;
+
+ }
+
+ /// Stop if don't need more data.
+ if (no_more_data_needed)
+ {
+ input.close();
+ output.finish();
+ return Status::Finished;
+ }
+
+ /// Check can input.
+ if (!has_input)
+ {
+ if (input.isFinished())
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ input.setNeeded();
+
+ if (!input.hasData())
+ return Status::NeedData;
+
+ input_data = input.pullData(set_input_not_needed_after_read);
+ has_input = true;
+
+ if (input_data.exception)
+ /// No more data needed. Exception will be thrown (or swallowed) later.
+ input.setNotNeeded();
+ }
+
+ /// Now transform.
+ return Status::Ready;
+}
+
+void ISimpleTransform::work()
+{
+ if (input_data.exception)
+ {
+ /// Skip transform in case of exception.
+ output_data = std::move(input_data);
+ has_input = false;
+ has_output = true;
+ return;
+ }
+
+ try
+ {
+ transform(input_data.chunk, output_data.chunk);
+ }
+ catch (DB::Exception &)
+ {
+ output_data.exception = std::current_exception();
+ has_output = true;
+ has_input = false;
+ return;
+ }
+
+ has_input = !needInputData();
+
+ if (!skip_empty_chunks || output_data.chunk)
+ has_output = true;
+
+ if (has_output && !output_data.chunk && getOutputPort().getHeader())
+ /// Support invariant that chunks must have the same number of columns as header.
+ output_data.chunk = Chunk(getOutputPort().getHeader().cloneEmpty().getColumns(), 0);
+}
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h
index ee92b574d7..20134b59dd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISimpleTransform.h
@@ -1,61 +1,61 @@
-#pragma once
-
-#include <Processors/IProcessor.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int NOT_IMPLEMENTED;
-}
-
-/** Has one input and one output.
- * Simply pull a block from input, transform it, and push it to output.
- */
-class ISimpleTransform : public IProcessor
-{
-protected:
- InputPort & input;
- OutputPort & output;
-
- Port::Data input_data;
- Port::Data output_data;
- bool has_input = false;
- bool has_output = false;
- bool no_more_data_needed = false;
- const bool skip_empty_chunks;
-
- /// Set input port NotNeeded after chunk was pulled.
- /// Input port will become needed again only after data was transformed.
- /// This allows to escape caching chunks in input port, which can lead to uneven data distribution.
- bool set_input_not_needed_after_read = true;
-
- virtual void transform(Chunk &)
- {
- throw Exception("Method transform is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
- }
-
- virtual void transform(Chunk & input_chunk, Chunk & output_chunk)
- {
- transform(input_chunk);
- output_chunk.swap(input_chunk);
- }
-
- virtual bool needInputData() const { return true; }
- void stopReading() { no_more_data_needed = true; }
-
-public:
- ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_);
-
- Status prepare() override;
- void work() override;
-
- InputPort & getInputPort() { return input; }
- OutputPort & getOutputPort() { return output; }
-
- void setInputNotNeededAfterRead(bool value) { set_input_not_needed_after_read = value; }
-};
-
-}
+#pragma once
+
+#include <Processors/IProcessor.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+}
+
+/** Has one input and one output.
+ * Simply pull a block from input, transform it, and push it to output.
+ */
+class ISimpleTransform : public IProcessor
+{
+protected:
+ InputPort & input;
+ OutputPort & output;
+
+ Port::Data input_data;
+ Port::Data output_data;
+ bool has_input = false;
+ bool has_output = false;
+ bool no_more_data_needed = false;
+ const bool skip_empty_chunks;
+
+ /// Set input port NotNeeded after chunk was pulled.
+ /// Input port will become needed again only after data was transformed.
+ /// This allows to escape caching chunks in input port, which can lead to uneven data distribution.
+ bool set_input_not_needed_after_read = true;
+
+ virtual void transform(Chunk &)
+ {
+ throw Exception("Method transform is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
+ }
+
+ virtual void transform(Chunk & input_chunk, Chunk & output_chunk)
+ {
+ transform(input_chunk);
+ output_chunk.swap(input_chunk);
+ }
+
+ virtual bool needInputData() const { return true; }
+ void stopReading() { no_more_data_needed = true; }
+
+public:
+ ISimpleTransform(Block input_header_, Block output_header_, bool skip_empty_chunks_);
+
+ Status prepare() override;
+ void work() override;
+
+ InputPort & getInputPort() { return input; }
+ OutputPort & getOutputPort() { return output; }
+
+ void setInputNotNeededAfterRead(bool value) { set_input_not_needed_after_read = value; }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp
index 0de3ed37a6..4b5ef0f8df 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.cpp
@@ -1,41 +1,41 @@
-#include <Processors/ISink.h>
-
-
-namespace DB
-{
-
-ISink::ISink(Block header)
- : IProcessor({std::move(header)}, {}), input(inputs.front())
-{
-}
-
-ISink::Status ISink::prepare()
-{
+#include <Processors/ISink.h>
+
+
+namespace DB
+{
+
+ISink::ISink(Block header)
+ : IProcessor({std::move(header)}, {}), input(inputs.front())
+{
+}
+
+ISink::Status ISink::prepare()
+{
if (!was_on_start_called)
return Status::Ready;
- if (has_input)
- return Status::Ready;
-
- if (input.isFinished())
- {
+ if (has_input)
+ return Status::Ready;
+
+ if (input.isFinished())
+ {
if (!was_on_finish_called)
return Status::Ready;
- return Status::Finished;
- }
-
- input.setNeeded();
- if (!input.hasData())
- return Status::NeedData;
-
+ return Status::Finished;
+ }
+
+ input.setNeeded();
+ if (!input.hasData())
+ return Status::NeedData;
+
current_chunk = input.pull(true);
- has_input = true;
- return Status::Ready;
-}
-
-void ISink::work()
-{
+ has_input = true;
+ return Status::Ready;
+}
+
+void ISink::work()
+{
if (!was_on_start_called)
{
was_on_start_called = true;
@@ -51,6 +51,6 @@ void ISink::work()
was_on_finish_called = true;
onFinish();
}
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h
index f960def1cd..1406583e61 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ISink.h
@@ -1,31 +1,31 @@
-#pragma once
-
-#include <Processors/IProcessor.h>
-
-
-namespace DB
-{
-
-class ISink : public IProcessor
-{
-protected:
- InputPort & input;
- Chunk current_chunk;
- bool has_input = false;
+#pragma once
+
+#include <Processors/IProcessor.h>
+
+
+namespace DB
+{
+
+class ISink : public IProcessor
+{
+protected:
+ InputPort & input;
+ Chunk current_chunk;
+ bool has_input = false;
bool was_on_start_called = false;
bool was_on_finish_called = false;
-
- virtual void consume(Chunk block) = 0;
+
+ virtual void consume(Chunk block) = 0;
virtual void onStart() {}
- virtual void onFinish() {}
-
-public:
- explicit ISink(Block header);
-
- Status prepare() override;
- void work() override;
-
- InputPort & getPort() { return input; }
-};
-
-}
+ virtual void onFinish() {}
+
+public:
+ explicit ISink(Block header);
+
+ Status prepare() override;
+ void work() override;
+
+ InputPort & getPort() { return input; }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp
index 36c58e1454..d1c06046b6 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.cpp
@@ -1,296 +1,296 @@
-#include <Processors/LimitTransform.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-LimitTransform::LimitTransform(
+#include <Processors/LimitTransform.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+LimitTransform::LimitTransform(
const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams,
- bool always_read_till_end_, bool with_ties_,
- SortDescription description_)
- : IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_))
- , limit(limit_), offset(offset_)
- , always_read_till_end(always_read_till_end_)
- , with_ties(with_ties_), description(std::move(description_))
-{
- if (num_streams != 1 && with_ties)
- throw Exception("Cannot use LimitTransform with multiple ports and ties.", ErrorCodes::LOGICAL_ERROR);
-
- ports_data.resize(num_streams);
-
- size_t cur_stream = 0;
- for (auto & input : inputs)
- {
- ports_data[cur_stream].input_port = &input;
- ++cur_stream;
- }
-
- cur_stream = 0;
- for (auto & output : outputs)
- {
- ports_data[cur_stream].output_port = &output;
- ++cur_stream;
- }
-
- for (const auto & desc : description)
- {
- if (!desc.column_name.empty())
- sort_column_positions.push_back(header_.getPositionByName(desc.column_name));
- else
- sort_column_positions.push_back(desc.column_number);
- }
-}
-
+ bool always_read_till_end_, bool with_ties_,
+ SortDescription description_)
+ : IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_))
+ , limit(limit_), offset(offset_)
+ , always_read_till_end(always_read_till_end_)
+ , with_ties(with_ties_), description(std::move(description_))
+{
+ if (num_streams != 1 && with_ties)
+ throw Exception("Cannot use LimitTransform with multiple ports and ties.", ErrorCodes::LOGICAL_ERROR);
+
+ ports_data.resize(num_streams);
+
+ size_t cur_stream = 0;
+ for (auto & input : inputs)
+ {
+ ports_data[cur_stream].input_port = &input;
+ ++cur_stream;
+ }
+
+ cur_stream = 0;
+ for (auto & output : outputs)
+ {
+ ports_data[cur_stream].output_port = &output;
+ ++cur_stream;
+ }
+
+ for (const auto & desc : description)
+ {
+ if (!desc.column_name.empty())
+ sort_column_positions.push_back(header_.getPositionByName(desc.column_name));
+ else
+ sort_column_positions.push_back(desc.column_number);
+ }
+}
+
Chunk LimitTransform::makeChunkWithPreviousRow(const Chunk & chunk, UInt64 row) const
-{
- assert(row < chunk.getNumRows());
- ColumnRawPtrs current_columns = extractSortColumns(chunk.getColumns());
- MutableColumns last_row_sort_columns;
- for (size_t i = 0; i < current_columns.size(); ++i)
- {
- last_row_sort_columns.emplace_back(current_columns[i]->cloneEmpty());
- last_row_sort_columns[i]->insertFrom(*current_columns[i], row);
- }
- return Chunk(std::move(last_row_sort_columns), 1);
-}
-
-
-IProcessor::Status LimitTransform::prepare(
- const PortNumbers & updated_input_ports,
- const PortNumbers & updated_output_ports)
-{
- bool has_full_port = false;
-
- auto process_pair = [&](size_t pos)
- {
- auto status = preparePair(ports_data[pos]);
-
- switch (status)
- {
- case IProcessor::Status::Finished:
- {
- if (!ports_data[pos].is_finished)
- {
- ports_data[pos].is_finished = true;
- ++num_finished_port_pairs;
- }
-
- return;
- }
- case IProcessor::Status::PortFull:
- {
- has_full_port = true;
- return;
- }
- case IProcessor::Status::NeedData:
- return;
- default:
- throw Exception(
- "Unexpected status for LimitTransform::preparePair : " + IProcessor::statusToName(status),
- ErrorCodes::LOGICAL_ERROR);
- }
- };
-
- for (auto pos : updated_input_ports)
- process_pair(pos);
-
- for (auto pos : updated_output_ports)
- process_pair(pos);
-
- /// All ports are finished. It may happen even before we reached the limit (has less data then limit).
- if (num_finished_port_pairs == ports_data.size())
- return Status::Finished;
-
+{
+ assert(row < chunk.getNumRows());
+ ColumnRawPtrs current_columns = extractSortColumns(chunk.getColumns());
+ MutableColumns last_row_sort_columns;
+ for (size_t i = 0; i < current_columns.size(); ++i)
+ {
+ last_row_sort_columns.emplace_back(current_columns[i]->cloneEmpty());
+ last_row_sort_columns[i]->insertFrom(*current_columns[i], row);
+ }
+ return Chunk(std::move(last_row_sort_columns), 1);
+}
+
+
+IProcessor::Status LimitTransform::prepare(
+ const PortNumbers & updated_input_ports,
+ const PortNumbers & updated_output_ports)
+{
+ bool has_full_port = false;
+
+ auto process_pair = [&](size_t pos)
+ {
+ auto status = preparePair(ports_data[pos]);
+
+ switch (status)
+ {
+ case IProcessor::Status::Finished:
+ {
+ if (!ports_data[pos].is_finished)
+ {
+ ports_data[pos].is_finished = true;
+ ++num_finished_port_pairs;
+ }
+
+ return;
+ }
+ case IProcessor::Status::PortFull:
+ {
+ has_full_port = true;
+ return;
+ }
+ case IProcessor::Status::NeedData:
+ return;
+ default:
+ throw Exception(
+ "Unexpected status for LimitTransform::preparePair : " + IProcessor::statusToName(status),
+ ErrorCodes::LOGICAL_ERROR);
+ }
+ };
+
+ for (auto pos : updated_input_ports)
+ process_pair(pos);
+
+ for (auto pos : updated_output_ports)
+ process_pair(pos);
+
+ /// All ports are finished. It may happen even before we reached the limit (has less data then limit).
+ if (num_finished_port_pairs == ports_data.size())
+ return Status::Finished;
+
bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset);
- /// If we reached limit for some port, then close others. Otherwise some sources may infinitely read data.
- /// Example: SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1
+ /// If we reached limit for some port, then close others. Otherwise some sources may infinitely read data.
+ /// Example: SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1
if ((!limit_is_unreachable && rows_read >= offset + limit)
&& !previous_row_chunk && !always_read_till_end)
- {
- for (auto & input : inputs)
- input.close();
-
- for (auto & output : outputs)
- output.finish();
-
- return Status::Finished;
- }
-
- if (has_full_port)
- return Status::PortFull;
-
- return Status::NeedData;
-}
-
-LimitTransform::Status LimitTransform::prepare()
-{
- if (ports_data.size() != 1)
- throw Exception("prepare without arguments is not supported for multi-port LimitTransform.",
- ErrorCodes::LOGICAL_ERROR);
-
- return prepare({0}, {0});
-}
-
-LimitTransform::Status LimitTransform::preparePair(PortsData & data)
-{
- auto & output = *data.output_port;
- auto & input = *data.input_port;
-
- /// Check can output.
- bool output_finished = false;
- if (output.isFinished())
- {
- output_finished = true;
- if (!always_read_till_end)
- {
- input.close();
- return Status::Finished;
- }
- }
-
- if (!output_finished && !output.canPush())
- {
- input.setNotNeeded();
- return Status::PortFull;
- }
-
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ for (auto & output : outputs)
+ output.finish();
+
+ return Status::Finished;
+ }
+
+ if (has_full_port)
+ return Status::PortFull;
+
+ return Status::NeedData;
+}
+
+LimitTransform::Status LimitTransform::prepare()
+{
+ if (ports_data.size() != 1)
+ throw Exception("prepare without arguments is not supported for multi-port LimitTransform.",
+ ErrorCodes::LOGICAL_ERROR);
+
+ return prepare({0}, {0});
+}
+
+LimitTransform::Status LimitTransform::preparePair(PortsData & data)
+{
+ auto & output = *data.output_port;
+ auto & input = *data.input_port;
+
+ /// Check can output.
+ bool output_finished = false;
+ if (output.isFinished())
+ {
+ output_finished = true;
+ if (!always_read_till_end)
+ {
+ input.close();
+ return Status::Finished;
+ }
+ }
+
+ if (!output_finished && !output.canPush())
+ {
+ input.setNotNeeded();
+ return Status::PortFull;
+ }
+
bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset);
- /// Check if we are done with pushing.
+ /// Check if we are done with pushing.
bool is_limit_reached = !limit_is_unreachable && rows_read >= offset + limit && !previous_row_chunk;
- if (is_limit_reached)
- {
- if (!always_read_till_end)
- {
- output.finish();
- input.close();
- return Status::Finished;
- }
- }
-
- /// Check can input.
-
- if (input.isFinished())
- {
- output.finish();
- return Status::Finished;
- }
-
- input.setNeeded();
- if (!input.hasData())
- return Status::NeedData;
-
- data.current_chunk = input.pull(true);
-
- auto rows = data.current_chunk.getNumRows();
-
- if (rows_before_limit_at_least)
- rows_before_limit_at_least->add(rows);
-
- /// Skip block (for 'always_read_till_end' case).
- if (is_limit_reached || output_finished)
- {
- data.current_chunk.clear();
- if (input.isFinished())
- {
- output.finish();
- return Status::Finished;
- }
-
- /// Now, we pulled from input, and it must be empty.
- input.setNeeded();
- return Status::NeedData;
- }
-
- /// Process block.
-
- rows_read += rows;
-
- if (rows_read <= offset)
- {
- data.current_chunk.clear();
-
- if (input.isFinished())
- {
- output.finish();
- return Status::Finished;
- }
-
- /// Now, we pulled from input, and it must be empty.
- input.setNeeded();
- return Status::NeedData;
- }
-
+ if (is_limit_reached)
+ {
+ if (!always_read_till_end)
+ {
+ output.finish();
+ input.close();
+ return Status::Finished;
+ }
+ }
+
+ /// Check can input.
+
+ if (input.isFinished())
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ input.setNeeded();
+ if (!input.hasData())
+ return Status::NeedData;
+
+ data.current_chunk = input.pull(true);
+
+ auto rows = data.current_chunk.getNumRows();
+
+ if (rows_before_limit_at_least)
+ rows_before_limit_at_least->add(rows);
+
+ /// Skip block (for 'always_read_till_end' case).
+ if (is_limit_reached || output_finished)
+ {
+ data.current_chunk.clear();
+ if (input.isFinished())
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ /// Now, we pulled from input, and it must be empty.
+ input.setNeeded();
+ return Status::NeedData;
+ }
+
+ /// Process block.
+
+ rows_read += rows;
+
+ if (rows_read <= offset)
+ {
+ data.current_chunk.clear();
+
+ if (input.isFinished())
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ /// Now, we pulled from input, and it must be empty.
+ input.setNeeded();
+ return Status::NeedData;
+ }
+
if (rows <= std::numeric_limits<UInt64>::max() - offset && rows_read >= offset + rows
&& !limit_is_unreachable && rows_read <= offset + limit)
- {
- /// Return the whole chunk.
-
- /// Save the last row of current chunk to check if next block begins with the same row (for WITH TIES).
- if (with_ties && rows_read == offset + limit)
- previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, data.current_chunk.getNumRows() - 1);
- }
- else
+ {
+ /// Return the whole chunk.
+
+ /// Save the last row of current chunk to check if next block begins with the same row (for WITH TIES).
+ if (with_ties && rows_read == offset + limit)
+ previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, data.current_chunk.getNumRows() - 1);
+ }
+ else
/// This function may be heavy to execute in prepare. But it happens no more than twice, and make code simpler.
- splitChunk(data);
-
- bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit;
- /// No more data is needed.
+ splitChunk(data);
+
+ bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit;
+ /// No more data is needed.
if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties)
- input.close();
-
- output.push(std::move(data.current_chunk));
-
- return Status::PortFull;
-}
-
-
-void LimitTransform::splitChunk(PortsData & data)
-{
- auto current_chunk_sort_columns = extractSortColumns(data.current_chunk.getColumns());
+ input.close();
+
+ output.push(std::move(data.current_chunk));
+
+ return Status::PortFull;
+}
+
+
+void LimitTransform::splitChunk(PortsData & data)
+{
+ auto current_chunk_sort_columns = extractSortColumns(data.current_chunk.getColumns());
UInt64 num_rows = data.current_chunk.getNumRows();
UInt64 num_columns = data.current_chunk.getNumColumns();
-
+
bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset);
if (previous_row_chunk && !limit_is_unreachable && rows_read >= offset + limit)
- {
- /// Scan until the first row, which is not equal to previous_row_chunk (for WITH TIES)
+ {
+ /// Scan until the first row, which is not equal to previous_row_chunk (for WITH TIES)
UInt64 current_row_num = 0;
- for (; current_row_num < num_rows; ++current_row_num)
- {
- if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num))
- break;
- }
-
- auto columns = data.current_chunk.detachColumns();
-
- if (current_row_num < num_rows)
- {
- previous_row_chunk = {};
+ for (; current_row_num < num_rows; ++current_row_num)
+ {
+ if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num))
+ break;
+ }
+
+ auto columns = data.current_chunk.detachColumns();
+
+ if (current_row_num < num_rows)
+ {
+ previous_row_chunk = {};
for (UInt64 i = 0; i < num_columns; ++i)
- columns[i] = columns[i]->cut(0, current_row_num);
- }
-
- data.current_chunk.setColumns(std::move(columns), current_row_num);
- return;
- }
-
- /// return a piece of the block
+ columns[i] = columns[i]->cut(0, current_row_num);
+ }
+
+ data.current_chunk.setColumns(std::move(columns), current_row_num);
+ return;
+ }
+
+ /// return a piece of the block
UInt64 start = 0;
-
+
/// ------------[....(...).]
/// <----------------------> rows_read
/// <----------> num_rows
/// <---------------> offset
/// <---> start
-
+
assert(offset < rows_read);
if (offset + num_rows > rows_read)
@@ -324,55 +324,55 @@ void LimitTransform::splitChunk(PortsData & data)
length = offset + limit - (rows_read - num_rows) - start;
}
- /// check if other rows in current block equals to last one in limit
- if (with_ties && length)
- {
+ /// check if other rows in current block equals to last one in limit
+ if (with_ties && length)
+ {
UInt64 current_row_num = start + length;
- previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, current_row_num - 1);
-
- for (; current_row_num < num_rows; ++current_row_num)
- {
- if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num))
- {
- previous_row_chunk = {};
- break;
- }
- }
-
- length = current_row_num - start;
- }
-
- if (length == num_rows)
- return;
-
- auto columns = data.current_chunk.detachColumns();
-
+ previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, current_row_num - 1);
+
+ for (; current_row_num < num_rows; ++current_row_num)
+ {
+ if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num))
+ {
+ previous_row_chunk = {};
+ break;
+ }
+ }
+
+ length = current_row_num - start;
+ }
+
+ if (length == num_rows)
+ return;
+
+ auto columns = data.current_chunk.detachColumns();
+
for (UInt64 i = 0; i < num_columns; ++i)
- columns[i] = columns[i]->cut(start, length);
-
- data.current_chunk.setColumns(std::move(columns), length);
-}
-
-ColumnRawPtrs LimitTransform::extractSortColumns(const Columns & columns) const
-{
- ColumnRawPtrs res;
- res.reserve(description.size());
- for (size_t pos : sort_column_positions)
- res.push_back(columns[pos].get());
-
- return res;
-}
-
+ columns[i] = columns[i]->cut(start, length);
+
+ data.current_chunk.setColumns(std::move(columns), length);
+}
+
+ColumnRawPtrs LimitTransform::extractSortColumns(const Columns & columns) const
+{
+ ColumnRawPtrs res;
+ res.reserve(description.size());
+ for (size_t pos : sort_column_positions)
+ res.push_back(columns[pos].get());
+
+ return res;
+}
+
bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const
-{
- assert(current_chunk_sort_columns.size() == previous_row_chunk.getNumColumns());
- size_t size = current_chunk_sort_columns.size();
- const auto & previous_row_sort_columns = previous_row_chunk.getColumns();
- for (size_t i = 0; i < size; ++i)
- if (0 != current_chunk_sort_columns[i]->compareAt(current_chunk_row_num, 0, *previous_row_sort_columns[i], 1))
- return false;
- return true;
-}
-
-}
-
+{
+ assert(current_chunk_sort_columns.size() == previous_row_chunk.getNumColumns());
+ size_t size = current_chunk_sort_columns.size();
+ const auto & previous_row_sort_columns = previous_row_chunk.getColumns();
+ for (size_t i = 0; i < size; ++i)
+ if (0 != current_chunk_sort_columns[i]->compareAt(current_chunk_row_num, 0, *previous_row_sort_columns[i], 1))
+ return false;
+ return true;
+}
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h
index 8865eab732..46ffc891c2 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/LimitTransform.h
@@ -1,72 +1,72 @@
-#pragma once
-
-#include <Processors/IProcessor.h>
-#include <Processors/RowsBeforeLimitCounter.h>
-#include <Core/SortDescription.h>
-
-namespace DB
-{
-
-/// Implementation for LIMIT N OFFSET M
-/// This processor support multiple inputs and outputs (the same number).
-/// Each pair of input and output port works independently.
-/// The reason to have multiple ports is to be able to stop all sources when limit is reached, in a query like:
-/// SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1
-///
-/// always_read_till_end - read all data from input ports even if limit was reached.
-/// with_ties, description - implementation of LIMIT WITH TIES. It works only for single port.
-class LimitTransform : public IProcessor
-{
-private:
+#pragma once
+
+#include <Processors/IProcessor.h>
+#include <Processors/RowsBeforeLimitCounter.h>
+#include <Core/SortDescription.h>
+
+namespace DB
+{
+
+/// Implementation for LIMIT N OFFSET M
+/// This processor support multiple inputs and outputs (the same number).
+/// Each pair of input and output port works independently.
+/// The reason to have multiple ports is to be able to stop all sources when limit is reached, in a query like:
+/// SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1
+///
+/// always_read_till_end - read all data from input ports even if limit was reached.
+/// with_ties, description - implementation of LIMIT WITH TIES. It works only for single port.
+class LimitTransform : public IProcessor
+{
+private:
UInt64 limit;
UInt64 offset;
-
- bool always_read_till_end;
-
- bool with_ties;
- const SortDescription description;
-
- Chunk previous_row_chunk; /// for WITH TIES, contains only sort columns
- std::vector<size_t> sort_column_positions;
-
+
+ bool always_read_till_end;
+
+ bool with_ties;
+ const SortDescription description;
+
+ Chunk previous_row_chunk; /// for WITH TIES, contains only sort columns
+ std::vector<size_t> sort_column_positions;
+
UInt64 rows_read = 0; /// including the last read block
- RowsBeforeLimitCounterPtr rows_before_limit_at_least;
-
- /// State of port's pair.
- /// Chunks from different port pairs are not mixed for better cache locality.
- struct PortsData
- {
- Chunk current_chunk;
-
- InputPort * input_port = nullptr;
- OutputPort * output_port = nullptr;
- bool is_finished = false;
- };
-
- std::vector<PortsData> ports_data;
- size_t num_finished_port_pairs = 0;
-
+ RowsBeforeLimitCounterPtr rows_before_limit_at_least;
+
+ /// State of port's pair.
+ /// Chunks from different port pairs are not mixed for better cache locality.
+ struct PortsData
+ {
+ Chunk current_chunk;
+
+ InputPort * input_port = nullptr;
+ OutputPort * output_port = nullptr;
+ bool is_finished = false;
+ };
+
+ std::vector<PortsData> ports_data;
+ size_t num_finished_port_pairs = 0;
+
Chunk makeChunkWithPreviousRow(const Chunk & current_chunk, UInt64 row_num) const;
- ColumnRawPtrs extractSortColumns(const Columns & columns) const;
+ ColumnRawPtrs extractSortColumns(const Columns & columns) const;
bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const;
-
-public:
- LimitTransform(
+
+public:
+ LimitTransform(
const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams = 1,
- bool always_read_till_end_ = false, bool with_ties_ = false,
- SortDescription description_ = {});
-
- String getName() const override { return "Limit"; }
-
- Status prepare(const PortNumbers & /*updated_input_ports*/, const PortNumbers & /*updated_output_ports*/) override;
- Status prepare() override; /// Compatibility for TreeExecutor.
- Status preparePair(PortsData & data);
- void splitChunk(PortsData & data);
-
- InputPort & getInputPort() { return inputs.front(); }
- OutputPort & getOutputPort() { return outputs.front(); }
-
- void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); }
-};
-
-}
+ bool always_read_till_end_ = false, bool with_ties_ = false,
+ SortDescription description_ = {});
+
+ String getName() const override { return "Limit"; }
+
+ Status prepare(const PortNumbers & /*updated_input_ports*/, const PortNumbers & /*updated_output_ports*/) override;
+ Status prepare() override; /// Compatibility for TreeExecutor.
+ Status preparePair(PortsData & data);
+ void splitChunk(PortsData & data);
+
+ InputPort & getInputPort() { return inputs.front(); }
+ OutputPort & getOutputPort() { return outputs.front(); }
+
+ void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp
index e0da79f148..3ad0687fbd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp
@@ -1,6 +1,6 @@
-#include <Processors/Pipe.h>
-#include <IO/WriteHelpers.h>
-#include <Processors/Sources/SourceFromInputStream.h>
+#include <Processors/Pipe.h>
+#include <IO/WriteHelpers.h>
+#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/ResizeProcessor.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/LimitTransform.h>
@@ -10,17 +10,17 @@
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/NullSource.h>
#include <Columns/ColumnConst.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
static void checkSource(const IProcessor & source)
-{
+{
if (!source.getInputs().empty())
throw Exception("Source for pipe shouldn't have any input, but " + source.getName() + " has " +
toString(source.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
@@ -32,10 +32,10 @@ static void checkSource(const IProcessor & source)
if (source.getOutputs().size() > 1)
throw Exception("Source for pipe should have single output, but " + source.getName() + " has " +
toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
-}
-
+}
+
static OutputPort * uniteExtremes(const OutputPortRawPtrs & ports, const Block & header, Processors & processors)
-{
+{
if (ports.empty())
return nullptr;
@@ -66,10 +66,10 @@ static OutputPort * uniteExtremes(const OutputPortRawPtrs & ports, const Block &
processors.emplace_back(std::move(sink));
return extremes_port;
-}
-
+}
+
static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & header, Processors & processors)
-{
+{
if (ports.empty())
return nullptr;
@@ -96,10 +96,10 @@ static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & h
processors.emplace_back(std::move(limit));
return totals_port;
-}
-
+}
+
Pipe::Holder & Pipe::Holder::operator=(Holder && rhs)
-{
+{
table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end());
storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end());
interpreter_context.insert(interpreter_context.end(),
@@ -110,18 +110,18 @@ Pipe::Holder & Pipe::Holder::operator=(Holder && rhs)
query_id_holder = std::move(rhs.query_id_holder);
return *this;
-}
-
+}
+
Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes)
-{
+{
if (!source->getInputs().empty())
throw Exception("Source for pipe shouldn't have any input, but " + source->getName() + " has " +
toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
-
+
if (!output)
throw Exception("Cannot create Pipe from source because specified output port is nullptr",
- ErrorCodes::LOGICAL_ERROR);
-
+ ErrorCodes::LOGICAL_ERROR);
+
if (output == totals || output == extremes || (totals && totals == extremes))
throw Exception("Cannot create Pipe from source because some of specified ports are the same",
ErrorCodes::LOGICAL_ERROR);
@@ -163,30 +163,30 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output
output_ports.push_back(output);
processors.emplace_back(std::move(source));
max_parallel_streams = 1;
-}
-
-Pipe::Pipe(ProcessorPtr source)
-{
- if (auto * source_from_input_stream = typeid_cast<SourceFromInputStream *>(source.get()))
- {
+}
+
+Pipe::Pipe(ProcessorPtr source)
+{
+ if (auto * source_from_input_stream = typeid_cast<SourceFromInputStream *>(source.get()))
+ {
/// Special case for SourceFromInputStream. Will remove it later.
totals_port = source_from_input_stream->getTotalsPort();
extremes_port = source_from_input_stream->getExtremesPort();
- }
- else if (source->getOutputs().size() != 1)
- checkSource(*source);
-
+ }
+ else if (source->getOutputs().size() != 1)
+ checkSource(*source);
+
if (collected_processors)
collected_processors->emplace_back(source);
-
+
output_ports.push_back(&source->getOutputs().front());
header = output_ports.front()->getHeader();
- processors.emplace_back(std::move(source));
- max_parallel_streams = 1;
-}
-
+ processors.emplace_back(std::move(source));
+ max_parallel_streams = 1;
+}
+
Pipe::Pipe(Processors processors_) : processors(std::move(processors_))
-{
+{
/// Create hash table with processors.
std::unordered_set<const IProcessor *> set;
for (const auto & processor : processors)
@@ -236,19 +236,19 @@ Pipe::Pipe(Processors processors_) : processors(std::move(processors_))
if (collected_processors)
for (const auto & processor : processors)
collected_processors->emplace_back(processor);
-}
-
+}
+
static Pipes removeEmptyPipes(Pipes pipes)
-{
+{
Pipes res;
res.reserve(pipes.size());
-
+
for (auto & pipe : pipes)
{
if (!pipe.empty())
res.emplace_back(std::move(pipe));
}
-
+
return res;
}
@@ -302,7 +302,7 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow
{
Pipe res;
- for (auto & pipe : pipes)
+ for (auto & pipe : pipes)
res.holder = std::move(pipe.holder); /// see move assignment for Pipe::Holder.
pipes = removeEmptyPipes(std::move(pipes));
@@ -311,11 +311,11 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow
return res;
if (pipes.size() == 1)
- {
+ {
pipes[0].holder = std::move(res.holder);
return std::move(pipes[0]);
}
-
+
OutputPortRawPtrs totals;
OutputPortRawPtrs extremes;
res.collected_processors = collected_processors;
@@ -336,8 +336,8 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow
if (pipe.extremes_port)
extremes.emplace_back(pipe.extremes_port);
- }
-
+ }
+
size_t num_processors = res.processors.size();
res.totals_port = uniteTotals(totals, res.header, res.processors);
@@ -350,10 +350,10 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow
}
return res;
-}
-
+}
+
void Pipe::addSource(ProcessorPtr source)
-{
+{
checkSource(*source);
const auto & source_header = source->getOutputs().front().getHeader();
@@ -369,10 +369,10 @@ void Pipe::addSource(ProcessorPtr source)
processors.emplace_back(std::move(source));
max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
+}
+
void Pipe::addTotalsSource(ProcessorPtr source)
-{
+{
if (output_ports.empty())
throw Exception("Cannot add totals source to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
@@ -389,10 +389,10 @@ void Pipe::addTotalsSource(ProcessorPtr source)
totals_port = &source->getOutputs().front();
processors.emplace_back(std::move(source));
-}
-
+}
+
void Pipe::addExtremesSource(ProcessorPtr source)
-{
+{
if (output_ports.empty())
throw Exception("Cannot add extremes source to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
@@ -409,10 +409,10 @@ void Pipe::addExtremesSource(ProcessorPtr source)
extremes_port = &source->getOutputs().front();
processors.emplace_back(std::move(source));
-}
-
+}
+
static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors)
-{
+{
if (port == nullptr)
return;
@@ -467,10 +467,10 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort
size_t next_output = 0;
for (auto & input : inputs)
- {
+ {
connect(*output_ports[next_output], input);
++next_output;
- }
+ }
auto & outputs = transform->getOutputs();
@@ -519,8 +519,8 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort
processors.emplace_back(std::move(transform));
max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
+}
+
void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes)
{
if (output_ports.empty())
@@ -607,14 +607,14 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort *
}
void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter)
-{
+{
if (output_ports.empty())
throw Exception("Cannot add simple transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
Block new_header;
auto add_transform = [&](OutputPort *& port, StreamType stream_type)
- {
+ {
if (!port)
return;
@@ -784,7 +784,7 @@ void Pipe::transform(const Transformer & transformer)
port->getHeader().dumpStructure() + ") is not connected", ErrorCodes::LOGICAL_ERROR);
set.emplace(&port->getProcessor());
- }
+ }
output_ports.clear();
@@ -842,17 +842,17 @@ void Pipe::transform(const Transformer & transformer)
processors.insert(processors.end(), new_processors.begin(), new_processors.end());
max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
+}
+
void Pipe::setLimits(const StreamLocalLimits & limits)
-{
- for (auto & processor : processors)
- {
+{
+ for (auto & processor : processors)
+ {
if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get()))
source_with_progress->setLimits(limits);
- }
-}
-
+ }
+}
+
void Pipe::setLeafLimits(const SizeLimits & leaf_limits)
{
for (auto & processor : processors)
@@ -863,12 +863,12 @@ void Pipe::setLeafLimits(const SizeLimits & leaf_limits)
}
void Pipe::setQuota(const std::shared_ptr<const EnabledQuota> & quota)
-{
- for (auto & processor : processors)
- {
+{
+ for (auto & processor : processors)
+ {
if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get()))
source_with_progress->setQuota(quota);
- }
-}
-
-}
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp
index 0a6026b27f..02add2e09e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Port.cpp
@@ -1,27 +1,27 @@
-#include <Processors/Port.h>
-#include <Processors/IProcessor.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-void connect(OutputPort & output, InputPort & input)
-{
- if (input.state || output.state)
- throw Exception("Port is already connected", ErrorCodes::LOGICAL_ERROR);
-
- auto out_name = output.getProcessor().getName();
- auto in_name = input.getProcessor().getName();
-
+#include <Processors/Port.h>
+#include <Processors/IProcessor.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+void connect(OutputPort & output, InputPort & input)
+{
+ if (input.state || output.state)
+ throw Exception("Port is already connected", ErrorCodes::LOGICAL_ERROR);
+
+ auto out_name = output.getProcessor().getName();
+ auto in_name = input.getProcessor().getName();
+
assertCompatibleHeader(output.getHeader(), input.getHeader(), " function connect between " + out_name + " and " + in_name);
-
- input.output_port = &output;
- output.input_port = &input;
- input.state = std::make_shared<Port::State>();
- output.state = input.state;
-}
-
-}
+
+ input.output_port = &output;
+ output.input_port = &input;
+ input.state = std::make_shared<Port::State>();
+ output.state = input.state;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp
index d652a34215..d51772cafc 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.cpp
@@ -1,421 +1,421 @@
-#include <Processors/ResizeProcessor.h>
-#include <iostream>
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-ResizeProcessor::Status ResizeProcessor::prepare()
-{
- bool is_first_output = true;
- auto output_end = current_output;
-
- bool all_outs_full_or_unneeded = true;
- bool all_outs_finished = true;
-
- bool is_first_input = true;
- auto input_end = current_input;
-
- bool all_inputs_finished = true;
-
- auto is_end_input = [&]() { return !is_first_input && current_input == input_end; };
- auto is_end_output = [&]() { return !is_first_output && current_output == output_end; };
-
- auto inc_current_input = [&]()
- {
- is_first_input = false;
- ++current_input;
-
- if (current_input == inputs.end())
- current_input = inputs.begin();
- };
-
- auto inc_current_output = [&]()
- {
- is_first_output = false;
- ++current_output;
-
- if (current_output == outputs.end())
- current_output = outputs.begin();
- };
-
- /// Find next output where can push.
- auto get_next_out = [&, this]() -> OutputPorts::iterator
- {
- while (!is_end_output())
- {
- if (!current_output->isFinished())
- {
- all_outs_finished = false;
-
- if (current_output->canPush())
- {
- all_outs_full_or_unneeded = false;
- auto res_output = current_output;
- inc_current_output();
- return res_output;
- }
- }
-
- inc_current_output();
- }
-
- return outputs.end();
- };
-
- /// Find next input from where can pull.
- auto get_next_input = [&, this]() -> InputPorts::iterator
- {
- while (!is_end_input())
- {
- if (!current_input->isFinished())
- {
- all_inputs_finished = false;
-
- current_input->setNeeded();
- if (current_input->hasData())
- {
- auto res_input = current_input;
- inc_current_input();
- return res_input;
- }
- }
-
- inc_current_input();
- }
-
- return inputs.end();
- };
-
- auto get_status_if_no_outputs = [&]() -> Status
- {
- if (all_outs_finished)
- {
- for (auto & in : inputs)
- in.close();
-
- return Status::Finished;
- }
-
- if (all_outs_full_or_unneeded)
- {
- for (auto & in : inputs)
- in.setNotNeeded();
-
- return Status::PortFull;
- }
-
- /// Now, we pushed to output, and it must be full.
- return Status::PortFull;
- };
-
- auto get_status_if_no_inputs = [&]() -> Status
- {
- if (all_inputs_finished)
- {
- for (auto & out : outputs)
- out.finish();
-
- return Status::Finished;
- }
-
- return Status::NeedData;
- };
-
- /// Set all inputs needed in order to evenly process them.
- /// Otherwise, in case num_outputs < num_inputs and chunks are consumed faster than produced,
- /// some inputs can be skipped.
-// auto set_all_unprocessed_inputs_needed = [&]()
-// {
-// for (; cur_input != inputs.end(); ++cur_input)
-// if (!cur_input->isFinished())
-// cur_input->setNeeded();
-// };
-
- while (!is_end_input() && !is_end_output())
- {
- auto output = get_next_out();
- auto input = get_next_input();
-
- if (output == outputs.end())
- return get_status_if_no_outputs();
-
-
- if (input == inputs.end())
- return get_status_if_no_inputs();
-
- output->push(input->pull());
- }
-
- if (is_end_input())
- return get_status_if_no_outputs();
-
- /// cur_input == inputs_end()
- return get_status_if_no_inputs();
-}
-
-IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
-{
- if (!initialized)
- {
- initialized = true;
-
- for (auto & input : inputs)
- {
- input.setNeeded();
- input_ports.push_back({.port = &input, .status = InputStatus::NotActive});
- }
-
- for (auto & output : outputs)
- output_ports.push_back({.port = &output, .status = OutputStatus::NotActive});
- }
-
- for (const auto & output_number : updated_outputs)
- {
- auto & output = output_ports[output_number];
- if (output.port->isFinished())
- {
- if (output.status != OutputStatus::Finished)
- {
- ++num_finished_outputs;
- output.status = OutputStatus::Finished;
- }
-
- continue;
- }
-
- if (output.port->canPush())
- {
- if (output.status != OutputStatus::NeedData)
- {
- output.status = OutputStatus::NeedData;
- waiting_outputs.push(output_number);
- }
- }
- }
-
- if (num_finished_outputs == outputs.size())
- {
- for (auto & input : inputs)
- input.close();
-
- return Status::Finished;
- }
-
- for (const auto & input_number : updated_inputs)
- {
- auto & input = input_ports[input_number];
- if (input.port->isFinished())
- {
- if (input.status != InputStatus::Finished)
- {
- input.status = InputStatus::Finished;
- ++num_finished_inputs;
- }
- continue;
- }
-
- if (input.port->hasData())
- {
- if (input.status != InputStatus::HasData)
- {
- input.status = InputStatus::HasData;
- inputs_with_data.push(input_number);
- }
- }
- }
-
- while (!waiting_outputs.empty() && !inputs_with_data.empty())
- {
- auto & waiting_output = output_ports[waiting_outputs.front()];
- waiting_outputs.pop();
-
- auto & input_with_data = input_ports[inputs_with_data.front()];
- inputs_with_data.pop();
-
- waiting_output.port->pushData(input_with_data.port->pullData());
- input_with_data.status = InputStatus::NotActive;
- waiting_output.status = OutputStatus::NotActive;
-
- if (input_with_data.port->isFinished())
- {
- input_with_data.status = InputStatus::Finished;
- ++num_finished_inputs;
- }
- }
-
- if (num_finished_inputs == inputs.size())
- {
- for (auto & output : outputs)
- output.finish();
-
- return Status::Finished;
- }
-
- if (!waiting_outputs.empty())
- return Status::NeedData;
-
- return Status::PortFull;
-}
-
-IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
-{
- if (!initialized)
- {
- initialized = true;
-
- for (auto & input : inputs)
- input_ports.push_back({.port = &input, .status = InputStatus::NotActive, .waiting_output = -1});
-
- for (UInt64 i = 0; i < input_ports.size(); ++i)
- disabled_input_ports.push(i);
-
- for (auto & output : outputs)
- output_ports.push_back({.port = &output, .status = OutputStatus::NotActive});
- }
-
- for (const auto & output_number : updated_outputs)
- {
- auto & output = output_ports[output_number];
- if (output.port->isFinished())
- {
- if (output.status != OutputStatus::Finished)
- {
- ++num_finished_outputs;
- output.status = OutputStatus::Finished;
- }
-
- continue;
- }
-
- if (output.port->canPush())
- {
- if (output.status != OutputStatus::NeedData)
- {
- output.status = OutputStatus::NeedData;
- waiting_outputs.push(output_number);
- }
- }
- }
-
- if (num_finished_outputs == outputs.size())
- {
- for (auto & input : inputs)
- input.close();
-
- return Status::Finished;
- }
-
- std::queue<UInt64> inputs_with_data;
-
- for (const auto & input_number : updated_inputs)
- {
- auto & input = input_ports[input_number];
- if (input.port->isFinished())
- {
- if (input.status != InputStatus::Finished)
- {
- input.status = InputStatus::Finished;
- ++num_finished_inputs;
-
- waiting_outputs.push(input.waiting_output);
- }
- continue;
- }
-
- if (input.port->hasData())
- {
- if (input.status != InputStatus::NotActive)
- {
- input.status = InputStatus::NotActive;
- inputs_with_data.push(input_number);
- }
- }
- }
-
- while (!inputs_with_data.empty())
- {
- auto input_number = inputs_with_data.front();
- auto & input_with_data = input_ports[input_number];
- inputs_with_data.pop();
-
- if (input_with_data.waiting_output == -1)
- throw Exception("No associated output for input with data.", ErrorCodes::LOGICAL_ERROR);
-
- auto & waiting_output = output_ports[input_with_data.waiting_output];
-
- if (waiting_output.status == OutputStatus::NotActive)
- throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR);
-
- if (waiting_output.status != OutputStatus::Finished)
- {
- waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true));
- waiting_output.status = OutputStatus::NotActive;
- }
- else
- abandoned_chunks.emplace_back(input_with_data.port->pullData(/* set_not_needed = */ true));
-
- if (input_with_data.port->isFinished())
- {
- input_with_data.status = InputStatus::Finished;
- ++num_finished_inputs;
- }
- else
- disabled_input_ports.push(input_number);
- }
-
- if (num_finished_inputs == inputs.size())
- {
- for (auto & output : outputs)
- output.finish();
-
- return Status::Finished;
- }
-
- /// Process abandoned chunks if any.
- while (!abandoned_chunks.empty() && !waiting_outputs.empty())
- {
- auto & waiting_output = output_ports[waiting_outputs.front()];
- waiting_outputs.pop();
-
- waiting_output.port->pushData(std::move(abandoned_chunks.back()));
- abandoned_chunks.pop_back();
-
- waiting_output.status = OutputStatus::NotActive;
- }
-
- /// Enable more inputs if needed.
- while (!disabled_input_ports.empty() && !waiting_outputs.empty())
- {
- auto & input = input_ports[disabled_input_ports.front()];
- disabled_input_ports.pop();
-
- input.port->setNeeded();
- input.status = InputStatus::NeedData;
- input.waiting_output = waiting_outputs.front();
-
- waiting_outputs.pop();
- }
-
- /// Close all other waiting for data outputs (there is no corresponding input for them).
- while (!waiting_outputs.empty())
- {
- auto & output = output_ports[waiting_outputs.front()];
- waiting_outputs.pop();
-
- output.status = OutputStatus::Finished;
- output.port->finish();
- ++num_finished_outputs;
- }
-
- if (disabled_input_ports.empty())
- return Status::NeedData;
-
- return Status::PortFull;
-}
-
-}
-
+#include <Processors/ResizeProcessor.h>
+#include <iostream>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+ResizeProcessor::Status ResizeProcessor::prepare()
+{
+ bool is_first_output = true;
+ auto output_end = current_output;
+
+ bool all_outs_full_or_unneeded = true;
+ bool all_outs_finished = true;
+
+ bool is_first_input = true;
+ auto input_end = current_input;
+
+ bool all_inputs_finished = true;
+
+ auto is_end_input = [&]() { return !is_first_input && current_input == input_end; };
+ auto is_end_output = [&]() { return !is_first_output && current_output == output_end; };
+
+ auto inc_current_input = [&]()
+ {
+ is_first_input = false;
+ ++current_input;
+
+ if (current_input == inputs.end())
+ current_input = inputs.begin();
+ };
+
+ auto inc_current_output = [&]()
+ {
+ is_first_output = false;
+ ++current_output;
+
+ if (current_output == outputs.end())
+ current_output = outputs.begin();
+ };
+
+ /// Find next output where can push.
+ auto get_next_out = [&, this]() -> OutputPorts::iterator
+ {
+ while (!is_end_output())
+ {
+ if (!current_output->isFinished())
+ {
+ all_outs_finished = false;
+
+ if (current_output->canPush())
+ {
+ all_outs_full_or_unneeded = false;
+ auto res_output = current_output;
+ inc_current_output();
+ return res_output;
+ }
+ }
+
+ inc_current_output();
+ }
+
+ return outputs.end();
+ };
+
+ /// Find next input from where can pull.
+ auto get_next_input = [&, this]() -> InputPorts::iterator
+ {
+ while (!is_end_input())
+ {
+ if (!current_input->isFinished())
+ {
+ all_inputs_finished = false;
+
+ current_input->setNeeded();
+ if (current_input->hasData())
+ {
+ auto res_input = current_input;
+ inc_current_input();
+ return res_input;
+ }
+ }
+
+ inc_current_input();
+ }
+
+ return inputs.end();
+ };
+
+ auto get_status_if_no_outputs = [&]() -> Status
+ {
+ if (all_outs_finished)
+ {
+ for (auto & in : inputs)
+ in.close();
+
+ return Status::Finished;
+ }
+
+ if (all_outs_full_or_unneeded)
+ {
+ for (auto & in : inputs)
+ in.setNotNeeded();
+
+ return Status::PortFull;
+ }
+
+ /// Now, we pushed to output, and it must be full.
+ return Status::PortFull;
+ };
+
+ auto get_status_if_no_inputs = [&]() -> Status
+ {
+ if (all_inputs_finished)
+ {
+ for (auto & out : outputs)
+ out.finish();
+
+ return Status::Finished;
+ }
+
+ return Status::NeedData;
+ };
+
+ /// Set all inputs needed in order to evenly process them.
+ /// Otherwise, in case num_outputs < num_inputs and chunks are consumed faster than produced,
+ /// some inputs can be skipped.
+// auto set_all_unprocessed_inputs_needed = [&]()
+// {
+// for (; cur_input != inputs.end(); ++cur_input)
+// if (!cur_input->isFinished())
+// cur_input->setNeeded();
+// };
+
+ while (!is_end_input() && !is_end_output())
+ {
+ auto output = get_next_out();
+ auto input = get_next_input();
+
+ if (output == outputs.end())
+ return get_status_if_no_outputs();
+
+
+ if (input == inputs.end())
+ return get_status_if_no_inputs();
+
+ output->push(input->pull());
+ }
+
+ if (is_end_input())
+ return get_status_if_no_outputs();
+
+ /// cur_input == inputs_end()
+ return get_status_if_no_inputs();
+}
+
+IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
+{
+ if (!initialized)
+ {
+ initialized = true;
+
+ for (auto & input : inputs)
+ {
+ input.setNeeded();
+ input_ports.push_back({.port = &input, .status = InputStatus::NotActive});
+ }
+
+ for (auto & output : outputs)
+ output_ports.push_back({.port = &output, .status = OutputStatus::NotActive});
+ }
+
+ for (const auto & output_number : updated_outputs)
+ {
+ auto & output = output_ports[output_number];
+ if (output.port->isFinished())
+ {
+ if (output.status != OutputStatus::Finished)
+ {
+ ++num_finished_outputs;
+ output.status = OutputStatus::Finished;
+ }
+
+ continue;
+ }
+
+ if (output.port->canPush())
+ {
+ if (output.status != OutputStatus::NeedData)
+ {
+ output.status = OutputStatus::NeedData;
+ waiting_outputs.push(output_number);
+ }
+ }
+ }
+
+ if (num_finished_outputs == outputs.size())
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ return Status::Finished;
+ }
+
+ for (const auto & input_number : updated_inputs)
+ {
+ auto & input = input_ports[input_number];
+ if (input.port->isFinished())
+ {
+ if (input.status != InputStatus::Finished)
+ {
+ input.status = InputStatus::Finished;
+ ++num_finished_inputs;
+ }
+ continue;
+ }
+
+ if (input.port->hasData())
+ {
+ if (input.status != InputStatus::HasData)
+ {
+ input.status = InputStatus::HasData;
+ inputs_with_data.push(input_number);
+ }
+ }
+ }
+
+ while (!waiting_outputs.empty() && !inputs_with_data.empty())
+ {
+ auto & waiting_output = output_ports[waiting_outputs.front()];
+ waiting_outputs.pop();
+
+ auto & input_with_data = input_ports[inputs_with_data.front()];
+ inputs_with_data.pop();
+
+ waiting_output.port->pushData(input_with_data.port->pullData());
+ input_with_data.status = InputStatus::NotActive;
+ waiting_output.status = OutputStatus::NotActive;
+
+ if (input_with_data.port->isFinished())
+ {
+ input_with_data.status = InputStatus::Finished;
+ ++num_finished_inputs;
+ }
+ }
+
+ if (num_finished_inputs == inputs.size())
+ {
+ for (auto & output : outputs)
+ output.finish();
+
+ return Status::Finished;
+ }
+
+ if (!waiting_outputs.empty())
+ return Status::NeedData;
+
+ return Status::PortFull;
+}
+
+IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
+{
+ if (!initialized)
+ {
+ initialized = true;
+
+ for (auto & input : inputs)
+ input_ports.push_back({.port = &input, .status = InputStatus::NotActive, .waiting_output = -1});
+
+ for (UInt64 i = 0; i < input_ports.size(); ++i)
+ disabled_input_ports.push(i);
+
+ for (auto & output : outputs)
+ output_ports.push_back({.port = &output, .status = OutputStatus::NotActive});
+ }
+
+ for (const auto & output_number : updated_outputs)
+ {
+ auto & output = output_ports[output_number];
+ if (output.port->isFinished())
+ {
+ if (output.status != OutputStatus::Finished)
+ {
+ ++num_finished_outputs;
+ output.status = OutputStatus::Finished;
+ }
+
+ continue;
+ }
+
+ if (output.port->canPush())
+ {
+ if (output.status != OutputStatus::NeedData)
+ {
+ output.status = OutputStatus::NeedData;
+ waiting_outputs.push(output_number);
+ }
+ }
+ }
+
+ if (num_finished_outputs == outputs.size())
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ return Status::Finished;
+ }
+
+ std::queue<UInt64> inputs_with_data;
+
+ for (const auto & input_number : updated_inputs)
+ {
+ auto & input = input_ports[input_number];
+ if (input.port->isFinished())
+ {
+ if (input.status != InputStatus::Finished)
+ {
+ input.status = InputStatus::Finished;
+ ++num_finished_inputs;
+
+ waiting_outputs.push(input.waiting_output);
+ }
+ continue;
+ }
+
+ if (input.port->hasData())
+ {
+ if (input.status != InputStatus::NotActive)
+ {
+ input.status = InputStatus::NotActive;
+ inputs_with_data.push(input_number);
+ }
+ }
+ }
+
+ while (!inputs_with_data.empty())
+ {
+ auto input_number = inputs_with_data.front();
+ auto & input_with_data = input_ports[input_number];
+ inputs_with_data.pop();
+
+ if (input_with_data.waiting_output == -1)
+ throw Exception("No associated output for input with data.", ErrorCodes::LOGICAL_ERROR);
+
+ auto & waiting_output = output_ports[input_with_data.waiting_output];
+
+ if (waiting_output.status == OutputStatus::NotActive)
+ throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR);
+
+ if (waiting_output.status != OutputStatus::Finished)
+ {
+ waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true));
+ waiting_output.status = OutputStatus::NotActive;
+ }
+ else
+ abandoned_chunks.emplace_back(input_with_data.port->pullData(/* set_not_needed = */ true));
+
+ if (input_with_data.port->isFinished())
+ {
+ input_with_data.status = InputStatus::Finished;
+ ++num_finished_inputs;
+ }
+ else
+ disabled_input_ports.push(input_number);
+ }
+
+ if (num_finished_inputs == inputs.size())
+ {
+ for (auto & output : outputs)
+ output.finish();
+
+ return Status::Finished;
+ }
+
+ /// Process abandoned chunks if any.
+ while (!abandoned_chunks.empty() && !waiting_outputs.empty())
+ {
+ auto & waiting_output = output_ports[waiting_outputs.front()];
+ waiting_outputs.pop();
+
+ waiting_output.port->pushData(std::move(abandoned_chunks.back()));
+ abandoned_chunks.pop_back();
+
+ waiting_output.status = OutputStatus::NotActive;
+ }
+
+ /// Enable more inputs if needed.
+ while (!disabled_input_ports.empty() && !waiting_outputs.empty())
+ {
+ auto & input = input_ports[disabled_input_ports.front()];
+ disabled_input_ports.pop();
+
+ input.port->setNeeded();
+ input.status = InputStatus::NeedData;
+ input.waiting_output = waiting_outputs.front();
+
+ waiting_outputs.pop();
+ }
+
+ /// Close all other waiting for data outputs (there is no corresponding input for them).
+ while (!waiting_outputs.empty())
+ {
+ auto & output = output_ports[waiting_outputs.front()];
+ waiting_outputs.pop();
+
+ output.status = OutputStatus::Finished;
+ output.port->finish();
+ ++num_finished_outputs;
+ }
+
+ if (disabled_input_ports.empty())
+ return Status::NeedData;
+
+ return Status::PortFull;
+}
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h
index f9c188e041..ba1caa6605 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/ResizeProcessor.h
@@ -1,136 +1,136 @@
-#pragma once
-
-#include <Processors/IProcessor.h>
-#include <queue>
-
-
-namespace DB
-{
-
+#pragma once
+
+#include <Processors/IProcessor.h>
+#include <queue>
+
+
+namespace DB
+{
+
/** Has arbitrary non zero number of inputs and arbitrary non zero number of outputs.
- * All of them have the same structure.
- *
+ * All of them have the same structure.
+ *
* Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever is is not full).
- * Doesn't do any heavy calculations.
- * Doesn't preserve an order of data.
- *
- * Examples:
- * - union data from multiple inputs to single output - to serialize data that was processed in parallel.
- * - split data from single input to multiple outputs - to allow further parallel processing.
- */
-class ResizeProcessor : public IProcessor
-{
-public:
- /// TODO Check that there is non zero number of inputs and outputs.
- ResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs)
- : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header))
- , current_input(inputs.begin())
- , current_output(outputs.begin())
- {
- }
-
- String getName() const override { return "Resize"; }
-
- Status prepare() override;
- Status prepare(const PortNumbers &, const PortNumbers &) override;
-
-private:
- InputPorts::iterator current_input;
- OutputPorts::iterator current_output;
-
- size_t num_finished_inputs = 0;
- size_t num_finished_outputs = 0;
- std::queue<UInt64> waiting_outputs;
- std::queue<UInt64> inputs_with_data;
- bool initialized = false;
-
- enum class OutputStatus
- {
- NotActive,
- NeedData,
- Finished,
- };
-
- enum class InputStatus
- {
- NotActive,
- HasData,
- Finished,
- };
-
- struct InputPortWithStatus
- {
- InputPort * port;
- InputStatus status;
- };
-
- struct OutputPortWithStatus
- {
- OutputPort * port;
- OutputStatus status;
- };
-
- std::vector<InputPortWithStatus> input_ports;
- std::vector<OutputPortWithStatus> output_ports;
-};
-
-class StrictResizeProcessor : public IProcessor
-{
-public:
- /// TODO Check that there is non zero number of inputs and outputs.
- StrictResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs)
- : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header))
- , current_input(inputs.begin())
- , current_output(outputs.begin())
- {
- }
-
- String getName() const override { return "StrictResize"; }
-
- Status prepare(const PortNumbers &, const PortNumbers &) override;
-
-private:
- InputPorts::iterator current_input;
- OutputPorts::iterator current_output;
-
- size_t num_finished_inputs = 0;
- size_t num_finished_outputs = 0;
- std::queue<UInt64> disabled_input_ports;
- std::queue<UInt64> waiting_outputs;
- bool initialized = false;
-
- enum class OutputStatus
- {
- NotActive,
- NeedData,
- Finished,
- };
-
- enum class InputStatus
- {
- NotActive,
- NeedData,
- Finished,
- };
-
- struct InputPortWithStatus
- {
- InputPort * port;
- InputStatus status;
- ssize_t waiting_output;
- };
-
- struct OutputPortWithStatus
- {
- OutputPort * port;
- OutputStatus status;
- };
-
- std::vector<InputPortWithStatus> input_ports;
- std::vector<OutputPortWithStatus> output_ports;
- /// This field contained chunks which were read for output which had became finished while reading was happening.
- /// They will be pushed to any next waiting output.
- std::vector<Port::Data> abandoned_chunks;
-};
-
-}
+ * Doesn't do any heavy calculations.
+ * Doesn't preserve an order of data.
+ *
+ * Examples:
+ * - union data from multiple inputs to single output - to serialize data that was processed in parallel.
+ * - split data from single input to multiple outputs - to allow further parallel processing.
+ */
+class ResizeProcessor : public IProcessor
+{
+public:
+ /// TODO Check that there is non zero number of inputs and outputs.
+ ResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs)
+ : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header))
+ , current_input(inputs.begin())
+ , current_output(outputs.begin())
+ {
+ }
+
+ String getName() const override { return "Resize"; }
+
+ Status prepare() override;
+ Status prepare(const PortNumbers &, const PortNumbers &) override;
+
+private:
+ InputPorts::iterator current_input;
+ OutputPorts::iterator current_output;
+
+ size_t num_finished_inputs = 0;
+ size_t num_finished_outputs = 0;
+ std::queue<UInt64> waiting_outputs;
+ std::queue<UInt64> inputs_with_data;
+ bool initialized = false;
+
+ enum class OutputStatus
+ {
+ NotActive,
+ NeedData,
+ Finished,
+ };
+
+ enum class InputStatus
+ {
+ NotActive,
+ HasData,
+ Finished,
+ };
+
+ struct InputPortWithStatus
+ {
+ InputPort * port;
+ InputStatus status;
+ };
+
+ struct OutputPortWithStatus
+ {
+ OutputPort * port;
+ OutputStatus status;
+ };
+
+ std::vector<InputPortWithStatus> input_ports;
+ std::vector<OutputPortWithStatus> output_ports;
+};
+
+class StrictResizeProcessor : public IProcessor
+{
+public:
+ /// TODO Check that there is non zero number of inputs and outputs.
+ StrictResizeProcessor(const Block & header, size_t num_inputs, size_t num_outputs)
+ : IProcessor(InputPorts(num_inputs, header), OutputPorts(num_outputs, header))
+ , current_input(inputs.begin())
+ , current_output(outputs.begin())
+ {
+ }
+
+ String getName() const override { return "StrictResize"; }
+
+ Status prepare(const PortNumbers &, const PortNumbers &) override;
+
+private:
+ InputPorts::iterator current_input;
+ OutputPorts::iterator current_output;
+
+ size_t num_finished_inputs = 0;
+ size_t num_finished_outputs = 0;
+ std::queue<UInt64> disabled_input_ports;
+ std::queue<UInt64> waiting_outputs;
+ bool initialized = false;
+
+ enum class OutputStatus
+ {
+ NotActive,
+ NeedData,
+ Finished,
+ };
+
+ enum class InputStatus
+ {
+ NotActive,
+ NeedData,
+ Finished,
+ };
+
+ struct InputPortWithStatus
+ {
+ InputPort * port;
+ InputStatus status;
+ ssize_t waiting_output;
+ };
+
+ struct OutputPortWithStatus
+ {
+ OutputPort * port;
+ OutputStatus status;
+ };
+
+ std::vector<InputPortWithStatus> input_ports;
+ std::vector<OutputPortWithStatus> output_ports;
+ /// This field contained chunks which were read for output which had became finished while reading was happening.
+ /// They will be pushed to any next waiting output.
+ std::vector<Port::Data> abandoned_chunks;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h
index d1f0ec5e6c..5676051537 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h
@@ -1,18 +1,18 @@
-#pragma once
-#include <Processors/ISource.h>
-
-
-namespace DB
-{
-
-class NullSource : public ISource
-{
-public:
- explicit NullSource(Block header) : ISource(std::move(header)) {}
- String getName() const override { return "NullSource"; }
-
-protected:
- Chunk generate() override { return Chunk(); }
-};
-
-}
+#pragma once
+#include <Processors/ISource.h>
+
+
+namespace DB
+{
+
+class NullSource : public ISource
+{
+public:
+ explicit NullSource(Block header) : ISource(std::move(header)) {}
+ String getName() const override { return "NullSource"; }
+
+protected:
+ Chunk generate() override { return Chunk(); }
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp
index 7c88c6dfbe..bdcb9d461a 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp
@@ -1,200 +1,200 @@
-#include <Processors/Sources/SourceFromInputStream.h>
-#include <Processors/Transforms/AggregatingTransform.h>
-#include <DataTypes/DataTypeAggregateFunction.h>
-#include <DataStreams/RemoteBlockInputStream.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_)
- : ISourceWithProgress(stream_->getHeader())
- , force_add_aggregating_info(force_add_aggregating_info_)
- , stream(std::move(stream_))
-{
- init();
-}
-
-void SourceFromInputStream::init()
-{
- const auto & sample = getPort().getHeader();
- for (auto & type : sample.getDataTypes())
- if (typeid_cast<const DataTypeAggregateFunction *>(type.get()))
- has_aggregate_functions = true;
-}
-
-void SourceFromInputStream::addTotalsPort()
-{
- if (totals_port)
- throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR);
-
- outputs.emplace_back(outputs.front().getHeader(), this);
- totals_port = &outputs.back();
-}
-
-void SourceFromInputStream::addExtremesPort()
-{
- if (extremes_port)
- throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR);
-
- outputs.emplace_back(outputs.front().getHeader(), this);
- extremes_port = &outputs.back();
-}
-
-IProcessor::Status SourceFromInputStream::prepare()
-{
- auto status = ISource::prepare();
-
- if (status == Status::Finished)
- {
- is_generating_finished = true;
-
- /// Read postfix and get totals if needed.
- if (!is_stream_finished && !isCancelled())
- return Status::Ready;
-
- if (totals_port && !totals_port->isFinished())
- {
- if (has_totals)
- {
- if (!totals_port->canPush())
- return Status::PortFull;
-
- totals_port->push(std::move(totals));
- has_totals = false;
- }
-
- totals_port->finish();
- }
-
- if (extremes_port && !extremes_port->isFinished())
- {
- if (has_extremes)
- {
- if (!extremes_port->canPush())
- return Status::PortFull;
-
- extremes_port->push(std::move(extremes));
- has_extremes = false;
- }
-
- extremes_port->finish();
- }
- }
-
- return status;
-}
-
-void SourceFromInputStream::work()
-{
- if (!is_generating_finished)
- {
- try
- {
- ISource::work();
- }
- catch (...)
- {
- /// Won't read suffix in case of exception.
- is_stream_finished = true;
- throw;
- }
-
- return;
- }
-
- if (is_stream_finished)
- return;
-
- /// Don't cancel for RemoteBlockInputStream (otherwise readSuffix can stack)
- if (!typeid_cast<const RemoteBlockInputStream *>(stream.get()))
- stream->cancel(false);
-
- if (rows_before_limit)
- {
- const auto & info = stream->getProfileInfo();
- if (info.hasAppliedLimit())
- rows_before_limit->add(info.getRowsBeforeLimit());
- }
-
- stream->readSuffix();
-
- if (auto totals_block = stream->getTotals())
- {
- totals.setColumns(totals_block.getColumns(), 1);
- has_totals = true;
- }
-
- is_stream_finished = true;
-}
-
-Chunk SourceFromInputStream::generate()
-{
- if (is_stream_finished)
- return {};
-
- if (!is_stream_started)
- {
- stream->readPrefix();
- is_stream_started = true;
- }
-
- auto block = stream->read();
- if (!block && !isCancelled())
- {
- if (rows_before_limit)
- {
- const auto & info = stream->getProfileInfo();
- if (info.hasAppliedLimit())
- rows_before_limit->add(info.getRowsBeforeLimit());
- }
-
- stream->readSuffix();
-
- if (auto totals_block = stream->getTotals())
- {
- if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it.
- {
- totals.setColumns(totals_block.getColumns(), totals_block.rows());
- has_totals = true;
- }
- }
-
- if (auto extremes_block = stream->getExtremes())
- {
- if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it.
- {
- extremes.setColumns(extremes_block.getColumns(), extremes_block.rows());
- has_extremes = true;
- }
- }
-
- is_stream_finished = true;
- return {};
- }
-
- if (isCancelled())
- return {};
-
-#ifndef NDEBUG
- assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream");
-#endif
-
- UInt64 num_rows = block.rows();
- Chunk chunk(block.getColumns(), num_rows);
-
- if (force_add_aggregating_info || has_aggregate_functions)
- {
- auto info = std::make_shared<AggregatedChunkInfo>();
- info->bucket_num = block.info.bucket_num;
- info->is_overflows = block.info.is_overflows;
- chunk.setChunkInfo(std::move(info));
- }
-
- return chunk;
-}
-
-}
+#include <Processors/Sources/SourceFromInputStream.h>
+#include <Processors/Transforms/AggregatingTransform.h>
+#include <DataTypes/DataTypeAggregateFunction.h>
+#include <DataStreams/RemoteBlockInputStream.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_)
+ : ISourceWithProgress(stream_->getHeader())
+ , force_add_aggregating_info(force_add_aggregating_info_)
+ , stream(std::move(stream_))
+{
+ init();
+}
+
+void SourceFromInputStream::init()
+{
+ const auto & sample = getPort().getHeader();
+ for (auto & type : sample.getDataTypes())
+ if (typeid_cast<const DataTypeAggregateFunction *>(type.get()))
+ has_aggregate_functions = true;
+}
+
+void SourceFromInputStream::addTotalsPort()
+{
+ if (totals_port)
+ throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR);
+
+ outputs.emplace_back(outputs.front().getHeader(), this);
+ totals_port = &outputs.back();
+}
+
+void SourceFromInputStream::addExtremesPort()
+{
+ if (extremes_port)
+ throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR);
+
+ outputs.emplace_back(outputs.front().getHeader(), this);
+ extremes_port = &outputs.back();
+}
+
+IProcessor::Status SourceFromInputStream::prepare()
+{
+ auto status = ISource::prepare();
+
+ if (status == Status::Finished)
+ {
+ is_generating_finished = true;
+
+ /// Read postfix and get totals if needed.
+ if (!is_stream_finished && !isCancelled())
+ return Status::Ready;
+
+ if (totals_port && !totals_port->isFinished())
+ {
+ if (has_totals)
+ {
+ if (!totals_port->canPush())
+ return Status::PortFull;
+
+ totals_port->push(std::move(totals));
+ has_totals = false;
+ }
+
+ totals_port->finish();
+ }
+
+ if (extremes_port && !extremes_port->isFinished())
+ {
+ if (has_extremes)
+ {
+ if (!extremes_port->canPush())
+ return Status::PortFull;
+
+ extremes_port->push(std::move(extremes));
+ has_extremes = false;
+ }
+
+ extremes_port->finish();
+ }
+ }
+
+ return status;
+}
+
+void SourceFromInputStream::work()
+{
+ if (!is_generating_finished)
+ {
+ try
+ {
+ ISource::work();
+ }
+ catch (...)
+ {
+ /// Won't read suffix in case of exception.
+ is_stream_finished = true;
+ throw;
+ }
+
+ return;
+ }
+
+ if (is_stream_finished)
+ return;
+
+ /// Don't cancel for RemoteBlockInputStream (otherwise readSuffix can stack)
+ if (!typeid_cast<const RemoteBlockInputStream *>(stream.get()))
+ stream->cancel(false);
+
+ if (rows_before_limit)
+ {
+ const auto & info = stream->getProfileInfo();
+ if (info.hasAppliedLimit())
+ rows_before_limit->add(info.getRowsBeforeLimit());
+ }
+
+ stream->readSuffix();
+
+ if (auto totals_block = stream->getTotals())
+ {
+ totals.setColumns(totals_block.getColumns(), 1);
+ has_totals = true;
+ }
+
+ is_stream_finished = true;
+}
+
+Chunk SourceFromInputStream::generate()
+{
+ if (is_stream_finished)
+ return {};
+
+ if (!is_stream_started)
+ {
+ stream->readPrefix();
+ is_stream_started = true;
+ }
+
+ auto block = stream->read();
+ if (!block && !isCancelled())
+ {
+ if (rows_before_limit)
+ {
+ const auto & info = stream->getProfileInfo();
+ if (info.hasAppliedLimit())
+ rows_before_limit->add(info.getRowsBeforeLimit());
+ }
+
+ stream->readSuffix();
+
+ if (auto totals_block = stream->getTotals())
+ {
+ if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it.
+ {
+ totals.setColumns(totals_block.getColumns(), totals_block.rows());
+ has_totals = true;
+ }
+ }
+
+ if (auto extremes_block = stream->getExtremes())
+ {
+ if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it.
+ {
+ extremes.setColumns(extremes_block.getColumns(), extremes_block.rows());
+ has_extremes = true;
+ }
+ }
+
+ is_stream_finished = true;
+ return {};
+ }
+
+ if (isCancelled())
+ return {};
+
+#ifndef NDEBUG
+ assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream");
+#endif
+
+ UInt64 num_rows = block.rows();
+ Chunk chunk(block.getColumns(), num_rows);
+
+ if (force_add_aggregating_info || has_aggregate_functions)
+ {
+ auto info = std::make_shared<AggregatedChunkInfo>();
+ info->bucket_num = block.info.bucket_num;
+ info->is_overflows = block.info.is_overflows;
+ chunk.setChunkInfo(std::move(info));
+ }
+
+ return chunk;
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h
index 9649385909..c30fd8dcb4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h
@@ -1,77 +1,77 @@
-#pragma once
+#pragma once
-#include <Processors/Sources/SourceWithProgress.h>
-#include <Processors/RowsBeforeLimitCounter.h>
+#include <Processors/Sources/SourceWithProgress.h>
+#include <Processors/RowsBeforeLimitCounter.h>
#include <DataStreams/IBlockInputStream.h>
-
-
-namespace DB
-{
-
-class IBlockInputStream;
-using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
-
-/// Wrapper for IBlockInputStream which implements ISourceWithProgress.
-class SourceFromInputStream : public ISourceWithProgress
-{
-public:
+
+
+namespace DB
+{
+
+class IBlockInputStream;
+using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
+
+/// Wrapper for IBlockInputStream which implements ISourceWithProgress.
+class SourceFromInputStream : public ISourceWithProgress
+{
+public:
/// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with bucket number and is_overflows flag) will be added to result chunk.
- explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false);
- String getName() const override { return "SourceFromInputStream"; }
-
- Status prepare() override;
- void work() override;
-
- Chunk generate() override;
-
- BlockInputStreamPtr & getStream() { return stream; }
-
- void addTotalsPort();
- void addExtremesPort();
-
- OutputPort * getTotalsPort() const { return totals_port; }
- OutputPort * getExtremesPort() const { return extremes_port; }
-
- void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); }
-
- /// Implementation for methods from ISourceWithProgress.
+ explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false);
+ String getName() const override { return "SourceFromInputStream"; }
+
+ Status prepare() override;
+ void work() override;
+
+ Chunk generate() override;
+
+ BlockInputStreamPtr & getStream() { return stream; }
+
+ void addTotalsPort();
+ void addExtremesPort();
+
+ OutputPort * getTotalsPort() const { return totals_port; }
+ OutputPort * getExtremesPort() const { return extremes_port; }
+
+ void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); }
+
+ /// Implementation for methods from ISourceWithProgress.
void setLimits(const StreamLocalLimits & limits_) final { stream->setLimits(limits_); }
void setLeafLimits(const SizeLimits &) final { }
- void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { stream->setQuota(quota_); }
- void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); }
- void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); }
- void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); }
-
- /// Stop reading from stream if output port is finished.
- void onUpdatePorts() override
- {
- if (getPort().isFinished())
- cancel();
- }
-
-protected:
- void onCancel() override { stream->cancel(false); }
-
-private:
- bool has_aggregate_functions = false;
- bool force_add_aggregating_info = false;
- BlockInputStreamPtr stream;
-
- RowsBeforeLimitCounterPtr rows_before_limit;
-
- Chunk totals;
- OutputPort * totals_port = nullptr;
- bool has_totals = false;
-
- Chunk extremes;
- OutputPort * extremes_port = nullptr;
- bool has_extremes = false;
-
- bool is_generating_finished = false;
- bool is_stream_finished = false;
- bool is_stream_started = false;
-
- void init();
-};
-
-}
+ void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { stream->setQuota(quota_); }
+ void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); }
+ void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); }
+ void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); }
+
+ /// Stop reading from stream if output port is finished.
+ void onUpdatePorts() override
+ {
+ if (getPort().isFinished())
+ cancel();
+ }
+
+protected:
+ void onCancel() override { stream->cancel(false); }
+
+private:
+ bool has_aggregate_functions = false;
+ bool force_add_aggregating_info = false;
+ BlockInputStreamPtr stream;
+
+ RowsBeforeLimitCounterPtr rows_before_limit;
+
+ Chunk totals;
+ OutputPort * totals_port = nullptr;
+ bool has_totals = false;
+
+ Chunk extremes;
+ OutputPort * extremes_port = nullptr;
+ bool has_extremes = false;
+
+ bool is_generating_finished = false;
+ bool is_stream_finished = false;
+ bool is_stream_started = false;
+
+ void init();
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h
index d304bdbab9..f6e8c3b22e 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h
@@ -1,21 +1,21 @@
-#pragma once
-#include <Processors/Sources/SourceWithProgress.h>
-
-
-namespace DB
-{
-
-class SourceFromSingleChunk : public SourceWithProgress
-{
-public:
- explicit SourceFromSingleChunk(Block header, Chunk chunk_) : SourceWithProgress(std::move(header)), chunk(std::move(chunk_)) {}
- String getName() const override { return "SourceFromSingleChunk"; }
-
-protected:
- Chunk generate() override { return std::move(chunk); }
-
-private:
- Chunk chunk;
-};
-
-}
+#pragma once
+#include <Processors/Sources/SourceWithProgress.h>
+
+
+namespace DB
+{
+
+class SourceFromSingleChunk : public SourceWithProgress
+{
+public:
+ explicit SourceFromSingleChunk(Block header, Chunk chunk_) : SourceWithProgress(std::move(header)), chunk(std::move(chunk_)) {}
+ String getName() const override { return "SourceFromSingleChunk"; }
+
+protected:
+ Chunk generate() override { return std::move(chunk); }
+
+private:
+ Chunk chunk;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp
index 647ad0f205..6eca81ce07 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp
@@ -1,28 +1,28 @@
-#include <Processors/Sources/SourceWithProgress.h>
-
-#include <Interpreters/ProcessList.h>
-#include <Access/EnabledQuota.h>
-
+#include <Processors/Sources/SourceWithProgress.h>
+
+#include <Interpreters/ProcessList.h>
+#include <Access/EnabledQuota.h>
+
namespace ProfileEvents
{
extern const Event SelectedRows;
extern const Event SelectedBytes;
}
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int TOO_MANY_ROWS;
- extern const int TOO_MANY_BYTES;
-}
-
-SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress)
- : ISourceWithProgress(header), auto_progress(enable_auto_progress)
-{
-}
-
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int TOO_MANY_ROWS;
+ extern const int TOO_MANY_BYTES;
+}
+
+SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress)
+ : ISourceWithProgress(header), auto_progress(enable_auto_progress)
+{
+}
+
void SourceWithProgress::setProcessListElement(QueryStatus * elem)
{
process_list_elem = elem;
@@ -47,105 +47,105 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem)
}
}
-void SourceWithProgress::work()
-{
+void SourceWithProgress::work()
+{
if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
- {
- cancel();
- }
- else
- {
- was_progress_called = false;
-
- ISourceWithProgress::work();
-
- if (auto_progress && !was_progress_called && has_input)
- progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() });
- }
-}
-
-/// Aggregated copy-paste from IBlockInputStream::progressImpl.
-/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream.
-void SourceWithProgress::progress(const Progress & value)
-{
- was_progress_called = true;
-
- if (total_rows_approx != 0)
- {
- Progress total_rows_progress = {0, 0, total_rows_approx};
-
- if (progress_callback)
- progress_callback(total_rows_progress);
-
- if (process_list_elem)
- process_list_elem->updateProgressIn(total_rows_progress);
-
- total_rows_approx = 0;
- }
-
- if (progress_callback)
- progress_callback(value);
-
- if (process_list_elem)
- {
- if (!process_list_elem->updateProgressIn(value))
- cancel();
-
- /// The total amount of data processed or intended for processing in all sources, possibly on remote servers.
-
- ProgressValues progress = process_list_elem->getProgressIn();
-
- /// If the mode is "throw" and estimate of total rows is known, then throw early if an estimate is too high.
- /// If the mode is "break", then allow to read before limit even if estimate is very high.
-
- size_t rows_to_check_limit = progress.read_rows;
- if (limits.size_limits.overflow_mode == OverflowMode::THROW && progress.total_rows_to_read > progress.read_rows)
- rows_to_check_limit = progress.total_rows_to_read;
-
- /// Check the restrictions on the
- /// * amount of data to read
- /// * speed of the query
- /// * quota on the amount of data to read
- /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
-
- if (limits.mode == LimitsMode::LIMITS_TOTAL)
- {
- if (!limits.size_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read",
- ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
- {
- cancel();
- }
- }
-
+ {
+ cancel();
+ }
+ else
+ {
+ was_progress_called = false;
+
+ ISourceWithProgress::work();
+
+ if (auto_progress && !was_progress_called && has_input)
+ progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() });
+ }
+}
+
+/// Aggregated copy-paste from IBlockInputStream::progressImpl.
+/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream.
+void SourceWithProgress::progress(const Progress & value)
+{
+ was_progress_called = true;
+
+ if (total_rows_approx != 0)
+ {
+ Progress total_rows_progress = {0, 0, total_rows_approx};
+
+ if (progress_callback)
+ progress_callback(total_rows_progress);
+
+ if (process_list_elem)
+ process_list_elem->updateProgressIn(total_rows_progress);
+
+ total_rows_approx = 0;
+ }
+
+ if (progress_callback)
+ progress_callback(value);
+
+ if (process_list_elem)
+ {
+ if (!process_list_elem->updateProgressIn(value))
+ cancel();
+
+ /// The total amount of data processed or intended for processing in all sources, possibly on remote servers.
+
+ ProgressValues progress = process_list_elem->getProgressIn();
+
+ /// If the mode is "throw" and estimate of total rows is known, then throw early if an estimate is too high.
+ /// If the mode is "break", then allow to read before limit even if estimate is very high.
+
+ size_t rows_to_check_limit = progress.read_rows;
+ if (limits.size_limits.overflow_mode == OverflowMode::THROW && progress.total_rows_to_read > progress.read_rows)
+ rows_to_check_limit = progress.total_rows_to_read;
+
+ /// Check the restrictions on the
+ /// * amount of data to read
+ /// * speed of the query
+ /// * quota on the amount of data to read
+ /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
+
+ if (limits.mode == LimitsMode::LIMITS_TOTAL)
+ {
+ if (!limits.size_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read",
+ ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
+ {
+ cancel();
+ }
+ }
+
if (!leaf_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read on leaf node",
ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
{
cancel();
}
- size_t total_rows = progress.total_rows_to_read;
-
- constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
- UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
-
- if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
- {
- /// Should be done in PipelineExecutor.
- /// It is here for compatibility with IBlockInputsStream.
- CurrentThread::updatePerformanceCounters();
- last_profile_events_update_time = total_elapsed_microseconds;
- }
-
- /// Should be done in PipelineExecutor.
- /// It is here for compatibility with IBlockInputsStream.
- limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
-
- if (quota && limits.mode == LimitsMode::LIMITS_TOTAL)
- quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes});
- }
+ size_t total_rows = progress.total_rows_to_read;
+
+ constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
+ UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
+
+ if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
+ {
+ /// Should be done in PipelineExecutor.
+ /// It is here for compatibility with IBlockInputsStream.
+ CurrentThread::updatePerformanceCounters();
+ last_profile_events_update_time = total_elapsed_microseconds;
+ }
+
+ /// Should be done in PipelineExecutor.
+ /// It is here for compatibility with IBlockInputsStream.
+ limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+
+ if (quota && limits.mode == LimitsMode::LIMITS_TOTAL)
+ quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes});
+ }
ProfileEvents::increment(ProfileEvents::SelectedRows, value.read_rows);
ProfileEvents::increment(ProfileEvents::SelectedBytes, value.read_bytes);
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h
index 49728be01e..256930b6d1 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h
@@ -1,90 +1,90 @@
-#pragma once
-#include <Processors/ISource.h>
-#include <Common/Stopwatch.h>
+#pragma once
+#include <Processors/ISource.h>
+#include <Common/Stopwatch.h>
#include <DataStreams/StreamLocalLimits.h>
#include <IO/Progress.h>
+
-
-namespace DB
-{
-
+namespace DB
+{
+
class QueryStatus;
class EnabledQuota;
-/// Adds progress to ISource.
-/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread.
-class ISourceWithProgress : public ISource
-{
-public:
- using ISource::ISource;
-
- /// Set limitations that checked on each chunk.
+/// Adds progress to ISource.
+/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread.
+class ISourceWithProgress : public ISource
+{
+public:
+ using ISource::ISource;
+
+ /// Set limitations that checked on each chunk.
virtual void setLimits(const StreamLocalLimits & limits_) = 0;
-
+
/// Set limitations that checked on each chunk for distributed queries on leaf nodes.
virtual void setLeafLimits(const SizeLimits & leaf_limits_) = 0;
- /// Set the quota. If you set a quota on the amount of raw data,
- /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits.
- virtual void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) = 0;
-
- /// Set the pointer to the process list item.
- /// General information about the resources spent on the request will be written into it.
- /// Based on this information, the quota and some restrictions will be checked.
- /// This information will also be available in the SHOW PROCESSLIST request.
- virtual void setProcessListElement(QueryStatus * elem) = 0;
-
- /// Set the execution progress bar callback.
- /// It is called after each chunk.
- /// The function takes the number of rows in the last chunk, the number of bytes in the last chunk.
- /// Note that the callback can be called from different threads.
- virtual void setProgressCallback(const ProgressCallback & callback) = 0;
-
- /// Set the approximate total number of rows to read.
- virtual void addTotalRowsApprox(size_t value) = 0;
-};
-
-/// Implementation for ISourceWithProgress
-class SourceWithProgress : public ISourceWithProgress
-{
-public:
- using ISourceWithProgress::ISourceWithProgress;
- /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk.
- SourceWithProgress(Block header, bool enable_auto_progress);
-
+ /// Set the quota. If you set a quota on the amount of raw data,
+ /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits.
+ virtual void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) = 0;
+
+ /// Set the pointer to the process list item.
+ /// General information about the resources spent on the request will be written into it.
+ /// Based on this information, the quota and some restrictions will be checked.
+ /// This information will also be available in the SHOW PROCESSLIST request.
+ virtual void setProcessListElement(QueryStatus * elem) = 0;
+
+ /// Set the execution progress bar callback.
+ /// It is called after each chunk.
+ /// The function takes the number of rows in the last chunk, the number of bytes in the last chunk.
+ /// Note that the callback can be called from different threads.
+ virtual void setProgressCallback(const ProgressCallback & callback) = 0;
+
+ /// Set the approximate total number of rows to read.
+ virtual void addTotalRowsApprox(size_t value) = 0;
+};
+
+/// Implementation for ISourceWithProgress
+class SourceWithProgress : public ISourceWithProgress
+{
+public:
+ using ISourceWithProgress::ISourceWithProgress;
+ /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk.
+ SourceWithProgress(Block header, bool enable_auto_progress);
+
void setLimits(const StreamLocalLimits & limits_) final { limits = limits_; }
void setLeafLimits(const SizeLimits & leaf_limits_) final {leaf_limits = leaf_limits_; }
- void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { quota = quota_; }
+ void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { quota = quota_; }
void setProcessListElement(QueryStatus * elem) final;
- void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; }
- void addTotalRowsApprox(size_t value) final { total_rows_approx += value; }
-
-protected:
- /// Call this method to provide information about progress.
- void progress(const Progress & value);
-
- void work() override;
-
-private:
+ void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; }
+ void addTotalRowsApprox(size_t value) final { total_rows_approx += value; }
+
+protected:
+ /// Call this method to provide information about progress.
+ void progress(const Progress & value);
+
+ void work() override;
+
+private:
StreamLocalLimits limits;
SizeLimits leaf_limits;
- std::shared_ptr<const EnabledQuota> quota;
- ProgressCallback progress_callback;
- QueryStatus * process_list_elem = nullptr;
-
- /// The approximate total number of rows to read. For progress bar.
- size_t total_rows_approx = 0;
-
- Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time.
- /// According to total_stopwatch in microseconds.
- UInt64 last_profile_events_update_time = 0;
-
- /// This flag checks if progress() was manually called at generate() call.
- /// If not, it will be called for chunk after generate() was finished.
- bool was_progress_called = false;
-
- /// If enabled, progress() will be automatically called on each generated chunk.
- bool auto_progress = true;
-};
-
-}
+ std::shared_ptr<const EnabledQuota> quota;
+ ProgressCallback progress_callback;
+ QueryStatus * process_list_elem = nullptr;
+
+ /// The approximate total number of rows to read. For progress bar.
+ size_t total_rows_approx = 0;
+
+ Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time.
+ /// According to total_stopwatch in microseconds.
+ UInt64 last_profile_events_update_time = 0;
+
+ /// This flag checks if progress() was manually called at generate() call.
+ /// If not, it will be called for chunk after generate() was finished.
+ bool was_progress_called = false;
+
+ /// If enabled, progress() will be automatically called on each generated chunk.
+ bool auto_progress = true;
+};
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp
index a8a93e5366..24712cec1d 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp
@@ -1,400 +1,400 @@
-#include <Processors/Transforms/AggregatingTransform.h>
-
-#include <DataStreams/NativeBlockInputStream.h>
-#include <Processors/ISource.h>
+#include <Processors/Transforms/AggregatingTransform.h>
+
+#include <DataStreams/NativeBlockInputStream.h>
+#include <Processors/ISource.h>
#include <Processors/Pipe.h>
-#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
+#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <DataStreams/materializeBlock.h>
-
-namespace ProfileEvents
-{
- extern const Event ExternalAggregationMerge;
-}
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
- extern const int LOGICAL_ERROR;
-}
-
-/// Convert block to chunk.
-/// Adds additional info about aggregation.
-Chunk convertToChunk(const Block & block)
-{
- auto info = std::make_shared<AggregatedChunkInfo>();
- info->bucket_num = block.info.bucket_num;
- info->is_overflows = block.info.is_overflows;
-
- UInt64 num_rows = block.rows();
- Chunk chunk(block.getColumns(), num_rows);
- chunk.setChunkInfo(std::move(info));
-
- return chunk;
-}
-
-namespace
-{
- const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
- {
- const auto & info = chunk.getChunkInfo();
- if (!info)
- throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR);
-
- return agg_info;
- }
-
- /// Reads chunks from file in native format. Provide chunks with aggregation info.
- class SourceFromNativeStream : public ISource
- {
- public:
- SourceFromNativeStream(const Block & header, const std::string & path)
- : ISource(header), file_in(path), compressed_in(file_in),
+
+namespace ProfileEvents
+{
+ extern const Event ExternalAggregationMerge;
+}
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
+ extern const int LOGICAL_ERROR;
+}
+
+/// Convert block to chunk.
+/// Adds additional info about aggregation.
+Chunk convertToChunk(const Block & block)
+{
+ auto info = std::make_shared<AggregatedChunkInfo>();
+ info->bucket_num = block.info.bucket_num;
+ info->is_overflows = block.info.is_overflows;
+
+ UInt64 num_rows = block.rows();
+ Chunk chunk(block.getColumns(), num_rows);
+ chunk.setChunkInfo(std::move(info));
+
+ return chunk;
+}
+
+namespace
+{
+ const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
+ {
+ const auto & info = chunk.getChunkInfo();
+ if (!info)
+ throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR);
+
+ const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
+ if (!agg_info)
+ throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR);
+
+ return agg_info;
+ }
+
+ /// Reads chunks from file in native format. Provide chunks with aggregation info.
+ class SourceFromNativeStream : public ISource
+ {
+ public:
+ SourceFromNativeStream(const Block & header, const std::string & path)
+ : ISource(header), file_in(path), compressed_in(file_in),
block_in(std::make_shared<NativeBlockInputStream>(compressed_in, DBMS_TCP_PROTOCOL_VERSION))
- {
- block_in->readPrefix();
- }
-
- String getName() const override { return "SourceFromNativeStream"; }
-
- Chunk generate() override
- {
- if (!block_in)
- return {};
-
- auto block = block_in->read();
- if (!block)
- {
- block_in->readSuffix();
- block_in.reset();
- return {};
- }
-
- return convertToChunk(block);
- }
-
- private:
- ReadBufferFromFile file_in;
- CompressedReadBuffer compressed_in;
- BlockInputStreamPtr block_in;
- };
-}
-
-/// Worker which merges buckets for two-level aggregation.
-/// Atomically increments bucket counter and returns merged result.
-class ConvertingAggregatedToChunksSource : public ISource
-{
-public:
- static constexpr UInt32 NUM_BUCKETS = 256;
-
- struct SharedData
- {
- std::atomic<UInt32> next_bucket_to_merge = 0;
+ {
+ block_in->readPrefix();
+ }
+
+ String getName() const override { return "SourceFromNativeStream"; }
+
+ Chunk generate() override
+ {
+ if (!block_in)
+ return {};
+
+ auto block = block_in->read();
+ if (!block)
+ {
+ block_in->readSuffix();
+ block_in.reset();
+ return {};
+ }
+
+ return convertToChunk(block);
+ }
+
+ private:
+ ReadBufferFromFile file_in;
+ CompressedReadBuffer compressed_in;
+ BlockInputStreamPtr block_in;
+ };
+}
+
+/// Worker which merges buckets for two-level aggregation.
+/// Atomically increments bucket counter and returns merged result.
+class ConvertingAggregatedToChunksSource : public ISource
+{
+public:
+ static constexpr UInt32 NUM_BUCKETS = 256;
+
+ struct SharedData
+ {
+ std::atomic<UInt32> next_bucket_to_merge = 0;
std::array<std::atomic<bool>, NUM_BUCKETS> is_bucket_processed{};
- std::atomic<bool> is_cancelled = false;
-
- SharedData()
- {
- for (auto & flag : is_bucket_processed)
- flag = false;
- }
- };
-
- using SharedDataPtr = std::shared_ptr<SharedData>;
-
- ConvertingAggregatedToChunksSource(
- AggregatingTransformParamsPtr params_,
- ManyAggregatedDataVariantsPtr data_,
- SharedDataPtr shared_data_,
- Arena * arena_)
- : ISource(params_->getHeader())
- , params(std::move(params_))
- , data(std::move(data_))
- , shared_data(std::move(shared_data_))
- , arena(arena_)
- {}
-
- String getName() const override { return "ConvertingAggregatedToChunksSource"; }
-
-protected:
- Chunk generate() override
- {
- UInt32 bucket_num = shared_data->next_bucket_to_merge.fetch_add(1);
-
- if (bucket_num >= NUM_BUCKETS)
- return {};
-
- Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled);
- Chunk chunk = convertToChunk(block);
-
- shared_data->is_bucket_processed[bucket_num] = true;
-
- return chunk;
- }
-
-private:
- AggregatingTransformParamsPtr params;
- ManyAggregatedDataVariantsPtr data;
- SharedDataPtr shared_data;
- Arena * arena;
-};
-
-/// Generates chunks with aggregated data.
-/// In single level case, aggregates data itself.
-/// In two-level case, creates `ConvertingAggregatedToChunksSource` workers:
-///
-/// ConvertingAggregatedToChunksSource ->
-/// ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform -> AggregatingTransform
-/// ConvertingAggregatedToChunksSource ->
-///
-/// Result chunks guaranteed to be sorted by bucket number.
-class ConvertingAggregatedToChunksTransform : public IProcessor
-{
-public:
- ConvertingAggregatedToChunksTransform(AggregatingTransformParamsPtr params_, ManyAggregatedDataVariantsPtr data_, size_t num_threads_)
- : IProcessor({}, {params_->getHeader()})
- , params(std::move(params_)), data(std::move(data_)), num_threads(num_threads_) {}
-
- String getName() const override { return "ConvertingAggregatedToChunksTransform"; }
-
- void work() override
- {
- if (data->empty())
- {
- finished = true;
- return;
- }
-
- if (!is_initialized)
- {
- initialize();
- return;
- }
-
- if (data->at(0)->isTwoLevel())
- {
- /// In two-level case will only create sources.
- if (inputs.empty())
- createSources();
- }
- else
- {
- mergeSingleLevel();
- }
- }
-
- Processors expandPipeline() override
- {
- for (auto & source : processors)
- {
- auto & out = source->getOutputs().front();
- inputs.emplace_back(out.getHeader(), this);
- connect(out, inputs.back());
- inputs.back().setNeeded();
- }
-
- return std::move(processors);
- }
-
- IProcessor::Status prepare() override
- {
- auto & output = outputs.front();
-
- if (finished && !has_input)
- {
- output.finish();
- return Status::Finished;
- }
-
- /// Check can output.
- if (output.isFinished())
- {
- for (auto & input : inputs)
- input.close();
-
- if (shared_data)
- shared_data->is_cancelled.store(true);
-
- return Status::Finished;
- }
-
- if (!output.canPush())
- return Status::PortFull;
-
- if (!is_initialized)
- return Status::Ready;
-
- if (!processors.empty())
- return Status::ExpandPipeline;
-
- if (has_input)
- return preparePushToOutput();
-
- /// Single level case.
- if (inputs.empty())
- return Status::Ready;
-
- /// Two-level case.
- return prepareTwoLevel();
- }
-
-private:
- IProcessor::Status preparePushToOutput()
- {
- auto & output = outputs.front();
- output.push(std::move(current_chunk));
- has_input = false;
-
- if (finished)
- {
- output.finish();
- return Status::Finished;
- }
-
- return Status::PortFull;
- }
-
- /// Read all sources and try to push current bucket.
- IProcessor::Status prepareTwoLevel()
- {
- auto & output = outputs.front();
-
- for (auto & input : inputs)
- {
- if (!input.isFinished() && input.hasData())
- {
- auto chunk = input.pull();
- auto bucket = getInfoFromChunk(chunk)->bucket_num;
- chunks[bucket] = std::move(chunk);
- }
- }
-
- if (!shared_data->is_bucket_processed[current_bucket_num])
- return Status::NeedData;
-
- if (!chunks[current_bucket_num])
- return Status::NeedData;
-
- output.push(std::move(chunks[current_bucket_num]));
-
- ++current_bucket_num;
- if (current_bucket_num == NUM_BUCKETS)
- {
- output.finish();
- /// Do not close inputs, they must be finished.
- return Status::Finished;
- }
-
- return Status::PortFull;
- }
-
- AggregatingTransformParamsPtr params;
- ManyAggregatedDataVariantsPtr data;
- ConvertingAggregatedToChunksSource::SharedDataPtr shared_data;
-
- size_t num_threads;
-
- bool is_initialized = false;
- bool has_input = false;
- bool finished = false;
-
- Chunk current_chunk;
-
- UInt32 current_bucket_num = 0;
- static constexpr Int32 NUM_BUCKETS = 256;
- std::array<Chunk, NUM_BUCKETS> chunks;
-
- Processors processors;
-
- void setCurrentChunk(Chunk chunk)
- {
- if (has_input)
- throw Exception("Current chunk was already set in "
- "ConvertingAggregatedToChunksTransform.", ErrorCodes::LOGICAL_ERROR);
-
- has_input = true;
- current_chunk = std::move(chunk);
- }
-
- void initialize()
- {
- is_initialized = true;
-
- AggregatedDataVariantsPtr & first = data->at(0);
-
- /// At least we need one arena in first data item per thread
- if (num_threads > first->aggregates_pools.size())
- {
- Arenas & first_pool = first->aggregates_pools;
- for (size_t j = first_pool.size(); j < num_threads; j++)
- first_pool.emplace_back(std::make_shared<Arena>());
- }
-
- if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row)
- {
- params->aggregator.mergeWithoutKeyDataImpl(*data);
- auto block = params->aggregator.prepareBlockAndFillWithoutKey(
+ std::atomic<bool> is_cancelled = false;
+
+ SharedData()
+ {
+ for (auto & flag : is_bucket_processed)
+ flag = false;
+ }
+ };
+
+ using SharedDataPtr = std::shared_ptr<SharedData>;
+
+ ConvertingAggregatedToChunksSource(
+ AggregatingTransformParamsPtr params_,
+ ManyAggregatedDataVariantsPtr data_,
+ SharedDataPtr shared_data_,
+ Arena * arena_)
+ : ISource(params_->getHeader())
+ , params(std::move(params_))
+ , data(std::move(data_))
+ , shared_data(std::move(shared_data_))
+ , arena(arena_)
+ {}
+
+ String getName() const override { return "ConvertingAggregatedToChunksSource"; }
+
+protected:
+ Chunk generate() override
+ {
+ UInt32 bucket_num = shared_data->next_bucket_to_merge.fetch_add(1);
+
+ if (bucket_num >= NUM_BUCKETS)
+ return {};
+
+ Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled);
+ Chunk chunk = convertToChunk(block);
+
+ shared_data->is_bucket_processed[bucket_num] = true;
+
+ return chunk;
+ }
+
+private:
+ AggregatingTransformParamsPtr params;
+ ManyAggregatedDataVariantsPtr data;
+ SharedDataPtr shared_data;
+ Arena * arena;
+};
+
+/// Generates chunks with aggregated data.
+/// In single level case, aggregates data itself.
+/// In two-level case, creates `ConvertingAggregatedToChunksSource` workers:
+///
+/// ConvertingAggregatedToChunksSource ->
+/// ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform -> AggregatingTransform
+/// ConvertingAggregatedToChunksSource ->
+///
+/// Result chunks guaranteed to be sorted by bucket number.
+class ConvertingAggregatedToChunksTransform : public IProcessor
+{
+public:
+ ConvertingAggregatedToChunksTransform(AggregatingTransformParamsPtr params_, ManyAggregatedDataVariantsPtr data_, size_t num_threads_)
+ : IProcessor({}, {params_->getHeader()})
+ , params(std::move(params_)), data(std::move(data_)), num_threads(num_threads_) {}
+
+ String getName() const override { return "ConvertingAggregatedToChunksTransform"; }
+
+ void work() override
+ {
+ if (data->empty())
+ {
+ finished = true;
+ return;
+ }
+
+ if (!is_initialized)
+ {
+ initialize();
+ return;
+ }
+
+ if (data->at(0)->isTwoLevel())
+ {
+ /// In two-level case will only create sources.
+ if (inputs.empty())
+ createSources();
+ }
+ else
+ {
+ mergeSingleLevel();
+ }
+ }
+
+ Processors expandPipeline() override
+ {
+ for (auto & source : processors)
+ {
+ auto & out = source->getOutputs().front();
+ inputs.emplace_back(out.getHeader(), this);
+ connect(out, inputs.back());
+ inputs.back().setNeeded();
+ }
+
+ return std::move(processors);
+ }
+
+ IProcessor::Status prepare() override
+ {
+ auto & output = outputs.front();
+
+ if (finished && !has_input)
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ /// Check can output.
+ if (output.isFinished())
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ if (shared_data)
+ shared_data->is_cancelled.store(true);
+
+ return Status::Finished;
+ }
+
+ if (!output.canPush())
+ return Status::PortFull;
+
+ if (!is_initialized)
+ return Status::Ready;
+
+ if (!processors.empty())
+ return Status::ExpandPipeline;
+
+ if (has_input)
+ return preparePushToOutput();
+
+ /// Single level case.
+ if (inputs.empty())
+ return Status::Ready;
+
+ /// Two-level case.
+ return prepareTwoLevel();
+ }
+
+private:
+ IProcessor::Status preparePushToOutput()
+ {
+ auto & output = outputs.front();
+ output.push(std::move(current_chunk));
+ has_input = false;
+
+ if (finished)
+ {
+ output.finish();
+ return Status::Finished;
+ }
+
+ return Status::PortFull;
+ }
+
+ /// Read all sources and try to push current bucket.
+ IProcessor::Status prepareTwoLevel()
+ {
+ auto & output = outputs.front();
+
+ for (auto & input : inputs)
+ {
+ if (!input.isFinished() && input.hasData())
+ {
+ auto chunk = input.pull();
+ auto bucket = getInfoFromChunk(chunk)->bucket_num;
+ chunks[bucket] = std::move(chunk);
+ }
+ }
+
+ if (!shared_data->is_bucket_processed[current_bucket_num])
+ return Status::NeedData;
+
+ if (!chunks[current_bucket_num])
+ return Status::NeedData;
+
+ output.push(std::move(chunks[current_bucket_num]));
+
+ ++current_bucket_num;
+ if (current_bucket_num == NUM_BUCKETS)
+ {
+ output.finish();
+ /// Do not close inputs, they must be finished.
+ return Status::Finished;
+ }
+
+ return Status::PortFull;
+ }
+
+ AggregatingTransformParamsPtr params;
+ ManyAggregatedDataVariantsPtr data;
+ ConvertingAggregatedToChunksSource::SharedDataPtr shared_data;
+
+ size_t num_threads;
+
+ bool is_initialized = false;
+ bool has_input = false;
+ bool finished = false;
+
+ Chunk current_chunk;
+
+ UInt32 current_bucket_num = 0;
+ static constexpr Int32 NUM_BUCKETS = 256;
+ std::array<Chunk, NUM_BUCKETS> chunks;
+
+ Processors processors;
+
+ void setCurrentChunk(Chunk chunk)
+ {
+ if (has_input)
+ throw Exception("Current chunk was already set in "
+ "ConvertingAggregatedToChunksTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ has_input = true;
+ current_chunk = std::move(chunk);
+ }
+
+ void initialize()
+ {
+ is_initialized = true;
+
+ AggregatedDataVariantsPtr & first = data->at(0);
+
+ /// At least we need one arena in first data item per thread
+ if (num_threads > first->aggregates_pools.size())
+ {
+ Arenas & first_pool = first->aggregates_pools;
+ for (size_t j = first_pool.size(); j < num_threads; j++)
+ first_pool.emplace_back(std::make_shared<Arena>());
+ }
+
+ if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row)
+ {
+ params->aggregator.mergeWithoutKeyDataImpl(*data);
+ auto block = params->aggregator.prepareBlockAndFillWithoutKey(
*first, params->final, first->type != AggregatedDataVariants::Type::without_key);
-
- setCurrentChunk(convertToChunk(block));
- }
- }
-
- void mergeSingleLevel()
- {
- AggregatedDataVariantsPtr & first = data->at(0);
-
- if (current_bucket_num > 0 || first->type == AggregatedDataVariants::Type::without_key)
- {
- finished = true;
- return;
- }
-
- ++current_bucket_num;
-
- #define M(NAME) \
- else if (first->type == AggregatedDataVariants::Type::NAME) \
- params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data);
- if (false) {} // NOLINT
- APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
- #undef M
- else
- throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
-
- auto block = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final);
-
- setCurrentChunk(convertToChunk(block));
- finished = true;
- }
-
- void createSources()
- {
- AggregatedDataVariantsPtr & first = data->at(0);
- shared_data = std::make_shared<ConvertingAggregatedToChunksSource::SharedData>();
-
- for (size_t thread = 0; thread < num_threads; ++thread)
- {
+
+ setCurrentChunk(convertToChunk(block));
+ }
+ }
+
+ void mergeSingleLevel()
+ {
+ AggregatedDataVariantsPtr & first = data->at(0);
+
+ if (current_bucket_num > 0 || first->type == AggregatedDataVariants::Type::without_key)
+ {
+ finished = true;
+ return;
+ }
+
+ ++current_bucket_num;
+
+ #define M(NAME) \
+ else if (first->type == AggregatedDataVariants::Type::NAME) \
+ params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data);
+ if (false) {} // NOLINT
+ APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
+ #undef M
+ else
+ throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+
+ auto block = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final);
+
+ setCurrentChunk(convertToChunk(block));
+ finished = true;
+ }
+
+ void createSources()
+ {
+ AggregatedDataVariantsPtr & first = data->at(0);
+ shared_data = std::make_shared<ConvertingAggregatedToChunksSource::SharedData>();
+
+ for (size_t thread = 0; thread < num_threads; ++thread)
+ {
/// Select Arena to avoid race conditions
- Arena * arena = first->aggregates_pools.at(thread).get();
+ Arena * arena = first->aggregates_pools.at(thread).get();
auto source = std::make_shared<ConvertingAggregatedToChunksSource>(params, data, shared_data, arena);
-
- processors.emplace_back(std::move(source));
- }
- }
-};
-
-AggregatingTransform::AggregatingTransform(Block header, AggregatingTransformParamsPtr params_)
- : AggregatingTransform(std::move(header), std::move(params_)
- , std::make_unique<ManyAggregatedData>(1), 0, 1, 1)
-{
-}
-
-AggregatingTransform::AggregatingTransform(
+
+ processors.emplace_back(std::move(source));
+ }
+ }
+};
+
+AggregatingTransform::AggregatingTransform(Block header, AggregatingTransformParamsPtr params_)
+ : AggregatingTransform(std::move(header), std::move(params_)
+ , std::make_unique<ManyAggregatedData>(1), 0, 1, 1)
+{
+}
+
+AggregatingTransform::AggregatingTransform(
Block header,
AggregatingTransformParamsPtr params_,
ManyAggregatedDataPtr many_data_,
@@ -403,129 +403,129 @@ AggregatingTransform::AggregatingTransform(
size_t temporary_data_merge_threads_)
: IProcessor({std::move(header)}, {params_->getHeader()})
, params(std::move(params_))
- , key_columns(params->params.keys_size)
- , aggregate_columns(params->params.aggregates_size)
- , many_data(std::move(many_data_))
- , variants(*many_data->variants[current_variant])
- , max_threads(std::min(many_data->variants.size(), max_threads_))
- , temporary_data_merge_threads(temporary_data_merge_threads_)
-{
-}
-
-AggregatingTransform::~AggregatingTransform() = default;
-
-IProcessor::Status AggregatingTransform::prepare()
-{
- /// There are one or two input ports.
- /// The first one is used at aggregation step, the second one - while reading merged data from ConvertingAggregated
-
- auto & output = outputs.front();
- /// Last output is current. All other outputs should already be closed.
- auto & input = inputs.back();
-
- /// Check can output.
- if (output.isFinished())
- {
- input.close();
- return Status::Finished;
- }
-
- if (!output.canPush())
- {
- input.setNotNeeded();
- return Status::PortFull;
- }
-
- /// Finish data processing, prepare to generating.
- if (is_consume_finished && !is_generate_initialized)
- {
- /// Close input port in case max_rows_to_group_by was reached but not all data was read.
- inputs.front().close();
-
- return Status::Ready;
- }
-
- if (is_generate_initialized && !is_pipeline_created && !processors.empty())
- return Status::ExpandPipeline;
-
- /// Only possible while consuming.
- if (read_current_chunk)
- return Status::Ready;
-
- /// Get chunk from input.
- if (input.isFinished())
- {
- if (is_consume_finished)
- {
- output.finish();
- return Status::Finished;
- }
- else
- {
- /// Finish data processing and create another pipe.
- is_consume_finished = true;
- return Status::Ready;
- }
- }
-
- if (!input.hasData())
- {
- input.setNeeded();
- return Status::NeedData;
- }
-
- if (is_consume_finished)
- input.setNeeded();
-
- current_chunk = input.pull(/*set_not_needed = */ !is_consume_finished);
- read_current_chunk = true;
-
- if (is_consume_finished)
- {
- output.push(std::move(current_chunk));
- read_current_chunk = false;
- return Status::PortFull;
- }
-
- return Status::Ready;
-}
-
-void AggregatingTransform::work()
-{
- if (is_consume_finished)
- initGenerate();
- else
- {
- consume(std::move(current_chunk));
- read_current_chunk = false;
- }
-}
-
-Processors AggregatingTransform::expandPipeline()
-{
- auto & out = processors.back()->getOutputs().front();
- inputs.emplace_back(out.getHeader(), this);
- connect(out, inputs.back());
- is_pipeline_created = true;
- return std::move(processors);
-}
-
-void AggregatingTransform::consume(Chunk chunk)
-{
+ , key_columns(params->params.keys_size)
+ , aggregate_columns(params->params.aggregates_size)
+ , many_data(std::move(many_data_))
+ , variants(*many_data->variants[current_variant])
+ , max_threads(std::min(many_data->variants.size(), max_threads_))
+ , temporary_data_merge_threads(temporary_data_merge_threads_)
+{
+}
+
+AggregatingTransform::~AggregatingTransform() = default;
+
+IProcessor::Status AggregatingTransform::prepare()
+{
+ /// There are one or two input ports.
+ /// The first one is used at aggregation step, the second one - while reading merged data from ConvertingAggregated
+
+ auto & output = outputs.front();
+ /// Last output is current. All other outputs should already be closed.
+ auto & input = inputs.back();
+
+ /// Check can output.
+ if (output.isFinished())
+ {
+ input.close();
+ return Status::Finished;
+ }
+
+ if (!output.canPush())
+ {
+ input.setNotNeeded();
+ return Status::PortFull;
+ }
+
+ /// Finish data processing, prepare to generating.
+ if (is_consume_finished && !is_generate_initialized)
+ {
+ /// Close input port in case max_rows_to_group_by was reached but not all data was read.
+ inputs.front().close();
+
+ return Status::Ready;
+ }
+
+ if (is_generate_initialized && !is_pipeline_created && !processors.empty())
+ return Status::ExpandPipeline;
+
+ /// Only possible while consuming.
+ if (read_current_chunk)
+ return Status::Ready;
+
+ /// Get chunk from input.
+ if (input.isFinished())
+ {
+ if (is_consume_finished)
+ {
+ output.finish();
+ return Status::Finished;
+ }
+ else
+ {
+ /// Finish data processing and create another pipe.
+ is_consume_finished = true;
+ return Status::Ready;
+ }
+ }
+
+ if (!input.hasData())
+ {
+ input.setNeeded();
+ return Status::NeedData;
+ }
+
+ if (is_consume_finished)
+ input.setNeeded();
+
+ current_chunk = input.pull(/*set_not_needed = */ !is_consume_finished);
+ read_current_chunk = true;
+
+ if (is_consume_finished)
+ {
+ output.push(std::move(current_chunk));
+ read_current_chunk = false;
+ return Status::PortFull;
+ }
+
+ return Status::Ready;
+}
+
+void AggregatingTransform::work()
+{
+ if (is_consume_finished)
+ initGenerate();
+ else
+ {
+ consume(std::move(current_chunk));
+ read_current_chunk = false;
+ }
+}
+
+Processors AggregatingTransform::expandPipeline()
+{
+ auto & out = processors.back()->getOutputs().front();
+ inputs.emplace_back(out.getHeader(), this);
+ connect(out, inputs.back());
+ is_pipeline_created = true;
+ return std::move(processors);
+}
+
+void AggregatingTransform::consume(Chunk chunk)
+{
const UInt64 num_rows = chunk.getNumRows();
-
- if (num_rows == 0 && params->params.empty_result_for_aggregation_by_empty_set)
- return;
-
- if (!is_consume_started)
- {
- LOG_TRACE(log, "Aggregating");
- is_consume_started = true;
- }
-
+
+ if (num_rows == 0 && params->params.empty_result_for_aggregation_by_empty_set)
+ return;
+
+ if (!is_consume_started)
+ {
+ LOG_TRACE(log, "Aggregating");
+ is_consume_started = true;
+ }
+
src_rows += num_rows;
- src_bytes += chunk.bytes();
-
+ src_bytes += chunk.bytes();
+
if (params->only_merge)
{
auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns());
@@ -538,76 +538,76 @@ void AggregatingTransform::consume(Chunk chunk)
if (!params->aggregator.executeOnBlock(chunk.detachColumns(), num_rows, variants, key_columns, aggregate_columns, no_more_keys))
is_consume_finished = true;
}
-}
-
-void AggregatingTransform::initGenerate()
-{
- if (is_generate_initialized)
- return;
-
- is_generate_initialized = true;
-
- /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation.
- /// To do this, we pass a block with zero rows to aggregate.
- if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set)
+}
+
+void AggregatingTransform::initGenerate()
+{
+ if (is_generate_initialized)
+ return;
+
+ is_generate_initialized = true;
+
+ /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation.
+ /// To do this, we pass a block with zero rows to aggregate.
+ if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set)
{
if (params->only_merge)
params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys);
else
params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys);
}
-
- double elapsed_seconds = watch.elapsedSeconds();
- size_t rows = variants.sizeWithoutOverflowRow();
-
+
+ double elapsed_seconds = watch.elapsedSeconds();
+ size_t rows = variants.sizeWithoutOverflowRow();
+
LOG_DEBUG(log, "Aggregated. {} to {} rows (from {}) in {} sec. ({:.3f} rows/sec., {}/sec.)",
- src_rows, rows, ReadableSize(src_bytes),
+ src_rows, rows, ReadableSize(src_bytes),
elapsed_seconds, src_rows / elapsed_seconds,
- ReadableSize(src_bytes / elapsed_seconds));
-
- if (params->aggregator.hasTemporaryFiles())
- {
- if (variants.isConvertibleToTwoLevel())
- variants.convertToTwoLevel();
-
- /// Flush data in the RAM to disk also. It's easier than merging on-disk and RAM data.
- if (!variants.empty())
- params->aggregator.writeToTemporaryFile(variants);
- }
-
- if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size())
- return;
-
- if (!params->aggregator.hasTemporaryFiles())
- {
- auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants);
- auto prepared_data_ptr = std::make_shared<ManyAggregatedDataVariants>(std::move(prepared_data));
- processors.emplace_back(std::make_shared<ConvertingAggregatedToChunksTransform>(params, std::move(prepared_data_ptr), max_threads));
- }
- else
- {
- /// If there are temporary files with partially-aggregated data on the disk,
- /// then read and merge them, spending the minimum amount of memory.
-
- ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge);
-
- if (many_data->variants.size() > 1)
- {
- /// It may happen that some data has not yet been flushed,
- /// because at the time thread has finished, no data has been flushed to disk, and then some were.
- for (auto & cur_variants : many_data->variants)
- {
- if (cur_variants->isConvertibleToTwoLevel())
- cur_variants->convertToTwoLevel();
-
- if (!cur_variants->empty())
- params->aggregator.writeToTemporaryFile(*cur_variants);
- }
- }
-
- const auto & files = params->aggregator.getTemporaryFiles();
+ ReadableSize(src_bytes / elapsed_seconds));
+
+ if (params->aggregator.hasTemporaryFiles())
+ {
+ if (variants.isConvertibleToTwoLevel())
+ variants.convertToTwoLevel();
+
+ /// Flush data in the RAM to disk also. It's easier than merging on-disk and RAM data.
+ if (!variants.empty())
+ params->aggregator.writeToTemporaryFile(variants);
+ }
+
+ if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size())
+ return;
+
+ if (!params->aggregator.hasTemporaryFiles())
+ {
+ auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants);
+ auto prepared_data_ptr = std::make_shared<ManyAggregatedDataVariants>(std::move(prepared_data));
+ processors.emplace_back(std::make_shared<ConvertingAggregatedToChunksTransform>(params, std::move(prepared_data_ptr), max_threads));
+ }
+ else
+ {
+ /// If there are temporary files with partially-aggregated data on the disk,
+ /// then read and merge them, spending the minimum amount of memory.
+
+ ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge);
+
+ if (many_data->variants.size() > 1)
+ {
+ /// It may happen that some data has not yet been flushed,
+ /// because at the time thread has finished, no data has been flushed to disk, and then some were.
+ for (auto & cur_variants : many_data->variants)
+ {
+ if (cur_variants->isConvertibleToTwoLevel())
+ cur_variants->convertToTwoLevel();
+
+ if (!cur_variants->empty())
+ params->aggregator.writeToTemporaryFile(*cur_variants);
+ }
+ }
+
+ const auto & files = params->aggregator.getTemporaryFiles();
Pipe pipe;
-
+
{
auto header = params->aggregator.getHeader(false);
Pipes pipes;
@@ -624,11 +624,11 @@ void AggregatingTransform::initGenerate()
files.files.size(),
ReadableSize(files.sum_size_compressed),
ReadableSize(files.sum_size_uncompressed));
-
+
addMergingAggregatedMemoryEfficientTransform(pipe, params, temporary_data_merge_threads);
-
+
processors = Pipe::detachProcessors(std::move(pipe));
- }
-}
-
-}
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h
index 1639bc4df4..a673b4fdfd 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h
@@ -1,13 +1,13 @@
-#pragma once
-#include <Processors/IAccumulatingTransform.h>
-#include <Interpreters/Aggregator.h>
-#include <IO/ReadBufferFromFile.h>
-#include <Compression/CompressedReadBuffer.h>
-#include <Common/Stopwatch.h>
-
-namespace DB
-{
-
+#pragma once
+#include <Processors/IAccumulatingTransform.h>
+#include <Interpreters/Aggregator.h>
+#include <IO/ReadBufferFromFile.h>
+#include <Compression/CompressedReadBuffer.h>
+#include <Common/Stopwatch.h>
+
+namespace DB
+{
+
class AggregatedArenasChunkInfo : public ChunkInfo
{
public:
@@ -17,22 +17,22 @@ public:
{}
};
-class AggregatedChunkInfo : public ChunkInfo
-{
-public:
- bool is_overflows = false;
- Int32 bucket_num = -1;
-};
-
-class IBlockInputStream;
-using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
-
+class AggregatedChunkInfo : public ChunkInfo
+{
+public:
+ bool is_overflows = false;
+ Int32 bucket_num = -1;
+};
+
+class IBlockInputStream;
+using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
+
using AggregatorList = std::list<Aggregator>;
using AggregatorListPtr = std::shared_ptr<AggregatorList>;
-struct AggregatingTransformParams
-{
- Aggregator::Params params;
+struct AggregatingTransformParams
+{
+ Aggregator::Params params;
/// Each params holds a list of aggregators which are used in query. It's needed because we need
/// to use a pointer of aggregator to proper destroy complex aggregation states on exception
@@ -41,17 +41,17 @@ struct AggregatingTransformParams
/// projections, and one of them might gets destroyed before used.
AggregatorListPtr aggregator_list_ptr;
Aggregator & aggregator;
- bool final;
+ bool final;
bool only_merge = false;
-
- AggregatingTransformParams(const Aggregator::Params & params_, bool final_)
+
+ AggregatingTransformParams(const Aggregator::Params & params_, bool final_)
: params(params_)
, aggregator_list_ptr(std::make_shared<AggregatorList>())
, aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params))
, final(final_)
{
}
-
+
AggregatingTransformParams(const Aggregator::Params & params_, const AggregatorListPtr & aggregator_list_ptr_, bool final_)
: params(params_)
, aggregator_list_ptr(aggregator_list_ptr_)
@@ -60,51 +60,51 @@ struct AggregatingTransformParams
{
}
- Block getHeader() const { return aggregator.getHeader(final); }
-
- Block getCustomHeader(bool final_) const { return aggregator.getHeader(final_); }
-};
-
-struct ManyAggregatedData
-{
- ManyAggregatedDataVariants variants;
- std::vector<std::unique_ptr<std::mutex>> mutexes;
- std::atomic<UInt32> num_finished = 0;
-
- explicit ManyAggregatedData(size_t num_threads = 0) : variants(num_threads), mutexes(num_threads)
- {
- for (auto & elem : variants)
- elem = std::make_shared<AggregatedDataVariants>();
-
- for (auto & mut : mutexes)
- mut = std::make_unique<std::mutex>();
- }
-};
-
-using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams>;
-using ManyAggregatedDataPtr = std::shared_ptr<ManyAggregatedData>;
-
-/** Aggregates the stream of blocks using the specified key columns and aggregate functions.
- * Columns with aggregate functions adds to the end of the block.
- * If final = false, the aggregate functions are not finalized, that is, they are not replaced by their value, but contain an intermediate state of calculations.
- * This is necessary so that aggregation can continue (for example, by combining streams of partially aggregated data).
- *
- * For every separate stream of data separate AggregatingTransform is created.
- * Every AggregatingTransform reads data from the first port till is is not run out, or max_rows_to_group_by reached.
- * When the last AggregatingTransform finish reading, the result of aggregation is needed to be merged together.
- * This task is performed by ConvertingAggregatedToChunksTransform.
- * Last AggregatingTransform expands pipeline and adds second input port, which reads from ConvertingAggregated.
- *
- * Aggregation data is passed by ManyAggregatedData structure, which is shared between all aggregating transforms.
- * At aggregation step, every transform uses it's own AggregatedDataVariants structure.
- * At merging step, all structures pass to ConvertingAggregatedToChunksTransform.
- */
-class AggregatingTransform : public IProcessor
-{
-public:
- AggregatingTransform(Block header, AggregatingTransformParamsPtr params_);
-
- /// For Parallel aggregating.
+ Block getHeader() const { return aggregator.getHeader(final); }
+
+ Block getCustomHeader(bool final_) const { return aggregator.getHeader(final_); }
+};
+
+struct ManyAggregatedData
+{
+ ManyAggregatedDataVariants variants;
+ std::vector<std::unique_ptr<std::mutex>> mutexes;
+ std::atomic<UInt32> num_finished = 0;
+
+ explicit ManyAggregatedData(size_t num_threads = 0) : variants(num_threads), mutexes(num_threads)
+ {
+ for (auto & elem : variants)
+ elem = std::make_shared<AggregatedDataVariants>();
+
+ for (auto & mut : mutexes)
+ mut = std::make_unique<std::mutex>();
+ }
+};
+
+using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams>;
+using ManyAggregatedDataPtr = std::shared_ptr<ManyAggregatedData>;
+
+/** Aggregates the stream of blocks using the specified key columns and aggregate functions.
+ * Columns with aggregate functions adds to the end of the block.
+ * If final = false, the aggregate functions are not finalized, that is, they are not replaced by their value, but contain an intermediate state of calculations.
+ * This is necessary so that aggregation can continue (for example, by combining streams of partially aggregated data).
+ *
+ * For every separate stream of data separate AggregatingTransform is created.
+ * Every AggregatingTransform reads data from the first port till is is not run out, or max_rows_to_group_by reached.
+ * When the last AggregatingTransform finish reading, the result of aggregation is needed to be merged together.
+ * This task is performed by ConvertingAggregatedToChunksTransform.
+ * Last AggregatingTransform expands pipeline and adds second input port, which reads from ConvertingAggregated.
+ *
+ * Aggregation data is passed by ManyAggregatedData structure, which is shared between all aggregating transforms.
+ * At aggregation step, every transform uses it's own AggregatedDataVariants structure.
+ * At merging step, all structures pass to ConvertingAggregatedToChunksTransform.
+ */
+class AggregatingTransform : public IProcessor
+{
+public:
+ AggregatingTransform(Block header, AggregatingTransformParamsPtr params_);
+
+ /// For Parallel aggregating.
AggregatingTransform(
Block header,
AggregatingTransformParamsPtr params_,
@@ -112,56 +112,56 @@ public:
size_t current_variant,
size_t max_threads,
size_t temporary_data_merge_threads);
- ~AggregatingTransform() override;
-
- String getName() const override { return "AggregatingTransform"; }
- Status prepare() override;
- void work() override;
- Processors expandPipeline() override;
-
-protected:
- void consume(Chunk chunk);
-
-private:
- /// To read the data that was flushed into the temporary data file.
- Processors processors;
-
- AggregatingTransformParamsPtr params;
- Poco::Logger * log = &Poco::Logger::get("AggregatingTransform");
-
- ColumnRawPtrs key_columns;
- Aggregator::AggregateColumns aggregate_columns;
+ ~AggregatingTransform() override;
+
+ String getName() const override { return "AggregatingTransform"; }
+ Status prepare() override;
+ void work() override;
+ Processors expandPipeline() override;
+
+protected:
+ void consume(Chunk chunk);
+
+private:
+ /// To read the data that was flushed into the temporary data file.
+ Processors processors;
+
+ AggregatingTransformParamsPtr params;
+ Poco::Logger * log = &Poco::Logger::get("AggregatingTransform");
+
+ ColumnRawPtrs key_columns;
+ Aggregator::AggregateColumns aggregate_columns;
/** Used if there is a limit on the maximum number of rows in the aggregation,
* and if group_by_overflow_mode == ANY.
* In this case, new keys are not added to the set, but aggregation is performed only by
* keys that have already managed to get into the set.
*/
- bool no_more_keys = false;
-
- ManyAggregatedDataPtr many_data;
- AggregatedDataVariants & variants;
- size_t max_threads = 1;
- size_t temporary_data_merge_threads = 1;
-
- /// TODO: calculate time only for aggregation.
- Stopwatch watch;
-
- UInt64 src_rows = 0;
- UInt64 src_bytes = 0;
-
- bool is_generate_initialized = false;
- bool is_consume_finished = false;
- bool is_pipeline_created = false;
-
- Chunk current_chunk;
- bool read_current_chunk = false;
-
- bool is_consume_started = false;
-
- void initGenerate();
-};
-
-Chunk convertToChunk(const Block & block);
-
-}
+ bool no_more_keys = false;
+
+ ManyAggregatedDataPtr many_data;
+ AggregatedDataVariants & variants;
+ size_t max_threads = 1;
+ size_t temporary_data_merge_threads = 1;
+
+ /// TODO: calculate time only for aggregation.
+ Stopwatch watch;
+
+ UInt64 src_rows = 0;
+ UInt64 src_bytes = 0;
+
+ bool is_generate_initialized = false;
+ bool is_consume_finished = false;
+ bool is_pipeline_created = false;
+
+ Chunk current_chunk;
+ bool read_current_chunk = false;
+
+ bool is_consume_started = false;
+
+ void initGenerate();
+};
+
+Chunk convertToChunk(const Block & block);
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp
index 526bbc6337..8245b22465 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp
@@ -1,123 +1,123 @@
-#include <Processors/Transforms/ExtremesTransform.h>
-
-#include <Core/Field.h>
-
-namespace DB
-{
-
-ExtremesTransform::ExtremesTransform(const Block & header)
- : ISimpleTransform(header, header, true)
-{
- /// Port for Extremes.
- outputs.emplace_back(outputs.front().getHeader(), this);
-}
-
-IProcessor::Status ExtremesTransform::prepare()
-{
- if (!finished_transform)
- {
- auto status = ISimpleTransform::prepare();
-
- if (status != Status::Finished)
- return status;
-
- finished_transform = true;
- }
-
- auto & totals_output = getExtremesPort();
-
- /// Check can output.
- if (totals_output.isFinished())
- return Status::Finished;
-
- if (!totals_output.canPush())
- return Status::PortFull;
-
- if (!extremes && !extremes_columns.empty())
- return Status::Ready;
-
- if (extremes)
- totals_output.push(std::move(extremes));
-
- totals_output.finish();
- return Status::Finished;
-}
-
-void ExtremesTransform::work()
-{
- if (finished_transform)
- {
- if (!extremes && !extremes_columns.empty())
- extremes.setColumns(std::move(extremes_columns), 2);
- }
- else
- ISimpleTransform::work();
-}
-
-void ExtremesTransform::transform(DB::Chunk & chunk)
-{
-
- if (chunk.getNumRows() == 0)
- return;
-
- size_t num_columns = chunk.getNumColumns();
- const auto & columns = chunk.getColumns();
-
- if (extremes_columns.empty())
- {
- extremes_columns.resize(num_columns);
-
- for (size_t i = 0; i < num_columns; ++i)
- {
- const ColumnPtr & src = columns[i];
-
- if (isColumnConst(*src))
- {
- /// Equal min and max.
- extremes_columns[i] = src->cloneResized(2);
- }
- else
- {
- Field min_value;
- Field max_value;
-
- src->getExtremes(min_value, max_value);
-
- extremes_columns[i] = src->cloneEmpty();
-
- extremes_columns[i]->insert(min_value);
- extremes_columns[i]->insert(max_value);
- }
- }
- }
- else
- {
- for (size_t i = 0; i < num_columns; ++i)
- {
- if (isColumnConst(*extremes_columns[i]))
- continue;
-
- Field min_value = (*extremes_columns[i])[0];
- Field max_value = (*extremes_columns[i])[1];
-
- Field cur_min_value;
- Field cur_max_value;
-
- columns[i]->getExtremes(cur_min_value, cur_max_value);
-
- if (cur_min_value < min_value)
- min_value = cur_min_value;
- if (cur_max_value > max_value)
- max_value = cur_max_value;
-
- MutableColumnPtr new_extremes = extremes_columns[i]->cloneEmpty();
-
- new_extremes->insert(min_value);
- new_extremes->insert(max_value);
-
- extremes_columns[i] = std::move(new_extremes);
- }
- }
-}
-
-}
+#include <Processors/Transforms/ExtremesTransform.h>
+
+#include <Core/Field.h>
+
+namespace DB
+{
+
+ExtremesTransform::ExtremesTransform(const Block & header)
+ : ISimpleTransform(header, header, true)
+{
+ /// Port for Extremes.
+ outputs.emplace_back(outputs.front().getHeader(), this);
+}
+
+IProcessor::Status ExtremesTransform::prepare()
+{
+ if (!finished_transform)
+ {
+ auto status = ISimpleTransform::prepare();
+
+ if (status != Status::Finished)
+ return status;
+
+ finished_transform = true;
+ }
+
+ auto & totals_output = getExtremesPort();
+
+ /// Check can output.
+ if (totals_output.isFinished())
+ return Status::Finished;
+
+ if (!totals_output.canPush())
+ return Status::PortFull;
+
+ if (!extremes && !extremes_columns.empty())
+ return Status::Ready;
+
+ if (extremes)
+ totals_output.push(std::move(extremes));
+
+ totals_output.finish();
+ return Status::Finished;
+}
+
+void ExtremesTransform::work()
+{
+ if (finished_transform)
+ {
+ if (!extremes && !extremes_columns.empty())
+ extremes.setColumns(std::move(extremes_columns), 2);
+ }
+ else
+ ISimpleTransform::work();
+}
+
+void ExtremesTransform::transform(DB::Chunk & chunk)
+{
+
+ if (chunk.getNumRows() == 0)
+ return;
+
+ size_t num_columns = chunk.getNumColumns();
+ const auto & columns = chunk.getColumns();
+
+ if (extremes_columns.empty())
+ {
+ extremes_columns.resize(num_columns);
+
+ for (size_t i = 0; i < num_columns; ++i)
+ {
+ const ColumnPtr & src = columns[i];
+
+ if (isColumnConst(*src))
+ {
+ /// Equal min and max.
+ extremes_columns[i] = src->cloneResized(2);
+ }
+ else
+ {
+ Field min_value;
+ Field max_value;
+
+ src->getExtremes(min_value, max_value);
+
+ extremes_columns[i] = src->cloneEmpty();
+
+ extremes_columns[i]->insert(min_value);
+ extremes_columns[i]->insert(max_value);
+ }
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < num_columns; ++i)
+ {
+ if (isColumnConst(*extremes_columns[i]))
+ continue;
+
+ Field min_value = (*extremes_columns[i])[0];
+ Field max_value = (*extremes_columns[i])[1];
+
+ Field cur_min_value;
+ Field cur_max_value;
+
+ columns[i]->getExtremes(cur_min_value, cur_max_value);
+
+ if (cur_min_value < min_value)
+ min_value = cur_min_value;
+ if (cur_max_value > max_value)
+ max_value = cur_max_value;
+
+ MutableColumnPtr new_extremes = extremes_columns[i]->cloneEmpty();
+
+ new_extremes->insert(min_value);
+ new_extremes->insert(max_value);
+
+ extremes_columns[i] = std::move(new_extremes);
+ }
+ }
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h
index 8c3f6348d4..1c951b3540 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h
@@ -1,31 +1,31 @@
#pragma once
-#include <Processors/ISimpleTransform.h>
-
-namespace DB
-{
-
-class ExtremesTransform : public ISimpleTransform
-{
-
-public:
- explicit ExtremesTransform(const Block & header);
-
- String getName() const override { return "ExtremesTransform"; }
-
- OutputPort & getExtremesPort() { return outputs.back(); }
-
- Status prepare() override;
- void work() override;
-
-protected:
- void transform(Chunk & chunk) override;
-
- bool finished_transform = false;
- Chunk extremes;
-
-private:
- MutableColumns extremes_columns;
-};
-
-}
-
+#include <Processors/ISimpleTransform.h>
+
+namespace DB
+{
+
+class ExtremesTransform : public ISimpleTransform
+{
+
+public:
+ explicit ExtremesTransform(const Block & header);
+
+ String getName() const override { return "ExtremesTransform"; }
+
+ OutputPort & getExtremesPort() { return outputs.back(); }
+
+ Status prepare() override;
+ void work() override;
+
+protected:
+ void transform(Chunk & chunk) override;
+
+ bool finished_transform = false;
+ Chunk extremes;
+
+private:
+ MutableColumns extremes_columns;
+};
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
index df2ea4b03f..1fab9ca34c 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
@@ -1,525 +1,525 @@
-#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
-
+#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
+
#include <Interpreters/Aggregator.h>
-#include <Processors/ISimpleTransform.h>
-#include <Processors/ResizeProcessor.h>
+#include <Processors/ISimpleTransform.h>
+#include <Processors/ResizeProcessor.h>
#include <Processors/Pipe.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-struct ChunksToMerge : public ChunkInfo
-{
- std::unique_ptr<Chunks> chunks;
- Int32 bucket_num = -1;
- bool is_overflows = false;
-};
-
-GroupingAggregatedTransform::GroupingAggregatedTransform(
- const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_)
- : IProcessor(InputPorts(num_inputs_, header_), { Block() })
- , num_inputs(num_inputs_)
- , params(std::move(params_))
- , last_bucket_number(num_inputs, -1)
- , read_from_input(num_inputs, false)
-{
-}
-
-void GroupingAggregatedTransform::readFromAllInputs()
-{
- auto in = inputs.begin();
- read_from_all_inputs = true;
-
- for (size_t i = 0; i < num_inputs; ++i, ++in)
- {
- if (in->isFinished())
- continue;
-
- if (read_from_input[i])
- continue;
-
- in->setNeeded();
-
- if (!in->hasData())
- {
- read_from_all_inputs = false;
- continue;
- }
-
- auto chunk = in->pull();
- read_from_input[i] = true;
- addChunk(std::move(chunk), i);
- }
-}
-
-void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_overflows)
-{
- auto & output = outputs.front();
-
- auto info = std::make_shared<ChunksToMerge>();
- info->bucket_num = bucket;
- info->is_overflows = is_overflows;
- info->chunks = std::make_unique<Chunks>(std::move(chunks));
-
- Chunk chunk;
- chunk.setChunkInfo(std::move(info));
- output.push(std::move(chunk));
-}
-
-bool GroupingAggregatedTransform::tryPushTwoLevelData()
-{
- auto try_push_by_iter = [&](auto batch_it)
- {
- if (batch_it == chunks_map.end())
- return false;
-
- Chunks & cur_chunks = batch_it->second;
- if (cur_chunks.empty())
- {
- chunks_map.erase(batch_it);
- return false;
- }
-
- pushData(std::move(cur_chunks), batch_it->first, false);
- chunks_map.erase(batch_it);
- return true;
- };
-
- if (all_inputs_finished)
- {
- /// Chunks are sorted by bucket.
- while (!chunks_map.empty())
- if (try_push_by_iter(chunks_map.begin()))
- return true;
- }
- else
- {
- for (; next_bucket_to_push < current_bucket; ++next_bucket_to_push)
- if (try_push_by_iter(chunks_map.find(next_bucket_to_push)))
- return true;
- }
-
- return false;
-}
-
-bool GroupingAggregatedTransform::tryPushSingleLevelData()
-{
- if (single_level_chunks.empty())
- return false;
-
- pushData(std::move(single_level_chunks), -1, false);
- return true;
-}
-
-bool GroupingAggregatedTransform::tryPushOverflowData()
-{
- if (overflow_chunks.empty())
- return false;
-
- pushData(std::move(overflow_chunks), -1, true);
- return true;
-}
-
-IProcessor::Status GroupingAggregatedTransform::prepare()
-{
- /// Check can output.
- auto & output = outputs.front();
-
- if (output.isFinished())
- {
- for (auto & input : inputs)
- input.close();
-
- chunks_map.clear();
- last_bucket_number.clear();
- return Status::Finished;
- }
-
- /// Read first time from each input to understand if we have two-level aggregation.
- if (!read_from_all_inputs)
- {
- readFromAllInputs();
- if (!read_from_all_inputs)
- return Status::NeedData;
- }
-
- /// Convert single level to two levels if have two-level input.
- if (has_two_level && !single_level_chunks.empty())
- return Status::Ready;
-
- /// Check can push (to avoid data caching).
- if (!output.canPush())
- {
- for (auto & input : inputs)
- input.setNotNeeded();
-
- return Status::PortFull;
- }
-
- bool pushed_to_output = false;
-
- /// Output if has data.
- if (has_two_level)
- pushed_to_output = tryPushTwoLevelData();
-
- auto need_input = [this](size_t input_num)
- {
- if (last_bucket_number[input_num] < current_bucket)
- return true;
-
- return expect_several_chunks_for_single_bucket_per_source && last_bucket_number[input_num] == current_bucket;
- };
-
- /// Read next bucket if can.
- for (; ; ++current_bucket)
- {
- bool finished = true;
- bool need_data = false;
-
- auto in = inputs.begin();
- for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
- {
- if (in->isFinished())
- continue;
-
- finished = false;
-
- if (!need_input(input_num))
- continue;
-
- in->setNeeded();
-
- if (!in->hasData())
- {
- need_data = true;
- continue;
- }
-
- auto chunk = in->pull();
- addChunk(std::move(chunk), input_num);
-
- if (has_two_level && !single_level_chunks.empty())
- return Status::Ready;
-
- if (!in->isFinished() && need_input(input_num))
- need_data = true;
- }
-
- if (finished)
- {
- all_inputs_finished = true;
- break;
- }
-
- if (need_data)
- return Status::NeedData;
- }
-
- if (pushed_to_output)
- return Status::PortFull;
-
- if (has_two_level)
- {
- if (tryPushTwoLevelData())
- return Status::PortFull;
-
- /// Sanity check. If new bucket was read, we should be able to push it.
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+struct ChunksToMerge : public ChunkInfo
+{
+ std::unique_ptr<Chunks> chunks;
+ Int32 bucket_num = -1;
+ bool is_overflows = false;
+};
+
+GroupingAggregatedTransform::GroupingAggregatedTransform(
+ const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_)
+ : IProcessor(InputPorts(num_inputs_, header_), { Block() })
+ , num_inputs(num_inputs_)
+ , params(std::move(params_))
+ , last_bucket_number(num_inputs, -1)
+ , read_from_input(num_inputs, false)
+{
+}
+
+void GroupingAggregatedTransform::readFromAllInputs()
+{
+ auto in = inputs.begin();
+ read_from_all_inputs = true;
+
+ for (size_t i = 0; i < num_inputs; ++i, ++in)
+ {
+ if (in->isFinished())
+ continue;
+
+ if (read_from_input[i])
+ continue;
+
+ in->setNeeded();
+
+ if (!in->hasData())
+ {
+ read_from_all_inputs = false;
+ continue;
+ }
+
+ auto chunk = in->pull();
+ read_from_input[i] = true;
+ addChunk(std::move(chunk), i);
+ }
+}
+
+void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_overflows)
+{
+ auto & output = outputs.front();
+
+ auto info = std::make_shared<ChunksToMerge>();
+ info->bucket_num = bucket;
+ info->is_overflows = is_overflows;
+ info->chunks = std::make_unique<Chunks>(std::move(chunks));
+
+ Chunk chunk;
+ chunk.setChunkInfo(std::move(info));
+ output.push(std::move(chunk));
+}
+
+bool GroupingAggregatedTransform::tryPushTwoLevelData()
+{
+ auto try_push_by_iter = [&](auto batch_it)
+ {
+ if (batch_it == chunks_map.end())
+ return false;
+
+ Chunks & cur_chunks = batch_it->second;
+ if (cur_chunks.empty())
+ {
+ chunks_map.erase(batch_it);
+ return false;
+ }
+
+ pushData(std::move(cur_chunks), batch_it->first, false);
+ chunks_map.erase(batch_it);
+ return true;
+ };
+
+ if (all_inputs_finished)
+ {
+ /// Chunks are sorted by bucket.
+ while (!chunks_map.empty())
+ if (try_push_by_iter(chunks_map.begin()))
+ return true;
+ }
+ else
+ {
+ for (; next_bucket_to_push < current_bucket; ++next_bucket_to_push)
+ if (try_push_by_iter(chunks_map.find(next_bucket_to_push)))
+ return true;
+ }
+
+ return false;
+}
+
+bool GroupingAggregatedTransform::tryPushSingleLevelData()
+{
+ if (single_level_chunks.empty())
+ return false;
+
+ pushData(std::move(single_level_chunks), -1, false);
+ return true;
+}
+
+bool GroupingAggregatedTransform::tryPushOverflowData()
+{
+ if (overflow_chunks.empty())
+ return false;
+
+ pushData(std::move(overflow_chunks), -1, true);
+ return true;
+}
+
+IProcessor::Status GroupingAggregatedTransform::prepare()
+{
+ /// Check can output.
+ auto & output = outputs.front();
+
+ if (output.isFinished())
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ chunks_map.clear();
+ last_bucket_number.clear();
+ return Status::Finished;
+ }
+
+ /// Read first time from each input to understand if we have two-level aggregation.
+ if (!read_from_all_inputs)
+ {
+ readFromAllInputs();
+ if (!read_from_all_inputs)
+ return Status::NeedData;
+ }
+
+ /// Convert single level to two levels if have two-level input.
+ if (has_two_level && !single_level_chunks.empty())
+ return Status::Ready;
+
+ /// Check can push (to avoid data caching).
+ if (!output.canPush())
+ {
+ for (auto & input : inputs)
+ input.setNotNeeded();
+
+ return Status::PortFull;
+ }
+
+ bool pushed_to_output = false;
+
+ /// Output if has data.
+ if (has_two_level)
+ pushed_to_output = tryPushTwoLevelData();
+
+ auto need_input = [this](size_t input_num)
+ {
+ if (last_bucket_number[input_num] < current_bucket)
+ return true;
+
+ return expect_several_chunks_for_single_bucket_per_source && last_bucket_number[input_num] == current_bucket;
+ };
+
+ /// Read next bucket if can.
+ for (; ; ++current_bucket)
+ {
+ bool finished = true;
+ bool need_data = false;
+
+ auto in = inputs.begin();
+ for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
+ {
+ if (in->isFinished())
+ continue;
+
+ finished = false;
+
+ if (!need_input(input_num))
+ continue;
+
+ in->setNeeded();
+
+ if (!in->hasData())
+ {
+ need_data = true;
+ continue;
+ }
+
+ auto chunk = in->pull();
+ addChunk(std::move(chunk), input_num);
+
+ if (has_two_level && !single_level_chunks.empty())
+ return Status::Ready;
+
+ if (!in->isFinished() && need_input(input_num))
+ need_data = true;
+ }
+
+ if (finished)
+ {
+ all_inputs_finished = true;
+ break;
+ }
+
+ if (need_data)
+ return Status::NeedData;
+ }
+
+ if (pushed_to_output)
+ return Status::PortFull;
+
+ if (has_two_level)
+ {
+ if (tryPushTwoLevelData())
+ return Status::PortFull;
+
+ /// Sanity check. If new bucket was read, we should be able to push it.
/// This is always false, but we still keep this condition in case the code will be changed.
if (!all_inputs_finished) // -V547
- throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.",
- ErrorCodes::LOGICAL_ERROR);
- }
- else
- {
+ throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.",
+ ErrorCodes::LOGICAL_ERROR);
+ }
+ else
+ {
if (!all_inputs_finished) // -V547
- throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, "
- "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR);
-
- if (tryPushSingleLevelData())
- return Status::PortFull;
- }
-
- /// If we haven't pushed to output, then all data was read. Push overflows if have.
- if (tryPushOverflowData())
- return Status::PortFull;
-
- output.finish();
- return Status::Finished;
-}
-
-void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
-{
- const auto & info = chunk.getChunkInfo();
- if (!info)
- throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- Int32 bucket = agg_info->bucket_num;
- bool is_overflows = agg_info->is_overflows;
-
- if (is_overflows)
- overflow_chunks.emplace_back(std::move(chunk));
- else if (bucket < 0)
- single_level_chunks.emplace_back(std::move(chunk));
- else
- {
- chunks_map[bucket].emplace_back(std::move(chunk));
- has_two_level = true;
- last_bucket_number[input] = bucket;
- }
-}
-
-void GroupingAggregatedTransform::work()
-{
- /// Convert single level data to two level.
- if (!single_level_chunks.empty())
- {
- const auto & header = getInputs().front().getHeader(); /// Take header from input port. Output header is empty.
- auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns());
- single_level_chunks.pop_back();
- auto blocks = params->aggregator.convertBlockToTwoLevel(block);
-
- for (auto & cur_block : blocks)
- {
- if (!cur_block)
- continue;
-
- Int32 bucket = cur_block.info.bucket_num;
- auto chunk_info = std::make_shared<AggregatedChunkInfo>();
- chunk_info->bucket_num = bucket;
- chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info)));
- }
- }
-}
-
-
-MergingAggregatedBucketTransform::MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params_)
- : ISimpleTransform({}, params_->getHeader(), false), params(std::move(params_))
-{
- setInputNotNeededAfterRead(true);
-}
-
-void MergingAggregatedBucketTransform::transform(Chunk & chunk)
-{
- const auto & info = chunk.getChunkInfo();
- const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
-
- if (!chunks_to_merge)
- throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.",
- ErrorCodes::LOGICAL_ERROR);
-
- auto header = params->aggregator.getHeader(false);
-
- BlocksList blocks_list;
- for (auto & cur_chunk : *chunks_to_merge->chunks)
- {
- const auto & cur_info = cur_chunk.getChunkInfo();
- if (!cur_info)
- throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.",
- ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedBucketTransform.",
- ErrorCodes::LOGICAL_ERROR);
-
- Block block = header.cloneWithColumns(cur_chunk.detachColumns());
- block.info.is_overflows = agg_info->is_overflows;
- block.info.bucket_num = agg_info->bucket_num;
-
- blocks_list.emplace_back(std::move(block));
- }
-
- auto res_info = std::make_shared<AggregatedChunkInfo>();
- res_info->is_overflows = chunks_to_merge->is_overflows;
- res_info->bucket_num = chunks_to_merge->bucket_num;
- chunk.setChunkInfo(std::move(res_info));
-
- auto block = params->aggregator.mergeBlocks(blocks_list, params->final);
- size_t num_rows = block.rows();
- chunk.setColumns(block.getColumns(), num_rows);
-}
-
-
-SortingAggregatedTransform::SortingAggregatedTransform(size_t num_inputs_, AggregatingTransformParamsPtr params_)
- : IProcessor(InputPorts(num_inputs_, params_->getHeader()), {params_->getHeader()})
- , num_inputs(num_inputs_)
- , params(std::move(params_))
- , last_bucket_number(num_inputs, -1)
- , is_input_finished(num_inputs, false)
-{
-}
-
-bool SortingAggregatedTransform::tryPushChunk()
-{
- auto & output = outputs.front();
-
- if (chunks.empty())
- return false;
-
- /// Chunk with min current bucket.
- auto it = chunks.begin();
- auto cur_bucket = it->first;
-
- /// Check that can push it
- for (size_t input = 0; input < num_inputs; ++input)
- if (!is_input_finished[input] && last_bucket_number[input] < cur_bucket)
- return false;
-
- output.push(std::move(it->second));
- chunks.erase(it);
- return true;
-}
-
-void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
-{
- const auto & info = chunk.getChunkInfo();
- if (!info)
- throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- Int32 bucket = agg_info->bucket_num;
- bool is_overflows = agg_info->is_overflows;
-
- if (is_overflows)
- overflow_chunk = std::move(chunk);
- else
- {
- if (chunks[bucket])
- throw Exception("SortingAggregatedTransform already got bucket with number " + toString(bucket),
- ErrorCodes::LOGICAL_ERROR);
-
- chunks[bucket] = std::move(chunk);
- last_bucket_number[from_input] = bucket;
- }
-}
-
-IProcessor::Status SortingAggregatedTransform::prepare()
-{
- /// Check can output.
- auto & output = outputs.front();
-
- if (output.isFinished())
- {
- for (auto & input : inputs)
- input.close();
-
- chunks.clear();
- last_bucket_number.clear();
- return Status::Finished;
- }
-
- /// Check can push (to avoid data caching).
- if (!output.canPush())
- {
- for (auto & input : inputs)
- input.setNotNeeded();
-
- return Status::PortFull;
- }
-
- /// Push if have min version.
- bool pushed_to_output = tryPushChunk();
-
- bool need_data = false;
- bool all_finished = true;
-
- /// Try read anything.
- auto in = inputs.begin();
- for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
- {
- if (in->isFinished())
- {
- is_input_finished[input_num] = true;
- continue;
- }
-
- //all_finished = false;
-
- in->setNeeded();
-
- if (!in->hasData())
- {
- need_data = true;
- all_finished = false;
- continue;
- }
-
- auto chunk = in->pull();
- addChunk(std::move(chunk), input_num);
-
- if (in->isFinished())
- {
- is_input_finished[input_num] = true;
- }
- else
- {
- /// If chunk was pulled, then we need data from this port.
- need_data = true;
- all_finished = false;
- }
- }
-
- if (pushed_to_output)
- return Status::PortFull;
-
- if (tryPushChunk())
- return Status::PortFull;
-
- if (need_data)
- return Status::NeedData;
-
- if (!all_finished)
- throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.",
- ErrorCodes::LOGICAL_ERROR);
-
- if (overflow_chunk)
- {
- output.push(std::move(overflow_chunk));
- return Status::PortFull;
- }
-
- output.finish();
- return Status::Finished;
-}
-
-
+ throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, "
+ "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR);
+
+ if (tryPushSingleLevelData())
+ return Status::PortFull;
+ }
+
+ /// If we haven't pushed to output, then all data was read. Push overflows if have.
+ if (tryPushOverflowData())
+ return Status::PortFull;
+
+ output.finish();
+ return Status::Finished;
+}
+
+void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
+{
+ const auto & info = chunk.getChunkInfo();
+ if (!info)
+ throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
+ if (!agg_info)
+ throw Exception("Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ Int32 bucket = agg_info->bucket_num;
+ bool is_overflows = agg_info->is_overflows;
+
+ if (is_overflows)
+ overflow_chunks.emplace_back(std::move(chunk));
+ else if (bucket < 0)
+ single_level_chunks.emplace_back(std::move(chunk));
+ else
+ {
+ chunks_map[bucket].emplace_back(std::move(chunk));
+ has_two_level = true;
+ last_bucket_number[input] = bucket;
+ }
+}
+
+void GroupingAggregatedTransform::work()
+{
+ /// Convert single level data to two level.
+ if (!single_level_chunks.empty())
+ {
+ const auto & header = getInputs().front().getHeader(); /// Take header from input port. Output header is empty.
+ auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns());
+ single_level_chunks.pop_back();
+ auto blocks = params->aggregator.convertBlockToTwoLevel(block);
+
+ for (auto & cur_block : blocks)
+ {
+ if (!cur_block)
+ continue;
+
+ Int32 bucket = cur_block.info.bucket_num;
+ auto chunk_info = std::make_shared<AggregatedChunkInfo>();
+ chunk_info->bucket_num = bucket;
+ chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info)));
+ }
+ }
+}
+
+
+MergingAggregatedBucketTransform::MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params_)
+ : ISimpleTransform({}, params_->getHeader(), false), params(std::move(params_))
+{
+ setInputNotNeededAfterRead(true);
+}
+
+void MergingAggregatedBucketTransform::transform(Chunk & chunk)
+{
+ const auto & info = chunk.getChunkInfo();
+ const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
+
+ if (!chunks_to_merge)
+ throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.",
+ ErrorCodes::LOGICAL_ERROR);
+
+ auto header = params->aggregator.getHeader(false);
+
+ BlocksList blocks_list;
+ for (auto & cur_chunk : *chunks_to_merge->chunks)
+ {
+ const auto & cur_info = cur_chunk.getChunkInfo();
+ if (!cur_info)
+ throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.",
+ ErrorCodes::LOGICAL_ERROR);
+
+ const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get());
+ if (!agg_info)
+ throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedBucketTransform.",
+ ErrorCodes::LOGICAL_ERROR);
+
+ Block block = header.cloneWithColumns(cur_chunk.detachColumns());
+ block.info.is_overflows = agg_info->is_overflows;
+ block.info.bucket_num = agg_info->bucket_num;
+
+ blocks_list.emplace_back(std::move(block));
+ }
+
+ auto res_info = std::make_shared<AggregatedChunkInfo>();
+ res_info->is_overflows = chunks_to_merge->is_overflows;
+ res_info->bucket_num = chunks_to_merge->bucket_num;
+ chunk.setChunkInfo(std::move(res_info));
+
+ auto block = params->aggregator.mergeBlocks(blocks_list, params->final);
+ size_t num_rows = block.rows();
+ chunk.setColumns(block.getColumns(), num_rows);
+}
+
+
+SortingAggregatedTransform::SortingAggregatedTransform(size_t num_inputs_, AggregatingTransformParamsPtr params_)
+ : IProcessor(InputPorts(num_inputs_, params_->getHeader()), {params_->getHeader()})
+ , num_inputs(num_inputs_)
+ , params(std::move(params_))
+ , last_bucket_number(num_inputs, -1)
+ , is_input_finished(num_inputs, false)
+{
+}
+
+bool SortingAggregatedTransform::tryPushChunk()
+{
+ auto & output = outputs.front();
+
+ if (chunks.empty())
+ return false;
+
+ /// Chunk with min current bucket.
+ auto it = chunks.begin();
+ auto cur_bucket = it->first;
+
+ /// Check that can push it
+ for (size_t input = 0; input < num_inputs; ++input)
+ if (!is_input_finished[input] && last_bucket_number[input] < cur_bucket)
+ return false;
+
+ output.push(std::move(it->second));
+ chunks.erase(it);
+ return true;
+}
+
+void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
+{
+ const auto & info = chunk.getChunkInfo();
+ if (!info)
+ throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
+ if (!agg_info)
+ throw Exception("Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
+
+ Int32 bucket = agg_info->bucket_num;
+ bool is_overflows = agg_info->is_overflows;
+
+ if (is_overflows)
+ overflow_chunk = std::move(chunk);
+ else
+ {
+ if (chunks[bucket])
+ throw Exception("SortingAggregatedTransform already got bucket with number " + toString(bucket),
+ ErrorCodes::LOGICAL_ERROR);
+
+ chunks[bucket] = std::move(chunk);
+ last_bucket_number[from_input] = bucket;
+ }
+}
+
+IProcessor::Status SortingAggregatedTransform::prepare()
+{
+ /// Check can output.
+ auto & output = outputs.front();
+
+ if (output.isFinished())
+ {
+ for (auto & input : inputs)
+ input.close();
+
+ chunks.clear();
+ last_bucket_number.clear();
+ return Status::Finished;
+ }
+
+ /// Check can push (to avoid data caching).
+ if (!output.canPush())
+ {
+ for (auto & input : inputs)
+ input.setNotNeeded();
+
+ return Status::PortFull;
+ }
+
+ /// Push if have min version.
+ bool pushed_to_output = tryPushChunk();
+
+ bool need_data = false;
+ bool all_finished = true;
+
+ /// Try read anything.
+ auto in = inputs.begin();
+ for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
+ {
+ if (in->isFinished())
+ {
+ is_input_finished[input_num] = true;
+ continue;
+ }
+
+ //all_finished = false;
+
+ in->setNeeded();
+
+ if (!in->hasData())
+ {
+ need_data = true;
+ all_finished = false;
+ continue;
+ }
+
+ auto chunk = in->pull();
+ addChunk(std::move(chunk), input_num);
+
+ if (in->isFinished())
+ {
+ is_input_finished[input_num] = true;
+ }
+ else
+ {
+ /// If chunk was pulled, then we need data from this port.
+ need_data = true;
+ all_finished = false;
+ }
+ }
+
+ if (pushed_to_output)
+ return Status::PortFull;
+
+ if (tryPushChunk())
+ return Status::PortFull;
+
+ if (need_data)
+ return Status::NeedData;
+
+ if (!all_finished)
+ throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.",
+ ErrorCodes::LOGICAL_ERROR);
+
+ if (overflow_chunk)
+ {
+ output.push(std::move(overflow_chunk));
+ return Status::PortFull;
+ }
+
+ output.finish();
+ return Status::Finished;
+}
+
+
void addMergingAggregatedMemoryEfficientTransform(
Pipe & pipe,
AggregatingTransformParamsPtr params,
size_t num_merging_processors)
-{
+{
pipe.addTransform(std::make_shared<GroupingAggregatedTransform>(pipe.getHeader(), pipe.numOutputPorts(), params));
-
- if (num_merging_processors <= 1)
- {
- /// --> GroupingAggregated --> MergingAggregatedBucket -->
+
+ if (num_merging_processors <= 1) /// At most one merging processor: no resize and no sorting stage is added.
+ {
+ /// Resulting pipeline: --> GroupingAggregated --> MergingAggregatedBucket -->
pipe.addTransform(std::make_shared<MergingAggregatedBucketTransform>(params));
return;
- }
-
- /// --> --> MergingAggregatedBucket -->
- /// --> GroupingAggregated --> ResizeProcessor --> MergingAggregatedBucket --> SortingAggregated -->
- /// --> --> MergingAggregatedBucket -->
-
+ }
+
+ /// --> --> MergingAggregatedBucket -->
+ /// --> GroupingAggregated --> ResizeProcessor --> MergingAggregatedBucket --> SortingAggregated -->
+ /// --> --> MergingAggregatedBucket -->
+
pipe.resize(num_merging_processors);
-
+
pipe.addSimpleTransform([params](const Block &)
- {
+ {
return std::make_shared<MergingAggregatedBucketTransform>(params);
});
-
+
pipe.addTransform(std::make_shared<SortingAggregatedTransform>(num_merging_processors, params));
-}
-
-}
+}
+
+}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
index 4367f6fec3..cf2fc92347 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
@@ -1,149 +1,149 @@
#pragma once
-#include <Processors/IProcessor.h>
-#include <Interpreters/Aggregator.h>
-#include <Processors/ISimpleTransform.h>
-#include <Processors/Transforms/AggregatingTransform.h>
-#include <Processors/ResizeProcessor.h>
-
-
-namespace DB
-{
-
-/** Pre-aggregates data from ports, holding in RAM only one or more (up to merging_threads) blocks from each source.
- * This saves RAM in case of using two-level aggregation, where in each source there will be up to 256 blocks with parts of the result.
- *
- * Aggregate functions in blocks should not be finalized so that their states can be combined.
- *
- * Used to solve two tasks:
- *
- * 1. External aggregation with data flush to disk.
- * Partially aggregated data (previously divided into 256 buckets) is flushed to some number of files on the disk.
- * We need to read them and merge them by buckets - keeping only a few buckets from each file in RAM simultaneously.
- *
- * 2. Merge aggregation results for distributed query processing.
+#include <Processors/IProcessor.h>
+#include <Interpreters/Aggregator.h>
+#include <Processors/ISimpleTransform.h>
+#include <Processors/Transforms/AggregatingTransform.h>
+#include <Processors/ResizeProcessor.h>
+
+
+namespace DB
+{
+
+/** Pre-aggregates data from ports, holding in RAM only one or more (up to merging_threads) blocks from each source.
+ * This saves RAM in case of using two-level aggregation, where in each source there will be up to 256 blocks with parts of the result.
+ *
+ * Aggregate functions in blocks should not be finalized so that their states can be combined.
+ *
+ * Used to solve two tasks:
+ *
+ * 1. External aggregation with data flush to disk.
+ * Partially aggregated data (previously divided into 256 buckets) is flushed to some number of files on the disk.
+ * We need to read them and merge them by buckets - keeping only a few buckets from each file in RAM simultaneously.
+ *
+ * 2. Merge aggregation results for distributed query processing.
* Partially aggregated data arrives from different servers, which can be split down or not, into 256 buckets,
- * and these buckets are passed to us by the network from each server in sequence, one by one.
- * You should also read and merge by the buckets.
- *
- * The essence of the work:
- *
- * There are a number of sources. They give out blocks with partially aggregated data.
- * Each source can return one of the following block sequences:
- * 1. "unsplitted" block with bucket_num = -1;
+ * and these buckets are passed to us by the network from each server in sequence, one by one.
+ * You should also read and merge by the buckets.
+ *
+ * The essence of the work:
+ *
+ * There are a number of sources. They give out blocks with partially aggregated data.
+ * Each source can return one of the following block sequences:
+ * 1. "unsplitted" block with bucket_num = -1;
* 2. "split" (two_level) blocks with bucket_num from 0 to 255;
- * In both cases, there may also be a block of "overflows" with bucket_num = -1 and is_overflows = true;
- *
+ * In both cases, there may also be a block of "overflows" with bucket_num = -1 and is_overflows = true;
+ *
* We start from the convention that split blocks are always passed in the order of bucket_num.
- * That is, if a < b, then the bucket_num = a block goes before bucket_num = b.
- * This is needed for a memory-efficient merge
- * - so that you do not need to read the blocks up front, but go all the way up by bucket_num.
- *
- * In this case, not all bucket_num from the range of 0..255 can be present.
- * The overflow block can be presented in any order relative to other blocks (but it can be only one).
- *
- * It is necessary to combine these sequences of blocks and return the result as a sequence with the same properties.
+ * That is, if a < b, then the bucket_num = a block goes before bucket_num = b.
+ * This is needed for a memory-efficient merge
+ * - so that you do not need to read the blocks up front, but go all the way up by bucket_num.
+ *
+ * In this case, not every bucket_num from the range 0..255 has to be present.
+ * The overflow block may appear at any position relative to the other blocks (but there can be only one).
+ *
+ * It is necessary to combine these sequences of blocks and return the result as a sequence with the same properties.
* That is, at the output, if there are "split" blocks in the sequence, then they should go in the order of bucket_num.
- *
- * The merge can be performed using several (merging_threads) threads.
- * For this, receiving of a set of blocks for the next bucket_num should be done sequentially,
- * and then, when we have several received sets, they can be merged in parallel.
- *
- * When you receive next blocks from different sources,
- * data from sources can also be read in several threads (reading_threads)
- * for optimal performance in the presence of a fast network or disks (from where these blocks are read).
- */
-
-/// Has several inputs and single output.
-/// Read from inputs chunks with partially aggregated data, group them by bucket number
-/// and write data from single bucket as single chunk.
-class GroupingAggregatedTransform : public IProcessor
-{
-public:
- GroupingAggregatedTransform(const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_);
- String getName() const override { return "GroupingAggregatedTransform"; }
-
- /// Special setting: in case if single source can return several chunks with same bucket.
- void allowSeveralChunksForSingleBucketPerSource() { expect_several_chunks_for_single_bucket_per_source = true; }
-
-protected:
- Status prepare() override;
- void work() override;
-
-private:
- size_t num_inputs;
- AggregatingTransformParamsPtr params;
-
- std::vector<Int32> last_bucket_number; /// Last bucket read from each input.
- std::map<Int32, Chunks> chunks_map; /// bucket -> chunks
- Chunks overflow_chunks;
- Chunks single_level_chunks;
- Int32 current_bucket = 0; /// Currently processing bucket.
- Int32 next_bucket_to_push = 0; /// Always <= current_bucket.
- bool has_two_level = false;
-
- bool all_inputs_finished = false;
- bool read_from_all_inputs = false;
- std::vector<bool> read_from_input;
-
- bool expect_several_chunks_for_single_bucket_per_source = false;
-
- /// Add chunk read from input to chunks_map, overflow_chunks or single_level_chunks according to it's chunk info.
- void addChunk(Chunk chunk, size_t input);
- /// Read from all inputs first chunk. It is needed to detect if any source has two-level aggregation.
- void readFromAllInputs();
- /// Push chunks if all inputs has single level.
- bool tryPushSingleLevelData();
- /// Push chunks from ready bucket if has one.
- bool tryPushTwoLevelData();
- /// Push overflow chunks if has any.
- bool tryPushOverflowData();
- /// Push chunks from bucket to output port.
- void pushData(Chunks chunks, Int32 bucket, bool is_overflows);
-};
-
-/// Merge aggregated data from single bucket.
-class MergingAggregatedBucketTransform : public ISimpleTransform
-{
-public:
- explicit MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params);
- String getName() const override { return "MergingAggregatedBucketTransform"; }
-
-protected:
- void transform(Chunk & chunk) override;
-
-private:
- AggregatingTransformParamsPtr params;
-};
-
-/// Has several inputs and single output.
-/// Read from inputs merged bucket with aggregated data, sort them by bucket number and write to output.
-/// Presumption: inputs return chunks with increasing bucket number, there is at most one chunk per bucket.
-class SortingAggregatedTransform : public IProcessor
-{
-public:
- SortingAggregatedTransform(size_t num_inputs, AggregatingTransformParamsPtr params);
- String getName() const override { return "SortingAggregatedTransform"; }
- Status prepare() override;
-
-private:
- size_t num_inputs;
- AggregatingTransformParamsPtr params;
- std::vector<Int32> last_bucket_number;
- std::vector<bool> is_input_finished;
- std::map<Int32, Chunk> chunks;
- Chunk overflow_chunk;
-
- bool tryPushChunk();
- void addChunk(Chunk chunk, size_t from_input);
-};
-
+ *
+ * The merge can be performed using several (merging_threads) threads.
+ * For this, receiving of a set of blocks for the next bucket_num should be done sequentially,
+ * and then, when we have several received sets, they can be merged in parallel.
+ *
+ * When you receive next blocks from different sources,
+ * data from sources can also be read in several threads (reading_threads)
+ * for optimal performance in the presence of a fast network or disks (from where these blocks are read).
+ */
+
+/// Has several inputs and a single output.
+/// Reads chunks with partially aggregated data from the inputs, groups them by bucket number
+/// and writes the data of a single bucket as a single chunk.
+class GroupingAggregatedTransform : public IProcessor
+{
+public:
+ GroupingAggregatedTransform(const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_);
+ String getName() const override { return "GroupingAggregatedTransform"; }
+
+ /// Special setting: for the case when a single source can return several chunks with the same bucket.
+ void allowSeveralChunksForSingleBucketPerSource() { expect_several_chunks_for_single_bucket_per_source = true; }
+
+protected:
+ Status prepare() override;
+ void work() override;
+
+private:
+ size_t num_inputs;
+ AggregatingTransformParamsPtr params;
+
+ std::vector<Int32> last_bucket_number; /// Last bucket read from each input.
+ std::map<Int32, Chunks> chunks_map; /// bucket -> chunks
+ Chunks overflow_chunks; /// One of the three destinations addChunk() sorts chunks into.
+ Chunks single_level_chunks; /// One of the three destinations addChunk() sorts chunks into.
+ Int32 current_bucket = 0; /// Currently processing bucket.
+ Int32 next_bucket_to_push = 0; /// Always <= current_bucket.
+ bool has_two_level = false; /// Presumably set once any source is seen to use two-level aggregation - TODO confirm.
+
+ bool all_inputs_finished = false;
+ bool read_from_all_inputs = false;
+ std::vector<bool> read_from_input;
+
+ bool expect_several_chunks_for_single_bucket_per_source = false; /// Enabled by allowSeveralChunksForSingleBucketPerSource().
+
+ /// Add a chunk read from an input to chunks_map, overflow_chunks or single_level_chunks according to its chunk info.
+ void addChunk(Chunk chunk, size_t input);
+ /// Read the first chunk from every input. It is needed to detect whether any source has two-level aggregation.
+ void readFromAllInputs();
+ /// Push chunks if all inputs have a single level.
+ bool tryPushSingleLevelData();
+ /// Push chunks from a ready bucket if there is one.
+ bool tryPushTwoLevelData();
+ /// Push overflow chunks if there are any.
+ bool tryPushOverflowData();
+ /// Push chunks from a bucket to the output port.
+ void pushData(Chunks chunks, Int32 bucket, bool is_overflows);
+};
+
+/// Merges aggregated data that belongs to a single bucket.
+class MergingAggregatedBucketTransform : public ISimpleTransform
+{
+public:
+ explicit MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params);
+ String getName() const override { return "MergingAggregatedBucketTransform"; }
+
+protected:
+ void transform(Chunk & chunk) override; /// Takes the chunk by reference; presumably rewrites it in place with the merged result - TODO confirm.
+
+private:
+ AggregatingTransformParamsPtr params;
+};
+
+/// Has several inputs and a single output.
+/// Reads merged buckets with aggregated data from the inputs, sorts them by bucket number and writes them to the output.
+/// Presumption: inputs return chunks with increasing bucket number, there is at most one chunk per bucket.
+class SortingAggregatedTransform : public IProcessor
+{
+public:
+ SortingAggregatedTransform(size_t num_inputs, AggregatingTransformParamsPtr params);
+ String getName() const override { return "SortingAggregatedTransform"; }
+ Status prepare() override;
+
+private:
+ size_t num_inputs;
+ AggregatingTransformParamsPtr params;
+ std::vector<Int32> last_bucket_number; /// Bucket number of the last non-overflow chunk received from each input.
+ std::vector<bool> is_input_finished; /// Set per input once prepare() observes that the input is finished.
+ std::map<Int32, Chunk> chunks; /// bucket -> chunk waiting to be pushed.
+ Chunk overflow_chunk; /// Chunk whose info has is_overflows set; prepare() pushes it after all other work is done.
+
+ bool tryPushChunk(); /// Returns true if a chunk was pushed; prepare() then reports PortFull.
+ void addChunk(Chunk chunk, size_t from_input); /// Stores the chunk by its bucket (or as overflow_chunk); throws LOGICAL_ERROR if the bucket is already stored.
+};
+
class Pipe;
/// Adds processors to pipe which performs memory efficient merging of partially aggregated data from several sources.
void addMergingAggregatedMemoryEfficientTransform(
Pipe & pipe,
- AggregatingTransformParamsPtr params,
- size_t num_merging_processors);
-
-}
-
+ AggregatingTransformParamsPtr params,
+ size_t num_merging_processors);
+
+}
+
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
index b08818a2ba..8abbc413c4 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
@@ -1,20 +1,20 @@
-#pragma once
-
-#include <Interpreters/PreparedSets.h>
-#include <Interpreters/DatabaseAndTableWithAlias.h>
-#include <Core/SortDescription.h>
-#include <Core/Names.h>
+#pragma once
+
+#include <Interpreters/PreparedSets.h>
+#include <Interpreters/DatabaseAndTableWithAlias.h>
+#include <Core/SortDescription.h>
+#include <Core/Names.h>
#include <Storages/ProjectionsDescription.h>
#include <Interpreters/AggregateDescription.h>
-#include <memory>
-
-namespace DB
-{
-
-class ExpressionActions;
-using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
-
+#include <memory>
+
+namespace DB
+{
+
+class ExpressionActions;
+using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
@@ -42,14 +42,14 @@ using ClusterPtr = std::shared_ptr<Cluster>;
struct MergeTreeDataSelectAnalysisResult;
using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr<MergeTreeDataSelectAnalysisResult>;
-struct PrewhereInfo
-{
- /// Actions which are executed in order to alias columns are used for prewhere actions.
+struct PrewhereInfo
+{
+ /// Actions which are executed in order to alias columns are used for prewhere actions.
ActionsDAGPtr alias_actions;
/// Actions for row level security filter. Applied separately before prewhere_actions.
/// This actions are separate because prewhere condition should not be executed over filtered rows.
ActionsDAGPtr row_level_filter;
- /// Actions which are executed on block in order to get filter column for prewhere step.
+ /// Actions which are executed on block in order to get filter column for prewhere step.
ActionsDAGPtr prewhere_actions;
String row_level_column_name;
String prewhere_column_name;
@@ -63,13 +63,13 @@ struct PrewhereInfo
std::string dump() const;
};
-/// Helper struct to store all the information about the filter expression.
-struct FilterInfo
-{
+/// Helper struct to store all the information about the filter expression.
+struct FilterInfo
+{
ExpressionActionsPtr alias_actions;
ExpressionActionsPtr actions;
- String column_name;
- bool do_remove_column = false;
+ String column_name;
+ bool do_remove_column = false;
};
/// Same as FilterInfo, but with ActionsDAG.
@@ -80,25 +80,25 @@ struct FilterDAGInfo
bool do_remove_column = false;
std::string dump() const;
-};
-
-struct InputOrderInfo
-{
- SortDescription order_key_prefix_descr;
- int direction;
+};
+
+struct InputOrderInfo
+{
+ SortDescription order_key_prefix_descr;
+ int direction;
UInt64 limit;
-
+
InputOrderInfo(const SortDescription & order_key_prefix_descr_, int direction_, UInt64 limit_)
: order_key_prefix_descr(order_key_prefix_descr_), direction(direction_), limit(limit_) {}
-
- bool operator ==(const InputOrderInfo & other) const
- {
- return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction;
- }
-
- bool operator !=(const InputOrderInfo & other) const { return !(*this == other); }
-};
-
+
+ bool operator ==(const InputOrderInfo & other) const /// Equality over the key prefix description and the direction.
+ {
+ return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction; /// NOTE(review): `limit` is not compared - confirm this is intentional.
+ }
+
+ bool operator !=(const InputOrderInfo & other) const { return !(*this == other); } /// Negation of operator ==.
+};
+
class IMergeTreeDataPart;
using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
@@ -125,15 +125,15 @@ struct ProjectionCandidate
MergeTreeDataSelectAnalysisResultPtr merge_tree_normal_select_result_ptr;
};
-/** Query along with some additional data,
- * that can be used during query processing
- * inside storage engines.
- */
-struct SelectQueryInfo
-{
- ASTPtr query;
- ASTPtr view_query; /// Optimized VIEW query
-
+/** Query along with some additional data,
+ * that can be used during query processing
+ * inside storage engines.
+ */
+struct SelectQueryInfo
+{
+ ASTPtr query;
+ ASTPtr view_query; /// Optimized VIEW query
+
/// Cluster for the query.
ClusterPtr cluster;
/// Optimized cluster for the query.
@@ -143,16 +143,16 @@ struct SelectQueryInfo
ClusterPtr optimized_cluster;
TreeRewriterResultPtr syntax_analyzer_result;
-
- PrewhereInfoPtr prewhere_info;
-
- ReadInOrderOptimizerPtr order_optimizer;
+
+ PrewhereInfoPtr prewhere_info;
+
+ ReadInOrderOptimizerPtr order_optimizer;
/// Can be modified while reading from storage
InputOrderInfoPtr input_order_info;
-
- /// Prepared sets are used for indices by storage engine.
- /// Example: x IN (1, 2, 3)
- PreparedSets sets;
+
+ /// Prepared sets are used for indices by storage engine.
+ /// Example: x IN (1, 2, 3)
+ PreparedSets sets;
/// Cached value of ExpressionAnalysisResult::has_window
bool has_window = false;
@@ -166,6 +166,6 @@ struct SelectQueryInfo
bool merge_tree_empty_result = false;
Block minmax_count_projection_block;
MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr;
-};
-
-}
+};
+
+}