author     a-romanov <a-romanov@yandex-team.ru>  2022-07-04 21:28:48 +0300
committer  a-romanov <a-romanov@yandex-team.ru>  2022-07-04 21:28:48 +0300
commit     974cf14e5a79d4a754425920caf766f7f569ec43 (patch)
tree       e3c9bc84f2846c28b0d1b1cbcec18bebe311326f
parent     c2b2a51e48da9720ea80c1ba0b8ea6633c4ed53a (diff)
download   ydb-974cf14e5a79d4a754425920caf766f7f569ec43.tar.gz
KIKIMR-15248 Drop unused files.
ref:07c5ac94f878147c82eb3094e5513abdb92b1a7f
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt  1
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/base/common/chrono_io.h  46
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp  124
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.h  19
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/base/common/scope_guard_safe.h  68
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/LICENSE  201
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/README  2
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/ActionLock.cpp  33
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h  689
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h  394
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/Fiber.h  7
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/FiberStack.h  75
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h  178
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h  496
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h  189
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashSet.h  101
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h  435
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h  66
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h  335
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h  33
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h  235
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h  427
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/RWLock.cpp  307
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h  72
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/SettingsChanges.cpp  50
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/Stopwatch.cpp  19
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h  37
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/UTF8Helpers.cpp  207
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/WeakHash.cpp  2
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h  37
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.cpp  0
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.h  0
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp  324
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h  173
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h  182
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Core/QueryProcessingStage.cpp  37
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Core/SortDescription.cpp  77
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Core/UUID.cpp  19
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/BlockIO.cpp  78
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h  74
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp  69
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h  78
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp  558
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h  227
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.cpp  226
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.h  83
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h  48
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp  442
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h  125
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp  115
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h  44
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskType.h  38
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/Executor.h  42
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp  89
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h  342
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp  103
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h  103
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.cpp  50
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.h  29
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h  27
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/IO/ConnectionTimeoutsContext.h  38
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h  339
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h  1335
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp  723
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h  306
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h  379
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h  51
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h  71
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.cpp  104
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.h  87
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h  143
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp  123
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h  37
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp  625
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h  121
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.cpp  19
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.h  20
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h  17
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h  17
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.cpp  35
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.h  14
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h  32
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h  32
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Executors/OutputStreamToOutputFormat.h  39
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp  213
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp  246
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp  874
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/EmptySink.h  18
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/NullSink.h  23
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h  18
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp  200
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h  77
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h  21
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp  151
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h  90
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp  634
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h  167
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp  123
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h  31
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp  525
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h  149
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnDefault.cpp  63
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnsDescription.cpp  691
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IStorage.cpp  260
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IndicesDescription.cpp  174
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/KeyDescription.cpp  156
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp  40
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.h  34
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ProjectionsDescription.cpp  370
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryDescription.cpp  135
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h  171
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/StorageInMemoryMetadata.cpp  682
-rw-r--r--  ydb/library/yql/udfs/common/clickhouse/client/src/Storages/TTLDescription.cpp  377
113 files changed, 0 insertions, 20107 deletions
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt b/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt
index 20d1860daa0..48ca0a7ec00 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt
+++ b/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt
@@ -106,7 +106,6 @@ target_sources(clickhouse_client_udf.global PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/base/common/sleep.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/base/common/StringRef.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/base/common/getResource.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/base/common/preciseExp10.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/src/Common/AlignedBuffer.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Allocator.cpp
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/chrono_io.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/chrono_io.h
deleted file mode 100644
index 4ee8dec6634..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/chrono_io.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#pragma once
-
-#include <chrono>
-#include <string>
-#include <sstream>
-#include <cctz/time_zone.h>
-
-
-inline std::string to_string(const std::time_t & time)
-{
- return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone());
-}
-
-template <typename Clock, typename Duration = typename Clock::duration>
-std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
-{
- // Don't use DateLUT because it shows weird characters for
- // TimePoint::max(). I wish we could use C++20 format, but it's not
- // there yet.
- // return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
-
- auto in_time_t = std::chrono::system_clock::to_time_t(tp);
- return to_string(in_time_t);
-}
-
-template <typename Rep, typename Period = std::ratio<1>>
-std::string to_string(const std::chrono::duration<Rep, Period> & duration)
-{
- auto seconds_as_int = std::chrono::duration_cast<std::chrono::seconds>(duration);
- if (seconds_as_int == duration)
- return std::to_string(seconds_as_int.count()) + "s";
- auto seconds_as_double = std::chrono::duration_cast<std::chrono::duration<double>>(duration);
- return std::to_string(seconds_as_double.count()) + "s";
-}
-
-template <typename Clock, typename Duration = typename Clock::duration>
-std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
-{
- return o << to_string(tp);
-}
-
-template <typename Rep, typename Period = std::ratio<1>>
-std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
-{
- return o << to_string(duration);
-}
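
For reference, a minimal usage sketch of the to_string()/operator<< helpers that the removed chrono_io.h provided (assumes the header above and cctz are available; the printed values are illustrative):

#include <chrono>
#include <iostream>
// #include <common/chrono_io.h>   // the header deleted above

int main()
{
    using namespace std::chrono_literals;

    auto now = std::chrono::system_clock::now();
    std::cout << to_string(now) << '\n';      // "YYYY-MM-DD HH:MM:SS" in the local time zone
    std::cout << to_string(2s) << '\n';       // "2s" (integral-seconds fast path)
    std::cout << to_string(1500ms) << '\n';   // "1.500000s" (falls back to the double branch)
    std::cout << now << '\n';                 // operator<< forwards to to_string()
}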
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp b/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp
deleted file mode 100644
index 49d566dac19..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
-
-#include <common/defines.h>
-
-#if defined(__linux__) && !defined(THREAD_SANITIZER)
- #define USE_PHDR_CACHE 1
-#endif
-
-/// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own.
-#ifdef USE_PHDR_CACHE
-
-#if defined(__clang__)
-# pragma clang diagnostic ignored "-Wreserved-id-macro"
-# pragma clang diagnostic ignored "-Wunused-macros"
-#endif
-
-#define __msan_unpoison(X, Y) // NOLINT
-#if defined(ch_has_feature)
-# if ch_has_feature(memory_sanitizer)
-# undef __msan_unpoison
-# include <sanitizer/msan_interface.h>
-# endif
-#endif
-
-#include <link.h>
-#include <dlfcn.h>
-#include <vector>
-#include <atomic>
-#include <cstddef>
-#include <stdexcept>
-
-
-namespace
-{
-
-// This is adapted from
-// https://github.com/scylladb/seastar/blob/master/core/exception_hacks.hh
-// https://github.com/scylladb/seastar/blob/master/core/exception_hacks.cc
-
-using DLIterateFunction = int (*) (int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data);
-
-DLIterateFunction getOriginalDLIteratePHDR()
-{
- void * func = dlsym(RTLD_NEXT, "dl_iterate_phdr");
- if (!func)
- throw std::runtime_error("Cannot find dl_iterate_phdr function with dlsym");
- return reinterpret_cast<DLIterateFunction>(func);
-}
-
-
-using PHDRCache = std::vector<dl_phdr_info>;
-std::atomic<PHDRCache *> phdr_cache {};
-
-}
-
-
-extern "C"
-#ifndef __clang__
-[[gnu::visibility("default")]]
-[[gnu::externally_visible]]
-#endif
-int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data)
-{
- auto * current_phdr_cache = phdr_cache.load();
- if (!current_phdr_cache)
- {
- // Cache is not yet populated, pass through to the original function.
- return getOriginalDLIteratePHDR()(callback, data);
- }
-
- int result = 0;
- for (auto & entry : *current_phdr_cache)
- {
- result = callback(&entry, offsetof(dl_phdr_info, dlpi_adds), data);
- if (result != 0)
- break;
- }
- return result;
-}
-
-
-extern "C"
-{
-#ifdef ADDRESS_SANITIZER
-void __lsan_ignore_object(const void *);
-#else
-void __lsan_ignore_object(const void *) {} // NOLINT
-#endif
-}
-
-
-void updatePHDRCache()
-{
- // Fill out ELF header cache for access without locking.
- // This assumes no dynamic object loading/unloading after this point
-
- PHDRCache * new_phdr_cache = new PHDRCache;
- getOriginalDLIteratePHDR()([] (dl_phdr_info * info, size_t /*size*/, void * data)
- {
- // `info` is created by dl_iterate_phdr, which is a non-instrumented
- // libc function, so we have to unpoison it manually.
- __msan_unpoison(info, sizeof(*info));
-
- reinterpret_cast<PHDRCache *>(data)->push_back(*info);
- return 0;
- }, new_phdr_cache);
- phdr_cache.store(new_phdr_cache);
-
- /// Memory is intentionally leaked.
- __lsan_ignore_object(new_phdr_cache);
-}
-
-
-bool hasPHDRCache()
-{
- return phdr_cache.load() != nullptr;
-}
-
-#else
-
-void updatePHDRCache() {}
-bool hasPHDRCache() { return false; }
-
-#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.h
deleted file mode 100644
index d2854ece0bc..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/phdr_cache.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
-
-/** Collects all dl_phdr_info items and caches them in a static array.
- * Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache
- * thus eliminating scalability bottleneck in C++ exception unwinding.
- * As a drawback, this only works if no dynamic object unloading happens after this point.
- * This function is thread-safe. You should call it to update cache after loading new shared libraries.
- * Otherwise exception handling from dlopened libraries won't work (will call std::terminate immediately).
- *
- * NOTE: It is disabled with Thread Sanitizer because TSan can only use original "dl_iterate_phdr" function.
- */
-void updatePHDRCache();
-
-/** Check if "dl_iterate_phdr" will be lock-free
- * to determine if some features like Query Profiler can be used.
- */
-bool hasPHDRCache();
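
A minimal sketch of how the removed phdr_cache pair above is meant to be used, per its own doc comment: load every shared object first, then freeze the cache so dl_iterate_phdr, and with it C++ exception unwinding, becomes lock-free. The library name below is hypothetical.

#include <dlfcn.h>
#include <cstdio>
// #include <common/phdr_cache.h>   // the header deleted above

int main()
{
    // Load everything the process will ever dlopen() up front...
    void * handle = dlopen("libexample.so", RTLD_NOW);   // hypothetical library
    (void)handle;

    // ...then populate the cache. After this point no dynamic objects may be
    // loaded or unloaded, but dl_iterate_phdr no longer takes the loader lock.
    updatePHDRCache();

    if (hasPHDRCache())
        std::printf("dl_iterate_phdr is served from the lock-free cache\n");

    return 0;
}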
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/common/scope_guard_safe.h b/ydb/library/yql/udfs/common/clickhouse/client/base/common/scope_guard_safe.h
deleted file mode 100644
index a52b4a14f48..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/common/scope_guard_safe.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#pragma once
-
-#include <common/scope_guard.h>
-#include <common/logger_useful.h>
-#include <Common/MemoryTracker.h>
-
-/// Same as SCOPE_EXIT() but block the MEMORY_LIMIT_EXCEEDED errors.
-///
-/// Typical example of SCOPE_EXIT_MEMORY() usage is when code under it may do
-/// some tiny allocations, that may fail under high memory pressure or/and low
-/// max_memory_usage (and related limits).
-///
-/// NOTE: it should be used with caution.
-#define SCOPE_EXIT_MEMORY(...) SCOPE_EXIT( \
- MemoryTracker::LockExceptionInThread \
- lock_memory_tracker(VariableContext::Global); \
- __VA_ARGS__; \
-)
-
-/// Same as SCOPE_EXIT() but try/catch/tryLogCurrentException any exceptions.
-///
-/// SCOPE_EXIT_SAFE() should be used in case the exception during the code
-/// under SCOPE_EXIT() is not "that fatal" and error message in log is enough.
-///
-/// Good example is calling CurrentThread::detachQueryIfNotDetached().
-///
-/// Anti-pattern is calling WriteBuffer::finalize() under SCOPE_EXIT_SAFE()
-/// (since finalize() can do final write and it is better to fail abnormally
-/// instead of ignoring write error).
-///
-/// NOTE: it should be used with double caution.
-#define SCOPE_EXIT_SAFE(...) SCOPE_EXIT( \
- try \
- { \
- __VA_ARGS__; \
- } \
- catch (...) \
- { \
- tryLogCurrentException(__PRETTY_FUNCTION__); \
- } \
-)
-
-/// Same as SCOPE_EXIT() but:
-/// - block the MEMORY_LIMIT_EXCEEDED errors,
-/// - try/catch/tryLogCurrentException any exceptions.
-///
-/// SCOPE_EXIT_MEMORY_SAFE() can be used when the error can be ignored, and in
-/// addition to SCOPE_EXIT_SAFE() it will also lock MEMORY_LIMIT_EXCEEDED to
-/// avoid such exceptions.
-///
-/// It does exists as a separate helper, since you do not need to lock
-/// MEMORY_LIMIT_EXCEEDED always (there are cases when code under SCOPE_EXIT does
-/// not do any allocations, while LockExceptionInThread increment atomic
-/// variable).
-///
-/// NOTE: it should be used with triple caution.
-#define SCOPE_EXIT_MEMORY_SAFE(...) SCOPE_EXIT( \
- try \
- { \
- MemoryTracker::LockExceptionInThread \
- lock_memory_tracker(VariableContext::Global); \
- __VA_ARGS__; \
- } \
- catch (...) \
- { \
- tryLogCurrentException(__PRETTY_FUNCTION__); \
- } \
-)
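
A minimal usage sketch for the macros above, assuming SCOPE_EXIT() from common/scope_guard.h is available; the free functions are hypothetical stand-ins.

void detachTempState();   /// hypothetical cleanup that may throw
void logFinalStats();     /// hypothetical cleanup that may do small allocations
void processChunk();      /// hypothetical work that may throw

void runStep()
{
    /// Exceptions thrown by the cleanup are logged via tryLogCurrentException()
    /// instead of escaping the scope guard.
    SCOPE_EXIT_SAFE(
        detachTempState();
    );

    /// Same, but MEMORY_LIMIT_EXCEEDED is also blocked while the cleanup runs.
    SCOPE_EXIT_MEMORY_SAFE(
        logFinalStats();
    );

    processChunk();
}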
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/LICENSE b/ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/LICENSE
deleted file mode 100644
index 8dada3edaf5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright {yyyy} {name of copyright owner}
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/README b/ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/README
deleted file mode 100644
index 6b72706dce0..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/base/pcg-random/README
+++ /dev/null
@@ -1,2 +0,0 @@
-https://github.com/imneme/pcg-cpp
-0ca2e8ea6ba212bdfbc6219c2313c45917e34b8d
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ActionLock.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ActionLock.cpp
deleted file mode 100644
index 04c48008683..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ActionLock.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "ActionLock.h"
-#include <Common/ActionBlocker.h>
-
-
-namespace NDB
-{
-
-ActionLock::ActionLock(const ActionBlocker & blocker) : counter_ptr(blocker.counter)
-{
- if (auto counter = counter_ptr.lock())
- ++(*counter);
-}
-
-ActionLock::ActionLock(ActionLock && other)
-{
- *this = std::move(other);
-}
-
-ActionLock & ActionLock::operator=(ActionLock && other)
-{
- auto lock_lhs = this->counter_ptr.lock();
-
- counter_ptr = std::move(other.counter_ptr);
- /// After move other.counter_ptr still points to counter, reset it explicitly
- other.counter_ptr.reset();
-
- if (lock_lhs)
- --(*lock_lhs);
-
- return *this;
-}
-
-}
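
A self-contained analogue of the counter pattern implemented above (not the real ActionBlocker API, which lives in Common/ActionBlocker.h): an RAII guard increments a shared counter through a weak_ptr for as long as it is alive, with the same move semantics as the deleted ActionLock.

#include <atomic>
#include <cassert>
#include <memory>
#include <utility>

struct Blocker
{
    std::shared_ptr<std::atomic<int>> counter = std::make_shared<std::atomic<int>>(0);
};

class Guard
{
    std::weak_ptr<std::atomic<int>> counter_ptr;

public:
    explicit Guard(const Blocker & blocker) : counter_ptr(blocker.counter)
    {
        if (auto counter = counter_ptr.lock())
            ++*counter;
    }

    Guard(Guard && other) noexcept { *this = std::move(other); }

    Guard & operator=(Guard && other) noexcept
    {
        auto lock_lhs = counter_ptr.lock();
        counter_ptr = std::move(other.counter_ptr);
        other.counter_ptr.reset();   /// defensive, mirrors the code above
        if (lock_lhs)
            --*lock_lhs;             /// release whatever this guard held before
        return *this;
    }

    ~Guard()
    {
        if (auto counter = counter_ptr.lock())
            --*counter;
    }
};

int main()
{
    Blocker blocker;
    {
        Guard g1(blocker);
        Guard g2 = std::move(g1);                /// ownership moves, counter stays at 1
        assert(blocker.counter->load() == 1);
    }
    assert(blocker.counter->load() == 0);        /// all guards destroyed
}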
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h
deleted file mode 100644
index 45e0885e6fc..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashing.h
+++ /dev/null
@@ -1,689 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-#include <Common/HashTable/HashTableKeyHolder.h>
-#include <Common/ColumnsHashingImpl.h>
-#include <Common/Arena.h>
-#include <Common/LRUCache.h>
-#include <Common/assert_cast.h>
-#include <common/unaligned.h>
-
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnLowCardinality.h>
-
-#include <Core/Defines.h>
-#include <memory>
-#include <cassert>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-namespace ColumnsHashing
-{
-
-/// For the case when there is one numeric key.
-/// UInt8/16/32/64 for any type with corresponding bit width.
-template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
-struct HashMethodOneNumber
- : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
- using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
- using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- const char * vec;
-
- /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise.
- HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- {
- vec = key_columns[0]->getRawData().data;
- }
-
- HashMethodOneNumber(const IColumn * column)
- {
- vec = column->getRawData().data;
- }
-
- /// Creates context. Method is called once and result context is used in all threads.
- using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr
-
- /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr.
- /// Data is a HashTable where to insert key from column's row.
- /// For Serialized method, key may be placed in pool.
- using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult
-
- /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr.
- using Base::findKey; /// (Data & data, size_t row, Arena & pool) -> FindResult
-
- /// Get hash value of row.
- using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t
-
- /// Is used for default implementation in HashMethodBase.
- FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }
-
- const FieldType * getKeyData() const { return reinterpret_cast<const FieldType *>(vec); }
-};
-
-
-/// For the case when there is one string key.
-template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
-struct HashMethodString
- : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
- using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
- using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- const IColumn::Offset * offsets;
- const UInt8 * chars;
-
- HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- {
- const IColumn & column = *key_columns[0];
- const ColumnString & column_string = assert_cast<const ColumnString &>(column);
- offsets = column_string.getOffsets().data();
- chars = column_string.getChars().data();
- }
-
- auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const
- {
- StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
-
- if constexpr (place_string_to_arena)
- {
- return ArenaKeyHolder{key, pool};
- }
- else
- {
- return key;
- }
- }
-
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
-};
-
-
-/// For the case when there is one fixed-length string key.
-template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
-struct HashMethodFixedString
- : public columns_hashing_impl::
- HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
- using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
- using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- size_t n;
- const ColumnFixedString::Chars * chars;
-
- HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- {
- const IColumn & column = *key_columns[0];
- const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column);
- n = column_string.getN();
- chars = &column_string.getChars();
- }
-
- auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const
- {
- StringRef key(&(*chars)[row * n], n);
-
- if constexpr (place_string_to_arena)
- {
- return ArenaKeyHolder{key, pool};
- }
- else
- {
- return key;
- }
- }
-
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
-};
-
-
-/// Cache stores dictionaries and saved_hash per dictionary key.
-class LowCardinalityDictionaryCache : public HashMethodContext
-{
-public:
- /// Will assume that dictionaries with same hash has the same keys.
- /// Just in case, check that they have also the same size.
- struct DictionaryKey
- {
- UInt128 hash;
- UInt64 size;
-
- bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; }
- };
-
- struct DictionaryKeyHash
- {
- size_t operator()(const DictionaryKey & key) const
- {
- SipHash hash;
- hash.update(key.hash);
- hash.update(key.size);
- return hash.get64();
- }
- };
-
- struct CachedValues
- {
- /// Store ptr to dictionary to be sure it won't be deleted.
- ColumnPtr dictionary_holder;
- /// Hashes for dictionary keys.
- const UInt64 * saved_hash = nullptr;
- };
-
- using CachedValuesPtr = std::shared_ptr<CachedValues>;
-
- explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {}
-
- CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); }
- void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); }
-
-private:
- using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>;
- Cache cache;
-};
-
-
-/// Single low cardinality column.
-template <typename SingleColumnMethod, typename Mapped, bool use_cache>
-struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
-{
- using Base = SingleColumnMethod;
-
- enum class VisitValue
- {
- Empty = 0,
- Found = 1,
- NotFound = 2,
- };
-
- static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
- using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>;
- using FindResult = columns_hashing_impl::FindResultImpl<Mapped>;
-
- static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings)
- {
- return std::make_shared<LowCardinalityDictionaryCache>(settings);
- }
-
- ColumnRawPtrs key_columns;
- const IColumn * positions = nullptr;
- size_t size_of_index_type = 0;
-
- /// saved hash is from current column or from cache.
- const UInt64 * saved_hash = nullptr;
- /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted.
- ColumnPtr dictionary_holder;
-
- /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages.
- columns_hashing_impl::MappedCache<Mapped> mapped_cache;
- PaddedPODArray<VisitValue> visit_cache;
-
- /// If initialized column is nullable.
- bool is_nullable = false;
-
- static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * column)
- {
- auto low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(column);
- if (!low_cardinality_column)
- throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. "
- "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR);
- return *low_cardinality_column;
- }
-
- HashMethodSingleLowCardinalityColumn(
- const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context)
- : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context)
- {
- auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]);
-
- if (!context)
- throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn",
- ErrorCodes::LOGICAL_ERROR);
-
- LowCardinalityDictionaryCache * lcd_cache;
- if constexpr (use_cache)
- {
- lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get());
- if (!lcd_cache)
- {
- const auto & cached_val = *context;
- throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: "
- + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- auto * dict = column->getDictionary().getNestedNotNullableColumn().get();
- is_nullable = column->getDictionary().nestedColumnIsNullable();
- key_columns = {dict};
- bool is_shared_dict = column->isSharedDictionary();
-
- typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key;
- typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values;
-
- if (is_shared_dict)
- {
- dictionary_key = {column->getDictionary().getHash(), dict->size()};
- if constexpr (use_cache)
- cached_values = lcd_cache->get(dictionary_key);
- }
-
- if (cached_values)
- {
- saved_hash = cached_values->saved_hash;
- dictionary_holder = cached_values->dictionary_holder;
- }
- else
- {
- saved_hash = column->getDictionary().tryGetSavedHash();
- dictionary_holder = column->getDictionaryPtr();
-
- if constexpr (use_cache)
- {
- if (is_shared_dict)
- {
- cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>();
- cached_values->saved_hash = saved_hash;
- cached_values->dictionary_holder = dictionary_holder;
-
- lcd_cache->set(dictionary_key, cached_values);
- }
- }
- }
-
- if constexpr (has_mapped)
- mapped_cache.resize(key_columns[0]->size());
-
- VisitValue empty(VisitValue::Empty);
- visit_cache.assign(key_columns[0]->size(), empty);
-
- size_of_index_type = column->getSizeOfIndexType();
- positions = column->getIndexesPtr().get();
- }
-
- ALWAYS_INLINE size_t getIndexAt(size_t row) const
- {
- switch (size_of_index_type)
- {
- case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row);
- case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row);
- case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row);
- case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row);
- default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- /// Get the key holder from the key columns for insertion into the hash table.
- ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const
- {
- return Base::getKeyHolder(getIndexAt(row), pool);
- }
-
- template <typename Data>
- ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool)
- {
- size_t row = getIndexAt(row_);
-
- if (is_nullable && row == 0)
- {
- visit_cache[row] = VisitValue::Found;
- bool has_null_key = data.hasNullKeyData();
- data.hasNullKeyData() = true;
-
- if constexpr (has_mapped)
- return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key);
- else
- return EmplaceResult(!has_null_key);
- }
-
- if (visit_cache[row] == VisitValue::Found)
- {
- if constexpr (has_mapped)
- return EmplaceResult(mapped_cache[row], mapped_cache[row], false);
- else
- return EmplaceResult(false);
- }
-
- auto key_holder = getKeyHolder(row_, pool);
-
- bool inserted = false;
- typename Data::LookupResult it;
- if (saved_hash)
- data.emplace(key_holder, it, inserted, saved_hash[row]);
- else
- data.emplace(key_holder, it, inserted);
-
- visit_cache[row] = VisitValue::Found;
-
- if constexpr (has_mapped)
- {
- auto & mapped = it->getMapped();
- if (inserted)
- {
- new (&mapped) Mapped();
- }
- mapped_cache[row] = mapped;
- return EmplaceResult(mapped, mapped_cache[row], inserted);
- }
- else
- return EmplaceResult(inserted);
- }
-
- ALWAYS_INLINE bool isNullAt(size_t i)
- {
- if (!is_nullable)
- return false;
-
- return getIndexAt(i) == 0;
- }
-
- template <typename Data>
- ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
- {
- size_t row = getIndexAt(row_);
-
- if (is_nullable && row == 0)
- {
- if constexpr (has_mapped)
- return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
- else
- return FindResult(data.hasNullKeyData());
- }
-
- if (visit_cache[row] != VisitValue::Empty)
- {
- if constexpr (has_mapped)
- return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
- else
- return FindResult(visit_cache[row] == VisitValue::Found);
- }
-
- auto key_holder = getKeyHolder(row_, pool);
-
- typename Data::iterator it;
- if (saved_hash)
- it = data.find(*key_holder, saved_hash[row]);
- else
- it = data.find(*key_holder);
-
- bool found = it != data.end();
- visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
-
- if constexpr (has_mapped)
- {
- if (found)
- mapped_cache[row] = it->second;
- }
-
- if constexpr (has_mapped)
- return FindResult(&mapped_cache[row], found);
- else
- return FindResult(found);
- }
-
- template <typename Data>
- ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
- {
- row = getIndexAt(row);
- if (saved_hash)
- return saved_hash[row];
-
- return Base::getHash(data, row, pool);
- }
-};
-
-
-// Optional mask for low cardinality columns.
-template <bool has_low_cardinality>
-struct LowCardinalityKeys
-{
- ColumnRawPtrs nested_columns;
- ColumnRawPtrs positions;
- Sizes position_sizes;
-};
-
-template <>
-struct LowCardinalityKeys<false> {};
-
-/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
-template <
- typename Value,
- typename Key,
- typename Mapped,
- bool has_nullable_keys_ = false,
- bool has_low_cardinality_ = false,
- bool use_cache = true,
- bool need_offset = false>
-struct HashMethodKeysFixed
- : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
- , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
- using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>;
- using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
- using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
-
- static constexpr bool has_nullable_keys = has_nullable_keys_;
- static constexpr bool has_low_cardinality = has_low_cardinality_;
-
- LowCardinalityKeys<has_low_cardinality> low_cardinality_keys;
- Sizes key_sizes;
- size_t keys_size;
-
- /// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here.
-#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
- std::unique_ptr<uint8_t[]> masks;
- std::unique_ptr<const char*[]> columns_data;
-#endif
-
- PaddedPODArray<Key> prepared_keys;
-
- static bool usePreparedKeys(const Sizes & key_sizes)
- {
- if (has_low_cardinality || has_nullable_keys || sizeof(Key) > 16)
- return false;
-
- for (auto size : key_sizes)
- if (size != 1 && size != 2 && size != 4 && size != 8 && size != 16)
- return false;
-
- return true;
- }
-
- HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &)
- : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size())
- {
- if constexpr (has_low_cardinality)
- {
- low_cardinality_keys.nested_columns.resize(key_columns.size());
- low_cardinality_keys.positions.assign(key_columns.size(), nullptr);
- low_cardinality_keys.position_sizes.resize(key_columns.size());
- for (size_t i = 0; i < key_columns.size(); ++i)
- {
- if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
- {
- low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
- low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();
- low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType();
- }
- else
- low_cardinality_keys.nested_columns[i] = key_columns[i];
- }
- }
-
- if (usePreparedKeys(key_sizes))
- {
- packFixedBatch(keys_size, Base::getActualColumns(), key_sizes, prepared_keys);
- }
-
-#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
- else if constexpr (!has_low_cardinality && !has_nullable_keys && sizeof(Key) <= 16)
- {
- /** The task is to "pack" multiple fixed-size fields into single larger Key.
- * Example: pack UInt8, UInt32, UInt16, UInt64 into UInt128 key:
- * [- ---- -- -------- -] - the resulting uint128 key
- * ^ ^ ^ ^ ^
- * u8 u32 u16 u64 zero
- *
- * We can do it with the help of SSSE3 shuffle instruction.
- *
- * There will be a mask for every GROUP BY element (keys_size masks in total).
- * Every mask has 16 bytes but only sizeof(Key) bytes are used (other we don't care).
- *
- * Every byte in the mask has the following meaning:
- * - if it is 0..15, take the element at this index from source register and place here in the result;
- * - if it is 0xFF - set the elemend in the result to zero.
- *
- * Example:
- * We want to copy UInt32 to offset 1 in the destination and set other bytes in the destination as zero.
- * The corresponding mask will be: FF, 0, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF
- *
- * The max size of destination is 16 bytes, because we cannot process more with SSSE3.
- *
- * The method is disabled under MSan, because it's allowed
- * to load into SSE register and process up to 15 bytes of uninitialized memory in columns padding.
- * We don't use this uninitialized memory but MSan cannot look "into" the shuffle instruction.
- *
- * 16-bytes masks can be placed overlapping, only first sizeof(Key) bytes are relevant in each mask.
- * We initialize them to 0xFF and then set the needed elements.
- */
- size_t total_masks_size = sizeof(Key) * keys_size + (16 - sizeof(Key));
- masks.reset(new uint8_t[total_masks_size]);
- memset(masks.get(), 0xFF, total_masks_size);
-
- size_t offset = 0;
- for (size_t i = 0; i < keys_size; ++i)
- {
- for (size_t j = 0; j < key_sizes[i]; ++j)
- {
- masks[i * sizeof(Key) + offset] = j;
- ++offset;
- }
- }
-
- columns_data.reset(new const char*[keys_size]);
-
- for (size_t i = 0; i < keys_size; ++i)
- columns_data[i] = Base::getActualColumns()[i]->getRawData().data;
- }
-#endif
- }
-
- ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
- {
- if constexpr (has_nullable_keys)
- {
- auto bitmap = Base::createBitmap(row);
- return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap);
- }
- else
- {
- if constexpr (has_low_cardinality)
- return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
- &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
-
- if (!prepared_keys.empty())
- return prepared_keys[row];
-
-#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
- if constexpr (sizeof(Key) <= 16)
- {
- assert(!has_low_cardinality && !has_nullable_keys);
- return packFixedShuffle<Key>(columns_data.get(), keys_size, key_sizes.data(), row, masks.get());
- }
-#endif
- return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
- }
- }
-
- static std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
- {
- if (!usePreparedKeys(key_sizes))
- return {};
-
- std::vector<IColumn *> new_columns;
- new_columns.reserve(key_columns.size());
-
- Sizes new_sizes;
- auto fill_size = [&](size_t size)
- {
- for (size_t i = 0; i < key_sizes.size(); ++i)
- {
- if (key_sizes[i] == size)
- {
- new_columns.push_back(key_columns[i]);
- new_sizes.push_back(size);
- }
- }
- };
-
- fill_size(16);
- fill_size(8);
- fill_size(4);
- fill_size(2);
- fill_size(1);
-
- key_columns.swap(new_columns);
- return new_sizes;
- }
-};
-
-/** Hash by concatenating serialized key values.
- * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts.
- * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
- * Therefore, when aggregating by several strings, there is no ambiguity.
- */
-template <typename Value, typename Mapped>
-struct HashMethodSerialized
- : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false>
-{
- using Self = HashMethodSerialized<Value, Mapped>;
- using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
- ColumnRawPtrs key_columns;
- size_t keys_size;
-
- HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
- : key_columns(key_columns_), keys_size(key_columns_.size()) {}
-
-protected:
- friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
- ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const
- {
- return SerializedKeyHolder{
- serializeKeysToPoolContiguous(row, keys_size, key_columns, pool),
- pool};
- }
-};
-
-/// For the case when there is one string key.
-template <typename Value, typename Mapped, bool use_cache = true, bool need_offset = false>
-struct HashMethodHashed
- : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
-{
- using Key = UInt128;
- using Self = HashMethodHashed<Value, Mapped, use_cache, need_offset>;
- using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
-
- ColumnRawPtrs key_columns;
-
- HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &)
- : key_columns(std::move(key_columns_)) {}
-
- ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
- {
- return hash128(row, key_columns.size(), key_columns);
- }
-};
-
-}
-}
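
A standalone sketch of the SSSE3 key-packing idea explained in the HashMethodKeysFixed comment above: one 16-byte shuffle mask per field copies that field's bytes to its offset in the packed key and zeroes everything else, and the per-field results are OR-ed together. Field types, values, and buffers are illustrative; compile with -mssse3.

#include <immintrin.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    uint8_t  a = 0x11;
    uint32_t b = 0x22334455;
    uint16_t c = 0x6677;

    const void * srcs[3] = {&a, &b, &c};
    const size_t sizes[3] = {1, 4, 2};

    /// 0xFF in a mask byte leaves the destination byte zero; a value 0..15
    /// copies that byte of the source register (see the comment above).
    alignas(16) uint8_t masks[3][16];
    std::memset(masks, 0xFF, sizeof(masks));
    size_t offset = 0;
    for (size_t i = 0; i < 3; ++i)
        for (size_t j = 0; j < sizes[i]; ++j)
            masks[i][offset++] = static_cast<uint8_t>(j);

    __m128i key = _mm_setzero_si128();
    for (size_t i = 0; i < 3; ++i)
    {
        /// The real code loads 16 bytes straight from column padding; here each
        /// field is copied into a zero-padded buffer to keep the sketch safe.
        alignas(16) uint8_t padded[16] = {};
        std::memcpy(padded, srcs[i], sizes[i]);
        const __m128i src = _mm_load_si128(reinterpret_cast<const __m128i *>(padded));
        const __m128i mask = _mm_load_si128(reinterpret_cast<const __m128i *>(masks[i]));
        key = _mm_or_si128(key, _mm_shuffle_epi8(src, mask));
    }

    alignas(16) uint8_t out[16];
    _mm_store_si128(reinterpret_cast<__m128i *>(out), key);
    for (int i = 0; i < 16; ++i)
        std::printf("%02x ", out[i]);            /// 11 55 44 33 22 77 66 00 ...
    std::printf("\n");
}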
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h
deleted file mode 100644
index 04846d65469..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ColumnsHashingImpl.h
+++ /dev/null
@@ -1,394 +0,0 @@
-#pragma once
-
-#include <Columns/IColumn.h>
-#include <Columns/ColumnNullable.h>
-#include <Common/assert_cast.h>
-#include <Common/HashTable/HashTableKeyHolder.h>
-#include <Interpreters/AggregationCommon.h>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-namespace ColumnsHashing
-{
-
-/// Generic context for HashMethod. The context is shared between multiple threads, so all methods must be thread-safe.
-/// It is used for caching.
-class HashMethodContext
-{
-public:
- virtual ~HashMethodContext() = default;
-
- struct Settings
- {
- size_t max_threads;
- };
-};
-
-using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
-
-
-namespace columns_hashing_impl
-{
-
-template <typename Value, bool consecutive_keys_optimization_>
-struct LastElementCache
-{
- static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
- Value value;
- bool empty = true;
- bool found = false;
-
- bool check(const Value & value_) { return !empty && value == value_; }
-
- template <typename Key>
- bool check(const Key & key) { return !empty && value.first == key; }
-};
-
-template <typename Data>
-struct LastElementCache<Data, false>
-{
- static constexpr bool consecutive_keys_optimization = false;
-};
-
-template <typename Mapped>
-class EmplaceResultImpl
-{
- Mapped & value;
- Mapped & cached_value;
- bool inserted;
-
-public:
- EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_)
- : value(value_), cached_value(cached_value_), inserted(inserted_) {}
-
- bool isInserted() const { return inserted; }
- auto & getMapped() const { return value; }
-
- void setMapped(const Mapped & mapped)
- {
- cached_value = mapped;
- value = mapped;
- }
-};
-
-template <>
-class EmplaceResultImpl<void>
-{
- bool inserted;
-
-public:
- explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {}
- bool isInserted() const { return inserted; }
-};
-
-/// FindResult may optionally contain a pointer to the value and its offset in the hash table buffer.
-/// Only the bool `found` is strictly required,
-/// so there are 4 different specializations of FindResultImpl.
-class FindResultImplBase
-{
- bool found;
-
-public:
- explicit FindResultImplBase(bool found_) : found(found_) {}
- bool isFound() const { return found; }
-};
-
-template <bool need_offset = false>
-class FindResultImplOffsetBase
-{
-public:
- constexpr static bool has_offset = need_offset;
- explicit FindResultImplOffsetBase(size_t /* off */) {}
-};
-
-template <>
-class FindResultImplOffsetBase<true>
-{
- size_t offset;
-public:
- constexpr static bool has_offset = true;
-
- explicit FindResultImplOffsetBase(size_t off) : offset(off) {}
- ALWAYS_INLINE size_t getOffset() const { return offset; }
-};
-
-template <typename Mapped, bool need_offset = false>
-class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
-{
- Mapped * value;
-
-public:
- FindResultImpl()
- : FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0)
- {}
-
- FindResultImpl(Mapped * value_, bool found_, size_t off)
- : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {}
- Mapped & getMapped() const { return *value; }
-};
-
-template <bool need_offset>
-class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
-{
-public:
- FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off) {}
-};
-
-template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
-class HashMethodBase
-{
-public:
- using EmplaceResult = EmplaceResultImpl<Mapped>;
- using FindResult = FindResultImpl<Mapped, need_offset>;
- static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
- using Cache = LastElementCache<Value, consecutive_keys_optimization>;
-
- static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }
-
- template <typename Data>
- ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
- {
- auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
- return emplaceImpl(key_holder, data);
- }
-
- template <typename Data>
- ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
- {
- auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
- return findKeyImpl(keyHolderGetKey(key_holder), data);
- }
-
- template <typename Data>
- ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
- {
- auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
- return data.hash(keyHolderGetKey(key_holder));
- }
-
-protected:
- Cache cache;
-
- HashMethodBase()
- {
- if constexpr (consecutive_keys_optimization)
- {
- if constexpr (has_mapped)
- {
- /// Init PairNoInit elements.
- cache.value.second = Mapped();
- cache.value.first = {};
- }
- else
- cache.value = Value();
- }
- }
-
- template <typename Data, typename KeyHolder>
- ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
- {
- if constexpr (Cache::consecutive_keys_optimization)
- {
- if (cache.found && cache.check(keyHolderGetKey(key_holder)))
- {
- if constexpr (has_mapped)
- return EmplaceResult(cache.value.second, cache.value.second, false);
- else
- return EmplaceResult(false);
- }
- }
-
- typename Data::LookupResult it;
- bool inserted = false;
- data.emplace(key_holder, it, inserted);
-
- [[maybe_unused]] Mapped * cached = nullptr;
- if constexpr (has_mapped)
- cached = &it->getMapped();
-
- if (inserted)
- {
- if constexpr (has_mapped)
- {
- new (&it->getMapped()) Mapped();
- }
- }
-
- if constexpr (consecutive_keys_optimization)
- {
- cache.found = true;
- cache.empty = false;
-
- if constexpr (has_mapped)
- {
- cache.value.first = it->getKey();
- cache.value.second = it->getMapped();
- cached = &cache.value.second;
- }
- else
- {
- cache.value = it->getKey();
- }
- }
-
- if constexpr (has_mapped)
- return EmplaceResult(it->getMapped(), *cached, inserted);
- else
- return EmplaceResult(inserted);
- }
-
- template <typename Data, typename Key>
- ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
- {
- if constexpr (Cache::consecutive_keys_optimization)
- {
-            /// It's possible to support such a combination, but the code would become more complex.
-            /// Currently there is no place where we need these options enabled together.
- static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
- if (cache.check(key))
- {
- if constexpr (has_mapped)
- return FindResult(&cache.value.second, cache.found, 0);
- else
- return FindResult(cache.found, 0);
- }
- }
-
- auto it = data.find(key);
-
- if constexpr (consecutive_keys_optimization)
- {
- cache.found = it != nullptr;
- cache.empty = false;
-
- if constexpr (has_mapped)
- {
- cache.value.first = key;
- if (it)
- {
- cache.value.second = it->getMapped();
- }
- }
- else
- {
- cache.value = key;
- }
- }
-
- size_t offset = 0;
- if constexpr (FindResult::has_offset)
- {
- offset = it ? data.offsetInternal(it) : 0;
- }
- if constexpr (has_mapped)
- return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset);
- else
- return FindResult(it != nullptr, offset);
- }
-};
-
-
-template <typename T>
-struct MappedCache : public PaddedPODArray<T> {};
-
-template <>
-struct MappedCache<void> {};
-
-
-/// This class is designed to provide the functionality that is required for
-/// supporting nullable keys in HashMethodKeysFixed. If there are
-/// no nullable keys, this class is merely implemented as an empty shell.
-template <typename Key, bool has_nullable_keys>
-class BaseStateKeysFixed;
-
-/// Case where nullable keys are supported.
-template <typename Key>
-class BaseStateKeysFixed<Key, true>
-{
-protected:
- BaseStateKeysFixed(const ColumnRawPtrs & key_columns)
- {
- null_maps.reserve(key_columns.size());
- actual_columns.reserve(key_columns.size());
-
- for (const auto & col : key_columns)
- {
- if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col))
- {
- actual_columns.push_back(&nullable_col->getNestedColumn());
- null_maps.push_back(&nullable_col->getNullMapColumn());
- }
- else
- {
- actual_columns.push_back(col);
- null_maps.push_back(nullptr);
- }
- }
- }
-
- /// Return the columns which actually contain the values of the keys.
- /// For a given key column, if it is nullable, we return its nested
- /// column. Otherwise we return the key column itself.
- inline const ColumnRawPtrs & getActualColumns() const
- {
- return actual_columns;
- }
-
- /// Create a bitmap that indicates whether, for a particular row,
- /// a key column bears a null value or not.
- KeysNullMap<Key> createBitmap(size_t row) const
- {
- KeysNullMap<Key> bitmap{};
-
- for (size_t k = 0; k < null_maps.size(); ++k)
- {
- if (null_maps[k] != nullptr)
- {
- const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
- if (null_map[row] == 1)
- {
- size_t bucket = k / 8;
- size_t offset = k % 8;
- bitmap[bucket] |= UInt8(1) << offset;
- }
- }
- }
-
- return bitmap;
- }
-
-private:
- ColumnRawPtrs actual_columns;
- ColumnRawPtrs null_maps;
-};
-
-/// Case where nullable keys are not supported.
-template <typename Key>
-class BaseStateKeysFixed<Key, false>
-{
-protected:
- BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {}
-
- const ColumnRawPtrs & getActualColumns() const { return actual_columns; }
-
- KeysNullMap<Key> createBitmap(size_t) const
- {
- throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
- " is forbidden", ErrorCodes::LOGICAL_ERROR};
- }
-
-private:
- ColumnRawPtrs actual_columns;
-};
-
-}
-
-}
-
-}
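
The LastElementCache / consecutive_keys_optimization machinery above short-circuits repeated lookups when consecutive rows carry the same key. A rough standalone analogue of that idea over std::unordered_map follows; CachedAggregator is a hypothetical name for illustration, not the real HashMethodBase, which caches both the key and the mapped value inside its emplace/find paths:

#include <iostream>
#include <string>
#include <unordered_map>

// Remember the last key that was looked up and reuse its mapped value while
// the incoming keys stay identical, so runs of equal keys touch the hash
// table only once.
template <typename Key, typename Mapped>
class CachedAggregator
{
    std::unordered_map<Key, Mapped> data;
    const Key * last_key = nullptr;
    Mapped * last_mapped = nullptr;

public:
    Mapped & emplace(const Key & key)
    {
        if (last_key && *last_key == key)       // cache hit: skip hashing and probing
            return *last_mapped;

        auto it = data.try_emplace(key).first;  // node pointers stay stable across rehashes
        last_key = &it->first;
        last_mapped = &it->second;
        return it->second;
    }
};

int main()
{
    CachedAggregator<std::string, int> agg;
    // Clustered input benefits the most: equal keys arrive back to back.
    for (const char * k : {"a", "a", "a", "b", "b", "c"})
        ++agg.emplace(k);
    std::cout << agg.emplace("a") << ' ' << agg.emplace("b") << '\n'; // prints: 3 2
}
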
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Fiber.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Fiber.h
deleted file mode 100644
index e3ad8af12cd..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Fiber.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-/// defines.h should be included before fiber.hpp
-/// BOOST_USE_ASAN, BOOST_USE_TSAN and BOOST_USE_UCONTEXT should be correctly defined for sanitizers.
-#include <common/defines.h>
-#include <boost/context/fiber.hpp>
-
-using Fiber = boost::context::fiber;
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/FiberStack.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/FiberStack.h
deleted file mode 100644
index 81e82b33e40..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/FiberStack.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#pragma once
-#include <common/defines.h>
-#include <boost/context/stack_context.hpp>
-#include <Common/formatReadable.h>
-#include <Common/CurrentMemoryTracker.h>
-#include <Common/Exception.h>
-
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <sys/mman.h>
-
-#if defined(BOOST_USE_VALGRIND)
-#include <valgrind/valgrind.h>
-#endif
-
-namespace NDB::ErrorCodes
-{
- extern const int CANNOT_ALLOCATE_MEMORY;
-}
-
-/// This is an implementation of a stack allocator for fibers.
-/// The reference implementation is protected_fixedsize_stack from boost::context.
-/// This implementation additionally tracks memory usage, which is the main reason it is needed.
-class FiberStack
-{
-private:
- size_t stack_size;
- size_t page_size = 0;
-public:
- static constexpr size_t default_stack_size = 128 * 1024; /// 64KB was not enough for tests
-
- explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
- {
- page_size = ::sysconf(_SC_PAGESIZE);
- }
-
- boost::context::stack_context allocate()
- {
- size_t num_pages = 1 + (stack_size - 1) / page_size;
-        size_t num_bytes = (num_pages + 1) * page_size; /// Add one page at the bottom that will be used as a guard page
-
- void * vp = ::mmap(nullptr, num_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (MAP_FAILED == vp)
- DB::throwFromErrno(fmt::format("FiberStack: Cannot mmap {}.", ReadableSize(num_bytes)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-
- if (-1 == ::mprotect(vp, page_size, PROT_NONE))
- {
- ::munmap(vp, num_bytes);
- DB::throwFromErrno("FiberStack: cannot protect guard page", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
- }
-
- /// Do not count guard page in memory usage.
- CurrentMemoryTracker::alloc(num_pages * page_size);
-
- boost::context::stack_context sctx;
- sctx.size = num_bytes;
- sctx.sp = static_cast< char * >(vp) + sctx.size;
-#if defined(BOOST_USE_VALGRIND)
- sctx.valgrind_stack_id = VALGRIND_STACK_REGISTER(sctx.sp, vp);
-#endif
- return sctx;
- }
-
- void deallocate(boost::context::stack_context & sctx)
- {
-#if defined(BOOST_USE_VALGRIND)
- VALGRIND_STACK_DEREGISTER(sctx.valgrind_stack_id);
-#endif
- void * vp = static_cast< char * >(sctx.sp) - sctx.size;
- ::munmap(vp, sctx.size);
-
- /// Do not count guard page in memory usage.
- CurrentMemoryTracker::free(sctx.size - page_size);
- }
-};
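
FiberStack models the Boost.Context StackAllocator concept (allocate()/deallocate()), so it can be handed to boost::context::fiber via std::allocator_arg. The sketch below uses the stock protected_fixedsize_stack, which FiberStack mirrors; substituting FiberStack(128 * 1024) in its place is an assumption about how this codebase wires it up, not something shown in the diff:

#include <boost/context/fiber.hpp>
#include <boost/context/protected_fixedsize_stack.hpp>
#include <iostream>
#include <memory>
#include <utility>

// Build with: g++ -std=c++17 fiber_demo.cpp -lboost_context
int main()
{
    namespace ctx = boost::context;

    ctx::fiber f(
        std::allocator_arg,
        ctx::protected_fixedsize_stack(128 * 1024), // FiberStack(128 * 1024) would be the drop-in here
        [](ctx::fiber && sink)
        {
            std::cout << "running on a dedicated fiber stack\n";
            return std::move(sink);                 // switch back to the caller
        });

    f = std::move(f).resume();                      // enter the fiber; returns once it switches back
}
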
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
deleted file mode 100644
index 37bd81c8b4f..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashMap.h
+++ /dev/null
@@ -1,178 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/FixedHashTable.h>
-#include <Common/HashTable/HashMap.h>
-
-
-template <typename Key, typename TMapped, typename TState = HashTableNoState>
-struct FixedHashMapCell
-{
- using Mapped = TMapped;
- using State = TState;
-
- using value_type = PairNoInit<Key, Mapped>;
- using mapped_type = TMapped;
-
- bool full;
- Mapped mapped;
-
- FixedHashMapCell() {} //-V730
- FixedHashMapCell(const Key &, const State &) : full(true) {}
- FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
-
- const VoidKey getKey() const { return {}; }
- Mapped & getMapped() { return mapped; }
- const Mapped & getMapped() const { return mapped; }
-
- bool isZero(const State &) const { return !full; }
- void setZero() { full = false; }
-
-    /// Similar to FixedHashSetCell except that we need to hold a pointer to the Mapped field.
-    /// Note that we have to assemble a contiguous layout for the value_type on each call of getValue().
- struct CellExt
- {
- CellExt() {} //-V730
- CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
- void update(Key && key_, const FixedHashMapCell * ptr_)
- {
- key = key_;
- ptr = const_cast<FixedHashMapCell *>(ptr_);
- }
- Key key;
- FixedHashMapCell * ptr;
-
- const Key & getKey() const { return key; }
- Mapped & getMapped() { return ptr->mapped; }
- const Mapped & getMapped() const { return ptr->mapped; }
- const value_type getValue() const { return {key, ptr->mapped}; }
- };
-};
-
-
-/// In case when we can encode empty cells with zero mapped values.
-template <typename Key, typename TMapped, typename TState = HashTableNoState>
-struct FixedHashMapImplicitZeroCell
-{
- using Mapped = TMapped;
- using State = TState;
-
- using value_type = PairNoInit<Key, Mapped>;
- using mapped_type = TMapped;
-
- Mapped mapped;
-
- FixedHashMapImplicitZeroCell() {}
- FixedHashMapImplicitZeroCell(const Key &, const State &) {}
- FixedHashMapImplicitZeroCell(const value_type & value_, const State &) : mapped(value_.second) {}
-
- const VoidKey getKey() const { return {}; }
- Mapped & getMapped() { return mapped; }
- const Mapped & getMapped() const { return mapped; }
-
- bool isZero(const State &) const { return !mapped; }
- void setZero() { mapped = {}; }
-
-    /// Similar to FixedHashSetCell except that we need to hold a pointer to the Mapped field.
-    /// Note that we have to assemble a contiguous layout for the value_type on each call of getValue().
- struct CellExt
- {
- CellExt() {} //-V730
- CellExt(Key && key_, const FixedHashMapImplicitZeroCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapImplicitZeroCell *>(ptr_)) {}
- void update(Key && key_, const FixedHashMapImplicitZeroCell * ptr_)
- {
- key = key_;
- ptr = const_cast<FixedHashMapImplicitZeroCell *>(ptr_);
- }
- Key key;
- FixedHashMapImplicitZeroCell * ptr;
-
- const Key & getKey() const { return key; }
- Mapped & getMapped() { return ptr->mapped; }
- const Mapped & getMapped() const { return ptr->mapped; }
- const value_type getValue() const { return {key, ptr->mapped}; }
- };
-};
-
-
-template <
- typename Key,
- typename Mapped,
- typename Cell = FixedHashMapCell<Key, Mapped>,
- typename Size = FixedHashTableStoredSize<Cell>,
- typename Allocator = HashTableAllocator>
-class FixedHashMap : public FixedHashTable<Key, Cell, Size, Allocator>
-{
-public:
- using Base = FixedHashTable<Key, Cell, Size, Allocator>;
- using Self = FixedHashMap;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
- template <typename Func>
- void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
- {
- for (auto it = this->begin(), end = this->end(); it != end; ++it)
- {
- typename Self::LookupResult res_it;
- bool inserted;
- that.emplace(it->getKey(), res_it, inserted, it.getHash());
- func(res_it->getMapped(), it->getMapped(), inserted);
- }
- }
-
- template <typename Func>
- void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
- {
- for (auto it = this->begin(), end = this->end(); it != end; ++it)
- {
- auto res_it = that.find(it->getKey(), it.getHash());
- if (!res_it)
- func(it->getMapped(), it->getMapped(), false);
- else
- func(res_it->getMapped(), it->getMapped(), true);
- }
- }
-
- template <typename Func>
- void forEachValue(Func && func)
- {
- for (auto & v : *this)
- func(v.getKey(), v.getMapped());
- }
-
- template <typename Func>
- void forEachMapped(Func && func)
- {
- for (auto & v : *this)
- func(v.getMapped());
- }
-
- Mapped & ALWAYS_INLINE operator[](const Key & x)
- {
- LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
- if (inserted)
- new (&it->getMapped()) Mapped();
-
- return it->getMapped();
- }
-};
-
-
-template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
-using FixedImplicitZeroHashMap = FixedHashMap<
- Key,
- Mapped,
- FixedHashMapImplicitZeroCell<Key, Mapped>,
- FixedHashTableStoredSize<FixedHashMapImplicitZeroCell<Key, Mapped>>,
- Allocator>;
-
-template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
-using FixedImplicitZeroHashMapWithCalculatedSize = FixedHashMap<
- Key,
- Mapped,
- FixedHashMapImplicitZeroCell<Key, Mapped>,
- FixedHashTableCalculatedSize<FixedHashMapImplicitZeroCell<Key, Mapped>>,
- Allocator>;
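
FixedHashMap is a direct-addressed table: for a UInt8 key the buffer has exactly 2^8 cells and the key itself is the index, so there is no hashing, no collision chain and no key comparison. The following self-contained analogue (FixedU8Map is a made-up name) shows just that idea, leaving out the cell abstraction and the size policy of the real class:

#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

struct FixedU8Map
{
    static constexpr size_t NUM_CELLS = 1u << (sizeof(uint8_t) * 8);

    std::array<uint64_t, NUM_CELLS> mapped{};
    std::array<bool, NUM_CELLS> full{};

    uint64_t & operator[](uint8_t key)
    {
        full[key] = true;        // "emplace": the cell for `key` is always cell number `key`
        return mapped[key];
    }

    template <typename Func>
    void forEachValue(Func && func) const
    {
        for (size_t k = 0; k < NUM_CELLS; ++k)
            if (full[k])         // iteration simply skips empty cells
                func(static_cast<uint8_t>(k), mapped[k]);
    }
};

int main()
{
    FixedU8Map counts;
    for (uint8_t b : {1, 1, 7, 255})
        ++counts[b];
    counts.forEachValue([](uint8_t key, uint64_t value)
    {
        std::cout << int(key) << " -> " << value << '\n';
    });
}
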
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h
deleted file mode 100644
index b37971688d5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/FixedHashTable.h
+++ /dev/null
@@ -1,496 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-
-namespace NDB
-{
- namespace ErrorCodes
- {
- extern const int NO_AVAILABLE_DATA;
- }
-}
-
-template <typename Key, typename TState = HashTableNoState>
-struct FixedHashTableCell
-{
- using State = TState;
-
- using value_type = Key;
- using mapped_type = VoidMapped;
- bool full;
-
- FixedHashTableCell() {} //-V730
- FixedHashTableCell(const Key &, const State &) : full(true) {}
-
- const VoidKey getKey() const { return {}; }
- VoidMapped getMapped() const { return {}; }
-
- bool isZero(const State &) const { return !full; }
- void setZero() { full = false; }
- static constexpr bool need_zero_value_storage = false;
-
-    /// This Cell is only stored inside an iterator. It's used to accommodate the fact
-    /// that the iterator-based API always provides a reference to contiguous memory
-    /// containing the Key. As a result, we have to instantiate a real Key field.
-    /// All methods that return a mutable reference to the Key field are named with
-    /// the -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
-    /// tables, it's totally fine to discard the Key mutations.
- struct CellExt
- {
- Key key;
-
- const VoidKey getKey() const { return {}; }
- VoidMapped getMapped() const { return {}; }
- const value_type & getValue() const { return key; }
- void update(Key && key_, FixedHashTableCell *) { key = key_; }
- };
-};
-
-
-/// How to obtain the size of the table.
-
-template <typename Cell>
-struct FixedHashTableStoredSize
-{
- size_t m_size = 0;
-
- size_t getSize(const Cell *, const typename Cell::State &, size_t) const { return m_size; }
- bool isEmpty(const Cell *, const typename Cell::State &, size_t) const { return m_size == 0; }
-
- void increaseSize() { ++m_size; }
- void clearSize() { m_size = 0; }
- void setSize(size_t to) { m_size = to; }
-};
-
-template <typename Cell>
-struct FixedHashTableCalculatedSize
-{
- size_t getSize(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
- {
- size_t res = 0;
- for (const Cell * end = buf + num_cells; buf != end; ++buf)
- if (!buf->isZero(state))
- ++res;
- return res;
- }
-
- bool isEmpty(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
- {
- for (const Cell * end = buf + num_cells; buf != end; ++buf)
- if (!buf->isZero(state))
- return false;
- return true;
- }
-
- void increaseSize() {}
- void clearSize() {}
- void setSize(size_t) {}
-};
-
-
-/** Used as a lookup table for small keys such as UInt8, UInt16. It differs
- *  from a HashTable in that keys are not stored in the Cell buf, but inferred
- *  inside each iterator. There are a bunch of optimizations that make it faster
- *  than using HashTable: a) there is no collision chain; b) there is no key
- *  comparison; c) the number of cycles spent checking whether a cell is empty
- *  is halved; d) the memory layout is tighter, especially for the Clearable variants.
- *
- *  NOTE: For Set variants this should always be better. For Map variants,
- *  however, since we need to assemble the real cell inside each iterator, there
- *  might be some cases where we fall short.
- *
- *  TODO: Deprecate the cell API so that end users don't rely on the structure
- *  of the cell. Instead, the iterator should be used for operations such as cell
- *  transfer, key updates (e.g. StringRef) and serialization. This will allow
- *  TwoLevelHashSet(Map) to contain different types of sets (maps).
- */
-template <typename Key, typename Cell, typename Size, typename Allocator>
-class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size
-{
- static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
-
-protected:
- friend class const_iterator;
- friend class iterator;
- friend class Reader;
-
- using Self = FixedHashTable;
-
- Cell * buf; /// A piece of memory for all elements.
-
- void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
-
- void free()
- {
- if (buf)
- {
- Allocator::free(buf, getBufferSizeInBytes());
- buf = nullptr;
- }
- }
-
- void destroyElements()
- {
- if (!std::is_trivially_destructible_v<Cell>)
- for (iterator it = begin(), it_end = end(); it != it_end; ++it)
- it.ptr->~Cell();
- }
-
-
- template <typename Derived, bool is_const>
- class iterator_base
- {
- using Container = std::conditional_t<is_const, const Self, Self>;
- using cell_type = std::conditional_t<is_const, const Cell, Cell>;
-
- Container * container;
- cell_type * ptr;
-
- friend class FixedHashTable;
-
- public:
- iterator_base() {}
- iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_)
- {
- cell.update(ptr - container->buf, ptr);
- }
-
- bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
- bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }
-
- Derived & operator++()
- {
- ++ptr;
-
- /// Skip empty cells in the main buffer.
- auto buf_end = container->buf + container->NUM_CELLS;
- while (ptr < buf_end && ptr->isZero(*container))
- ++ptr;
-
- return static_cast<Derived &>(*this);
- }
-
- auto & operator*()
- {
- if (cell.key != ptr - container->buf)
- cell.update(ptr - container->buf, ptr);
- return cell;
- }
- auto * operator-> ()
- {
- if (cell.key != ptr - container->buf)
- cell.update(ptr - container->buf, ptr);
- return &cell;
- }
-
- auto getPtr() const { return ptr; }
- size_t getHash() const { return ptr - container->buf; }
- size_t getCollisionChainLength() const { return 0; }
- typename cell_type::CellExt cell;
- };
-
-
-public:
- using key_type = Key;
- using mapped_type = typename Cell::mapped_type;
- using value_type = typename Cell::value_type;
- using cell_type = Cell;
-
- using LookupResult = Cell *;
- using ConstLookupResult = const Cell *;
-
-
- size_t hash(const Key & x) const { return x; }
-
- FixedHashTable() { alloc(); }
-
- FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }
-
- ~FixedHashTable()
- {
- destroyElements();
- free();
- }
-
- FixedHashTable & operator=(FixedHashTable && rhs)
- {
- destroyElements();
- free();
-
- std::swap(buf, rhs.buf);
- this->setSize(rhs.size());
-
- Allocator::operator=(std::move(rhs));
- Cell::State::operator=(std::move(rhs));
-
- return *this;
- }
-
- class Reader final : private Cell::State
- {
- public:
- Reader(DB::ReadBuffer & in_) : in(in_) {}
-
- Reader(const Reader &) = delete;
- Reader & operator=(const Reader &) = delete;
-
- bool next()
- {
- if (!is_initialized)
- {
- Cell::State::read(in);
- DB::readVarUInt(size, in);
- is_initialized = true;
- }
-
- if (read_count == size)
- {
- is_eof = true;
- return false;
- }
-
- cell.read(in);
- ++read_count;
-
- return true;
- }
-
- inline const value_type & get() const
- {
- if (!is_initialized || is_eof)
- throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
-
- return cell.getValue();
- }
-
- private:
- DB::ReadBuffer & in;
- Cell cell;
- size_t read_count = 0;
- size_t size = 0;
- bool is_eof = false;
- bool is_initialized = false;
- };
-
-
- class iterator : public iterator_base<iterator, false>
- {
- public:
- using iterator_base<iterator, false>::iterator_base;
- };
-
- class const_iterator : public iterator_base<const_iterator, true>
- {
- public:
- using iterator_base<const_iterator, true>::iterator_base;
- };
-
-
- const_iterator begin() const
- {
- if (!buf)
- return end();
-
- const Cell * ptr = buf;
- auto buf_end = buf + NUM_CELLS;
- while (ptr < buf_end && ptr->isZero(*this))
- ++ptr;
-
- return const_iterator(this, ptr);
- }
-
- const_iterator cbegin() const { return begin(); }
-
- iterator begin()
- {
- if (!buf)
- return end();
-
- Cell * ptr = buf;
- auto buf_end = buf + NUM_CELLS;
- while (ptr < buf_end && ptr->isZero(*this))
- ++ptr;
-
- return iterator(this, ptr);
- }
-
- const_iterator end() const
- {
- /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C.
- return const_iterator(this, buf ? buf + NUM_CELLS : buf);
- }
-
- const_iterator cend() const
- {
- return end();
- }
-
- iterator end()
- {
- return iterator(this, buf ? buf + NUM_CELLS : buf);
- }
-
-
-public:
- /// The last parameter is unused but exists for compatibility with HashTable interface.
- void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
- {
- it = &buf[x];
-
- if (!buf[x].isZero(*this))
- {
- inserted = false;
- return;
- }
-
- new (&buf[x]) Cell(x, *this);
- inserted = true;
- this->increaseSize();
- }
-
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
- {
- std::pair<LookupResult, bool> res;
- emplace(Cell::getKey(x), res.first, res.second);
- if (res.second)
- insertSetMapped(res.first->getMapped(), x);
-
- return res;
- }
-
- LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; }
-
- ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
-
- LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; }
-
- ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const
- {
- return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
- }
-
- bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
- bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
-
- void write(DB::WriteBuffer & wb) const
- {
- Cell::State::write(wb);
- DB::writeVarUInt(size(), wb);
-
- if (!buf)
- return;
-
- for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
- {
- if (!ptr->isZero(*this))
- {
-                DB::writeVarUInt(ptr - buf, wb);
- ptr->write(wb);
- }
- }
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- Cell::State::writeText(wb);
- DB::writeText(size(), wb);
-
- if (!buf)
- return;
-
- for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
- {
- if (!ptr->isZero(*this))
- {
- DB::writeChar(',', wb);
- DB::writeText(ptr - buf, wb);
- DB::writeChar(',', wb);
- ptr->writeText(wb);
- }
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- Cell::State::read(rb);
- destroyElements();
- size_t m_size;
- DB::readVarUInt(m_size, rb);
- this->setSize(m_size);
- free();
- alloc();
-
- for (size_t i = 0; i < m_size; ++i)
- {
- size_t place_value = 0;
- DB::readVarUInt(place_value, rb);
- Cell x;
- x.read(rb);
- new (&buf[place_value]) Cell(x, *this);
- }
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- Cell::State::readText(rb);
- destroyElements();
- size_t m_size;
- DB::readText(m_size, rb);
- this->setSize(m_size);
- free();
- alloc();
-
- for (size_t i = 0; i < m_size; ++i)
- {
- size_t place_value = 0;
- DB::assertChar(',', rb);
- DB::readText(place_value, rb);
- Cell x;
- DB::assertChar(',', rb);
- x.readText(rb);
- new (&buf[place_value]) Cell(x, *this);
- }
- }
-
- size_t size() const { return this->getSize(buf, *this, NUM_CELLS); }
- bool empty() const { return this->isEmpty(buf, *this, NUM_CELLS); }
-
- void clear()
- {
- destroyElements();
- this->clearSize();
-
- memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
- }
-
-    /// After executing this function, the table can only be destroyed,
-    /// although the methods `size`, `empty`, `begin` and `end` may still be used.
- void clearAndShrink()
- {
- destroyElements();
- this->clearSize();
- free();
- }
-
- size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }
-
- size_t getBufferSizeInCells() const { return NUM_CELLS; }
-
-    /// Return the offset of the result in the internal buffer.
-    /// The result can have a value up to `getBufferSizeInCells() + 1`,
-    /// because the offset for the zero value is considered to be 0,
-    /// while for other values it is `offset in buffer + 1`.
- size_t offsetInternal(ConstLookupResult ptr) const
- {
- if (ptr->isZero(*this))
- return 0;
- return ptr - buf + 1;
- }
-
- const Cell * data() const { return buf; }
- Cell * data() { return buf; }
-
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
- size_t getCollisions() const { return 0; }
-#endif
-};
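
The offsetInternal() convention above (0 reserved for the zero/empty case, otherwise buffer index + 1) is what the has_offset flavour of FindResultImpl consumes. A trivial standalone illustration of just that convention, with a made-up Cell type standing in for the real one:

#include <cstddef>
#include <iostream>

struct Cell { bool full = false; };

static size_t offsetInternal(const Cell * buf, const Cell * ptr)
{
    if (!ptr->full)                                // stands in for ptr->isZero(*this)
        return 0;                                  // 0 is reserved for the zero/empty case
    return static_cast<size_t>(ptr - buf) + 1;     // otherwise: index in buffer + 1
}

int main()
{
    Cell buf[4];
    buf[2].full = true;
    std::cout << offsetInternal(buf, &buf[2]) << '\n'; // 3 (= index 2 + 1)
    std::cout << offsetInternal(buf, &buf[0]) << '\n'; // 0 (empty cell)
}
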
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h
deleted file mode 100644
index 298580dc837..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashMap.h
+++ /dev/null
@@ -1,189 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/HashTableAllocator.h>
-#include <Common/HashTable/StringHashTable.h>
-
-template <typename Key, typename TMapped>
-struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- // external
- const StringRef getKey() const { return toStringRef(this->value.first); }
- // internal
- static const Key & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped>
-struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
-
- // Zero means unoccupied cells in hash table. Use key with last word = 0 as
- // zero keys, because such keys are unrepresentable (no way to encode length).
- static bool isZero(const StringKey16 & key, const HashTableNoState &) { return key.items[1] == 0; }
- void setZero() { this->value.first.items[1] = 0; }
-
- // external
- const StringRef getKey() const { return toStringRef(this->value.first); }
- // internal
- static const StringKey16 & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped>
-struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
-
- // Zero means unoccupied cells in hash table. Use key with last word = 0 as
- // zero keys, because such keys are unrepresentable (no way to encode length).
- static bool isZero(const StringKey24 & key, const HashTableNoState &)
- { return key.c == 0; }
- void setZero() { this->value.first.c = 0; }
-
- // external
- const StringRef getKey() const { return toStringRef(this->value.first); }
- // internal
- static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped>
-struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
-{
- using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
- using value_type = typename Base::value_type;
- using Base::Base;
- static constexpr bool need_zero_value_storage = false;
- // external
- using Base::getKey;
- // internal
- static const StringRef & getKey(const value_type & value_) { return value_.first; }
-};
-
-template <typename TMapped, typename Allocator>
-struct StringHashMapSubMaps
-{
- using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
- using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
-};
-
-template <typename TMapped, typename Allocator = HashTableAllocator>
-class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
-{
-public:
- using Key = StringRef;
- using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
- using Self = StringHashMap;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
-    /// Merge every cell's value of the current map into the destination map.
-    ///  Func should have the signature void(Mapped & dst, Mapped & src, bool emplaced).
-    ///  Each filled cell in the current map invokes func once. If the destination map
-    ///  doesn't have a key equal to the given cell, a new cell is emplaced into it,
-    ///  and func is invoked with the third argument emplaced set to true. Otherwise
-    ///  emplaced is set to false.
- template <typename Func>
- void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
- {
- if (this->m0.hasZero() && that.m0.hasZero())
- func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
- else if (this->m0.hasZero())
- {
- that.m0.setHasZero();
- func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
- }
- this->m1.mergeToViaEmplace(that.m1, func);
- this->m2.mergeToViaEmplace(that.m2, func);
- this->m3.mergeToViaEmplace(that.m3, func);
- this->ms.mergeToViaEmplace(that.ms, func);
- }
-
-    /// Merge every cell's value of the current map into the destination map via find.
-    ///  Func should have the signature void(Mapped & dst, Mapped & src, bool exist).
-    ///  Each filled cell in the current map invokes func once. If the destination map
-    ///  doesn't have a key equal to the given cell, func is invoked with the third argument
-    ///  exist set to false. Otherwise exist is set to true.
- template <typename Func>
- void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
- {
- if (this->m0.size() && that.m0.size())
- func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
- else if (this->m0.size())
- func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
- this->m1.mergeToViaFind(that.m1, func);
- this->m2.mergeToViaFind(that.m2, func);
- this->m3.mergeToViaFind(that.m3, func);
- this->ms.mergeToViaFind(that.ms, func);
- }
-
- TMapped & ALWAYS_INLINE operator[](const Key & x)
- {
- LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
- if (inserted)
- new (&it->getMapped()) TMapped();
-
- return it->getMapped();
- }
-
- template <typename Func>
- void ALWAYS_INLINE forEachValue(Func && func)
- {
- if (this->m0.size())
- {
- func(StringRef{}, this->m0.zeroValue()->getMapped());
- }
-
- for (auto & v : this->m1)
- {
- func(v.getKey(), v.getMapped());
- }
-
- for (auto & v : this->m2)
- {
- func(v.getKey(), v.getMapped());
- }
-
- for (auto & v : this->m3)
- {
- func(v.getKey(), v.getMapped());
- }
-
- for (auto & v : this->ms)
- {
- func(v.getKey(), v.getMapped());
- }
- }
-
- template <typename Func>
- void ALWAYS_INLINE forEachMapped(Func && func)
- {
- if (this->m0.size())
- func(this->m0.zeroValue()->getMapped());
- for (auto & v : this->m1)
- func(v.getMapped());
- for (auto & v : this->m2)
- func(v.getMapped());
- for (auto & v : this->m3)
- func(v.getMapped());
- for (auto & v : this->ms)
- func(v.getMapped());
- }
-};
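
The mergeToViaEmplace() contract described above is easiest to see on a plain map: every filled cell of the source invokes func(dst_mapped, src_mapped, emplaced). The sketch below is a standalone analogue over std::unordered_map, not the per-sub-table merge the real StringHashMap performs; the free function name is an illustrative assumption:

#include <iostream>
#include <string>
#include <unordered_map>

template <typename Map, typename Func>
void mergeToViaEmplace(Map & src, Map & dst, Func && func)
{
    for (auto & [key, mapped] : src)
    {
        auto [it, emplaced] = dst.try_emplace(key); // default-constructs the mapped value if absent
        func(it->second, mapped, emplaced);
    }
}

int main()
{
    std::unordered_map<std::string, int> a{{"x", 1}, {"y", 2}};
    std::unordered_map<std::string, int> b{{"y", 10}};

    // Typical aggregation merge: copy newly emplaced values, sum existing ones.
    mergeToViaEmplace(a, b, [](int & dst, int & src, bool emplaced)
    {
        if (emplaced)
            dst = src;
        else
            dst += src;
    });

    std::cout << b["x"] << ' ' << b["y"] << '\n'; // prints: 1 12
}
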
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashSet.h
deleted file mode 100644
index 0466dca3574..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashSet.h
+++ /dev/null
@@ -1,101 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashSet.h>
-#include <Common/HashTable/HashTableAllocator.h>
-#include <Common/HashTable/StringHashTable.h>
-
-template <typename Key>
-struct StringHashSetCell : public HashTableCell<Key, StringHashTableHash, HashTableNoState>
-{
- using Base = HashTableCell<Key, StringHashTableHash, HashTableNoState>;
- using Base::Base;
-
- VoidMapped void_map;
- VoidMapped & getMapped() { return void_map; }
- const VoidMapped & getMapped() const { return void_map; }
-
- static constexpr bool need_zero_value_storage = false;
-};
-
-template <>
-struct StringHashSetCell<StringKey16> : public HashTableCell<StringKey16, StringHashTableHash, HashTableNoState>
-{
- using Base = HashTableCell<StringKey16, StringHashTableHash, HashTableNoState>;
- using Base::Base;
-
- VoidMapped void_map;
- VoidMapped & getMapped() { return void_map; }
- const VoidMapped & getMapped() const { return void_map; }
-
- static constexpr bool need_zero_value_storage = false;
-
- bool isZero(const HashTableNoState & state) const { return isZero(this->key, state); }
- // Zero means unoccupied cells in hash table. Use key with last word = 0 as
- // zero keys, because such keys are unrepresentable (no way to encode length).
- static bool isZero(const StringKey16 & key_, const HashTableNoState &)
- { return key_.items[1] == 0; }
- void setZero() { this->key.items[1] = 0; }
-};
-
-template <>
-struct StringHashSetCell<StringKey24> : public HashTableCell<StringKey24, StringHashTableHash, HashTableNoState>
-{
- using Base = HashTableCell<StringKey24, StringHashTableHash, HashTableNoState>;
- using Base::Base;
-
- VoidMapped void_map;
- VoidMapped & getMapped() { return void_map; }
- const VoidMapped & getMapped() const { return void_map; }
-
- static constexpr bool need_zero_value_storage = false;
-
- bool isZero(const HashTableNoState & state) const { return isZero(this->key, state); }
- // Zero means unoccupied cells in hash table. Use key with last word = 0 as
- // zero keys, because such keys are unrepresentable (no way to encode length).
- static bool isZero(const StringKey24 & key_, const HashTableNoState &)
- { return key_.c == 0; }
- void setZero() { this->key.c = 0; }
-};
-
-template <>
-struct StringHashSetCell<StringRef> : public HashSetCellWithSavedHash<StringRef, StringHashTableHash, HashTableNoState>
-{
- using Base = HashSetCellWithSavedHash<StringRef, StringHashTableHash, HashTableNoState>;
- using Base::Base;
-
- VoidMapped void_map;
- VoidMapped & getMapped() { return void_map; }
- const VoidMapped & getMapped() const { return void_map; }
-
- static constexpr bool need_zero_value_storage = false;
-};
-
-template <typename Allocator>
-struct StringHashSetSubMaps
-{
- using T0 = StringHashTableEmpty<StringHashSetCell<StringRef>>;
- using T1 = HashSetTable<StringKey8, StringHashSetCell<StringKey8>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using T2 = HashSetTable<StringKey16, StringHashSetCell<StringKey16>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using T3 = HashSetTable<StringKey24, StringHashSetCell<StringKey24>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
- using Ts = HashSetTable<StringRef, StringHashSetCell<StringRef>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
-};
-
-template <typename Allocator = HashTableAllocator>
-class StringHashSet : public StringHashTable<StringHashSetSubMaps<Allocator>>
-{
-public:
- using Key = StringRef;
- using Base = StringHashTable<StringHashSetSubMaps<Allocator>>;
- using Self = StringHashSet;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, bool & inserted)
- {
- LookupResult it;
- Base::emplace(key_holder, it, inserted);
- }
-
-};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h
deleted file mode 100644
index d30271d65db..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/StringHashTable.h
+++ /dev/null
@@ -1,435 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/HashTable.h>
-
-#include <new>
-#include <variant>
-
-
-using StringKey8 = UInt64;
-using StringKey16 = DB::UInt128;
-struct StringKey24
-{
- UInt64 a;
- UInt64 b;
- UInt64 c;
-
- bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
-};
-
-inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
-{
- assert(n != 0);
- return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
-}
-inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
-{
- assert(n.items[1] != 0);
- return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.items[1]) >> 3)};
-}
-inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
-{
- assert(n.c != 0);
- return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
-}
-
-struct StringHashTableHash
-{
-#if defined(__SSE4_2__)
- size_t ALWAYS_INLINE operator()(StringKey8 key) const
- {
- size_t res = -1ULL;
- res = _mm_crc32_u64(res, key);
- return res;
- }
- size_t ALWAYS_INLINE operator()(StringKey16 key) const
- {
- size_t res = -1ULL;
- res = _mm_crc32_u64(res, key.items[0]);
- res = _mm_crc32_u64(res, key.items[1]);
- return res;
- }
- size_t ALWAYS_INLINE operator()(StringKey24 key) const
- {
- size_t res = -1ULL;
- res = _mm_crc32_u64(res, key.a);
- res = _mm_crc32_u64(res, key.b);
- res = _mm_crc32_u64(res, key.c);
- return res;
- }
-#else
- size_t ALWAYS_INLINE operator()(StringKey8 key) const
- {
- return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8);
- }
- size_t ALWAYS_INLINE operator()(StringKey16 key) const
- {
- return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16);
- }
- size_t ALWAYS_INLINE operator()(StringKey24 key) const
- {
- return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24);
- }
-#endif
- size_t ALWAYS_INLINE operator()(StringRef key) const
- {
- return StringRefHash()(key);
- }
-};
-
-template <typename Cell>
-struct StringHashTableEmpty //-V730
-{
- using Self = StringHashTableEmpty;
-
- bool has_zero = false;
- std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
-
-public:
- bool hasZero() const { return has_zero; }
-
- void setHasZero()
- {
- has_zero = true;
- new (zeroValue()) Cell();
- }
-
- void setHasZero(const Cell & other)
- {
- has_zero = true;
- new (zeroValue()) Cell(other);
- }
-
- void clearHasZero()
- {
- has_zero = false;
- if (!std::is_trivially_destructible_v<Cell>)
- zeroValue()->~Cell();
- }
-
- Cell * zeroValue() { return std::launder(reinterpret_cast<Cell *>(&zero_value_storage)); }
- const Cell * zeroValue() const { return std::launder(reinterpret_cast<const Cell *>(&zero_value_storage)); }
-
- using LookupResult = Cell *;
- using ConstLookupResult = const Cell *;
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0)
- {
- if (!hasZero())
- {
- setHasZero();
- inserted = true;
- }
- else
- inserted = false;
- it = zeroValue();
- }
-
- template <typename Key>
- LookupResult ALWAYS_INLINE find(const Key &, size_t = 0)
- {
- return hasZero() ? zeroValue() : nullptr;
- }
-
- template <typename Key>
- ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const
- {
- return hasZero() ? zeroValue() : nullptr;
- }
-
- void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
- void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
- void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); }
- void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); }
- size_t size() const { return hasZero() ? 1 : 0; }
- bool empty() const { return !hasZero(); }
- size_t getBufferSizeInBytes() const { return sizeof(Cell); }
- size_t getCollisions() const { return 0; }
-};
-
-template <size_t initial_size_degree = 8>
-struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
-{
- // Smooth growing for string maps
- void increaseSize() { this->size_degree += 1; }
-};
-
-template <typename Mapped>
-struct StringHashTableLookupResult
-{
- Mapped * mapped_ptr;
- StringHashTableLookupResult() {}
- StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
- StringHashTableLookupResult(std::nullptr_t) {}
- const VoidKey getKey() const { return {}; }
- auto & getMapped() { return *mapped_ptr; }
- auto & operator*() { return *this; }
- auto & operator*() const { return *this; }
- auto * operator->() { return this; }
- auto * operator->() const { return this; }
- operator bool() const { return mapped_ptr; }
- friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; }
- friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; }
- friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; }
- friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; }
-};
-
-template <typename SubMaps>
-class StringHashTable : private boost::noncopyable
-{
-protected:
- static constexpr size_t NUM_MAPS = 5;
- // Map for storing empty string
- using T0 = typename SubMaps::T0;
-
- // Short strings are stored as numbers
- using T1 = typename SubMaps::T1;
- using T2 = typename SubMaps::T2;
- using T3 = typename SubMaps::T3;
-
- // Long strings are stored as StringRef along with saved hash
- using Ts = typename SubMaps::Ts;
- using Self = StringHashTable;
-
- template <typename, typename, size_t>
- friend class TwoLevelStringHashTable;
-
- T0 m0;
- T1 m1;
- T2 m2;
- T3 m3;
- Ts ms;
-
-public:
- using Key = StringRef;
- using key_type = Key;
- using mapped_type = typename Ts::mapped_type;
- using value_type = typename Ts::value_type;
- using cell_type = typename Ts::cell_type;
-
- using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>;
- using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>;
-
- StringHashTable() = default;
-
- StringHashTable(size_t reserve_for_num_elements)
- : m1{reserve_for_num_elements / 4}
- , m2{reserve_for_num_elements / 4}
- , m3{reserve_for_num_elements / 4}
- , ms{reserve_for_num_elements / 4}
- {
- }
-
- StringHashTable(StringHashTable && rhs)
- : m1(std::move(rhs.m1))
- , m2(std::move(rhs.m2))
- , m3(std::move(rhs.m3))
- , ms(std::move(rhs.ms))
- {
- }
-
- ~StringHashTable() = default;
-
-public:
-    // Dispatch is written in a way that maximizes performance:
-    // 1. Always memcpy in multiples of 8 bytes
-    // 2. Use the switch-case extension to generate a fast dispatching table
-    // 3. Funcs are named callables that can be force-inlined
-    //
-    // NOTE: It relies on little-endianness
-    //
-    // NOTE: It requires keys padded to 8 bytes (IOW you cannot pass
-    // std::string here, but you can pass e.g. ColumnString::getDataAt()),
-    // since it copies 8 bytes at a time.
- template <typename Self, typename KeyHolder, typename Func>
- static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
- {
- StringHashTableHash hash;
- const StringRef & x = keyHolderGetKey(key_holder);
- const size_t sz = x.size;
- if (sz == 0)
- {
- keyHolderDiscardKey(key_holder);
- return func(self.m0, VoidKey{}, 0);
- }
-
- if (x.data[sz - 1] == 0)
- {
-            // Strings with trailing zeros are not representable as fixed-size
-            // string keys. Put them into the generic table.
- return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
- }
-
- const char * p = x.data;
-        // pending bits that need to be shifted out
- const char s = (-sz & 7) * 8;
- union
- {
- StringKey8 k8;
- StringKey16 k16;
- StringKey24 k24;
- UInt64 n[3];
- };
- switch ((sz - 1) >> 3)
- {
- case 0: // 1..8 bytes
- {
-                // p is in the first half of a 4096-byte block, so reading 8 bytes forward cannot cross a page boundary
- if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
- {
- memcpy(&n[0], p, 8);
- n[0] &= -1ul >> s;
- }
- else
- {
- const char * lp = x.data + x.size - 8;
- memcpy(&n[0], lp, 8);
- n[0] >>= s;
- }
- keyHolderDiscardKey(key_holder);
- return func(self.m1, k8, hash(k8));
- }
- case 1: // 9..16 bytes
- {
- memcpy(&n[0], p, 8);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[1], lp, 8);
- n[1] >>= s;
- keyHolderDiscardKey(key_holder);
- return func(self.m2, k16, hash(k16));
- }
- case 2: // 17..24 bytes
- {
- memcpy(&n[0], p, 16);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[2], lp, 8);
- n[2] >>= s;
- keyHolderDiscardKey(key_holder);
- return func(self.m3, k24, hash(k24));
- }
- default: // >= 25 bytes
- {
- return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
- }
- }
- }
-
- struct EmplaceCallable
- {
- LookupResult & mapped;
- bool & inserted;
-
- EmplaceCallable(LookupResult & mapped_, bool & inserted_)
- : mapped(mapped_), inserted(inserted_) {}
-
- template <typename Map, typename KeyHolder>
- void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
- {
- typename Map::LookupResult result;
- map.emplace(key_holder, result, inserted, hash);
- mapped = &result->getMapped();
- }
- };
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
- }
-
- struct FindCallable
- {
-        // find() doesn't need any key memory management, so we don't work with
-        // any key holders here, only with normal keys. The key type is still
-        // different for every subtable, which is why it is a template parameter.
- template <typename Submap, typename SubmapKey>
- auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
- {
- auto it = map.find(key, hash);
- if (!it)
- return decltype(&it->getMapped()){};
- else
- return &it->getMapped();
- }
- };
-
- LookupResult ALWAYS_INLINE find(const Key & x)
- {
- return dispatch(*this, x, FindCallable{});
- }
-
- ConstLookupResult ALWAYS_INLINE find(const Key & x) const
- {
- return dispatch(*this, x, FindCallable{});
- }
-
- bool ALWAYS_INLINE has(const Key & x, size_t = 0) const
- {
- return dispatch(*this, x, FindCallable{}) != nullptr;
- }
-
- void write(DB::WriteBuffer & wb) const
- {
- m0.write(wb);
- m1.write(wb);
- m2.write(wb);
- m3.write(wb);
- ms.write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- m0.writeText(wb);
- DB::writeChar(',', wb);
- m1.writeText(wb);
- DB::writeChar(',', wb);
- m2.writeText(wb);
- DB::writeChar(',', wb);
- m3.writeText(wb);
- DB::writeChar(',', wb);
- ms.writeText(wb);
- }
-
- void read(DB::ReadBuffer & rb)
- {
- m0.read(rb);
- m1.read(rb);
- m2.read(rb);
- m3.read(rb);
- ms.read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- m0.readText(rb);
- DB::assertChar(',', rb);
- m1.readText(rb);
- DB::assertChar(',', rb);
- m2.readText(rb);
- DB::assertChar(',', rb);
- m3.readText(rb);
- DB::assertChar(',', rb);
- ms.readText(rb);
- }
-
- size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
-
- bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); }
-
- size_t getBufferSizeInBytes() const
- {
- return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes()
- + ms.getBufferSizeInBytes();
- }
-
- void clearAndShrink()
- {
- m1.clearHasZero();
- m1.clearAndShrink();
- m2.clearAndShrink();
- m3.clearAndShrink();
- ms.clearAndShrink();
- }
-};
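
The core trick of StringHashTable is packing short, zero-free strings into fixed-width integer keys and recovering the length from the number of leading zero bytes, as toStringRef(StringKey8) above does. The sketch below covers only the 1..8-byte case, assumes a little-endian machine and GCC/Clang builtins, and uses a plain zero-padded copy instead of the page-boundary-aware memcpy in dispatch(); packKey8/unpackKey8 are illustrative names:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

static uint64_t packKey8(const std::string & s)
{
    assert(!s.empty() && s.size() <= 8 && s.back() != '\0'); // trailing zeros go to the generic table
    uint64_t key = 0;
    std::memcpy(&key, s.data(), s.size());                   // low bytes hold the string, high bytes stay zero
    return key;
}

static std::string unpackKey8(uint64_t key)
{
    assert(key != 0);
    size_t len = 8 - (static_cast<size_t>(__builtin_clzll(key)) >> 3); // count of non-zero low bytes
    return std::string(reinterpret_cast<const char *>(&key), len);
}

int main()
{
    uint64_t k = packKey8("query");
    std::cout << unpackKey8(k) << ' ' << unpackKey8(k).size() << '\n'; // prints: query 5
}
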
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h
deleted file mode 100644
index 7bebf0d8af5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashMap.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/TwoLevelHashTable.h>
-#include <Common/HashTable/HashMap.h>
-
-
-template
-<
- typename Key,
- typename Cell,
- typename Hash = DefaultHash<Key>,
- typename Grower = TwoLevelHashTableGrower<>,
- typename Allocator = HashTableAllocator,
- template <typename ...> typename ImplTable = HashMapTable
->
-class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>
-{
-public:
- using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>;
- using LookupResult = typename Impl::LookupResult;
-
- using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable;
-
- template <typename Func>
- void ALWAYS_INLINE forEachMapped(Func && func)
- {
- for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
- this->impls[i].forEachMapped(func);
- }
-
- typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
- {
- LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
-
- if (inserted)
- new (&it->getMapped()) typename Cell::Mapped();
-
- return it->getMapped();
- }
-};
-
-
-template
-<
- typename Key,
- typename Mapped,
- typename Hash = DefaultHash<Key>,
- typename Grower = TwoLevelHashTableGrower<>,
- typename Allocator = HashTableAllocator,
- template <typename ...> typename ImplTable = HashMapTable
->
-using TwoLevelHashMap = TwoLevelHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>;
-
-
-template
-<
- typename Key,
- typename Mapped,
- typename Hash = DefaultHash<Key>,
- typename Grower = TwoLevelHashTableGrower<>,
- typename Allocator = HashTableAllocator,
- template <typename ...> typename ImplTable = HashMapTable
->
-using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator, ImplTable>;
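The deleted TwoLevelHashMapTable adds two conveniences on top of the two-level table: forEachMapped() visits the mapped values bucket by bucket, and operator[] default-constructs the mapped value when the key is inserted for the first time. A hedged standalone sketch of the same two ideas over std::unordered_map buckets (the bucket count and names are assumptions made for the example):

    #include <cstddef>
    #include <cstdio>
    #include <functional>
    #include <unordered_map>

    // Sketch: pick a bucket from the hash, then defer to the per-bucket map.
    template <typename Key, typename Mapped, std::size_t BITS_FOR_BUCKET = 4>
    struct TinyTwoLevelMap
    {
        static constexpr std::size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
        std::unordered_map<Key, Mapped> impls[NUM_BUCKETS];

        static std::size_t bucketFromHash(std::size_t h) { return h & (NUM_BUCKETS - 1); }

        // Like the deleted operator[], the mapped value is value-initialized on first insert.
        Mapped & operator[](const Key & key)
        {
            return impls[bucketFromHash(std::hash<Key>{}(key))][key];
        }

        template <typename Func>
        void forEachMapped(Func && func)
        {
            for (auto & impl : impls)
                for (auto & kv : impl)
                    func(kv.second);
        }
    };

    int main()
    {
        TinyTwoLevelMap<int, int> map;
        map[1] = 10;
        map[2] = 20;
        int sum = 0;
        map.forEachMapped([&](int value) { sum += value; });
        std::printf("sum = %d\n", sum);   // 30
    }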
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h
deleted file mode 100644
index 14afb91c071..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelHashTable.h
+++ /dev/null
@@ -1,335 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-
-
-/** Two-level hash table.
- * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
- * To determine which one to use, one of the bytes of the hash function is taken.
- *
- * Usually works a little slower than a simple hash table.
- * However, it has advantages in some cases:
- * - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
- * - delay during resizes is amortized, since the small hash tables will be resized separately;
- * - in theory, resizes are cache-local in a larger range of sizes.
- */
-
-template <size_t initial_size_degree = 8>
-struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree>
-{
- /// Increase the size of the hash table.
- void increaseSize()
- {
- this->size_degree += this->size_degree >= 15 ? 1 : 2;
- }
-};
-
-template
-<
- typename Key,
- typename Cell,
- typename Hash,
- typename Grower,
- typename Allocator,
- typename ImplTable = HashTable<Key, Cell, Hash, Grower, Allocator>,
- size_t BITS_FOR_BUCKET = 8
->
-class TwoLevelHashTable :
- private boost::noncopyable,
- protected Hash /// empty base optimization
-{
-protected:
- friend class const_iterator;
- friend class iterator;
-
- using HashValue = size_t;
- using Self = TwoLevelHashTable;
-public:
- using Impl = ImplTable;
-
- static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
- static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
-
- size_t hash(const Key & x) const { return Hash::operator()(x); }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
-
-protected:
- typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
- {
- while (bucket != NUM_BUCKETS && impls[bucket].empty())
- ++bucket;
-
- if (bucket != NUM_BUCKETS)
- return impls[bucket].begin();
-
- --bucket;
- return impls[MAX_BUCKET].end();
- }
-
- typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
- {
- while (bucket != NUM_BUCKETS && impls[bucket].empty())
- ++bucket;
-
- if (bucket != NUM_BUCKETS)
- return impls[bucket].begin();
-
- --bucket;
- return impls[MAX_BUCKET].end();
- }
-
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
- Impl impls[NUM_BUCKETS];
-
-
- TwoLevelHashTable() {}
-
- /// Copy the data from another (normal) hash table. It should have the same hash function.
- template <typename Source>
- TwoLevelHashTable(const Source & src)
- {
- typename Source::const_iterator it = src.begin();
-
- /// It is assumed that the zero key (stored separately) is first in iteration order.
- if (it != src.end() && it.getPtr()->isZero(src))
- {
- insert(it->getValue());
- ++it;
- }
-
- for (; it != src.end(); ++it)
- {
- const Cell * cell = it.getPtr();
- size_t hash_value = cell->getHash(src);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].insertUniqueNonZero(cell, hash_value);
- }
- }
-
-
- class iterator
- {
- Self * container{};
- size_t bucket{};
- typename Impl::iterator current_it{};
-
- friend class TwoLevelHashTable;
-
- iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_)
- : container(container_), bucket(bucket_), current_it(current_it_) {}
-
- public:
- iterator() {}
-
- bool operator== (const iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
- bool operator!= (const iterator & rhs) const { return !(*this == rhs); }
-
- iterator & operator++()
- {
- ++current_it;
- if (current_it == container->impls[bucket].end())
- {
- ++bucket;
- current_it = container->beginOfNextNonEmptyBucket(bucket);
- }
-
- return *this;
- }
-
- Cell & operator* () const { return *current_it; }
- Cell * operator->() const { return current_it.getPtr(); }
-
- Cell * getPtr() const { return current_it.getPtr(); }
- size_t getHash() const { return current_it.getHash(); }
- };
-
-
- class const_iterator
- {
- Self * container{};
- size_t bucket{};
- typename Impl::const_iterator current_it{};
-
- friend class TwoLevelHashTable;
-
- const_iterator(Self * container_, size_t bucket_, typename Impl::const_iterator current_it_)
- : container(container_), bucket(bucket_), current_it(current_it_) {}
-
- public:
- const_iterator() {}
- const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {}
-
- bool operator== (const const_iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
- bool operator!= (const const_iterator & rhs) const { return !(*this == rhs); }
-
- const_iterator & operator++()
- {
- ++current_it;
- if (current_it == container->impls[bucket].end())
- {
- ++bucket;
- current_it = container->beginOfNextNonEmptyBucket(bucket);
- }
-
- return *this;
- }
-
- const Cell & operator* () const { return *current_it; }
- const Cell * operator->() const { return current_it->getPtr(); }
-
- const Cell * getPtr() const { return current_it.getPtr(); }
- size_t getHash() const { return current_it.getHash(); }
- };
-
-
- const_iterator begin() const
- {
- size_t buck = 0;
- typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck);
- return { this, buck, impl_it };
- }
-
- iterator begin()
- {
- size_t buck = 0;
- typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck);
- return { this, buck, impl_it };
- }
-
- const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
- iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
-
-
- /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
- {
- size_t hash_value = hash(Cell::getKey(x));
-
- std::pair<LookupResult, bool> res;
- emplace(Cell::getKey(x), res.first, res.second, hash_value);
-
- if (res.second)
- insertSetMapped(res.first->getMapped(), x);
-
- return res;
- }
-
-
- /** Insert the key,
- * return an iterator to a position that can be used for `placement new` of the value,
- * as well as a flag indicating whether a new key was inserted.
- *
- * You have to `placement new` the value if you inserted a new key,
- * since when the hash table is destroyed, the destructor will be invoked for it!
- *
- * Example usage:
- *
- * Map::iterator it;
- * bool inserted;
- * map.emplace(key, it, inserted);
- * if (inserted)
- * new(&it->second) Mapped(value);
- */
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- size_t hash_value = hash(keyHolderGetKey(key_holder));
- emplace(key_holder, it, inserted, hash_value);
- }
-
-
- /// Same, but with a precalculated value of the hash function.
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it,
- bool & inserted, size_t hash_value)
- {
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].emplace(key_holder, it, inserted, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x, size_t hash_value)
- {
- size_t buck = getBucketFromHash(hash_value);
- return impls[buck].find(x, hash_value);
- }
-
- ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const
- {
- return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); }
-
- ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); }
-
-
- void write(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::writeChar(',', wb);
- impls[i].writeText(wb);
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::assertChar(',', rb);
- impls[i].readText(rb);
- }
- }
-
-
- size_t size() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].size();
-
- return res;
- }
-
- bool empty() const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- if (!impls[i].empty())
- return false;
-
- return true;
- }
-
- size_t getBufferSizeInBytes() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].getBufferSizeInBytes();
-
- return res;
- }
-};
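The routing decision in the table above is getBucketFromHash(): BITS_FOR_BUCKET bits are taken from just below bit 32 of the hash value, so a given key always lands in the same bucket of any table built with the same hash function, which is what makes the bucket-by-bucket parallel merge mentioned in the header comment possible. A small, hedged illustration of that rule:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static constexpr std::size_t BITS_FOR_BUCKET = 8;
    static constexpr std::size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
    static constexpr std::size_t MAX_BUCKET = NUM_BUCKETS - 1;

    // Same formula as in the deleted code: take the byte just below bit 32.
    // (As the original comment notes, this is bad for tables with more than 2^32 cells.)
    static std::size_t getBucketFromHash(std::size_t hash_value)
    {
        return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET;
    }

    int main()
    {
        std::uint64_t hash = 0xABCD1234ULL;
        std::printf("bucket = %zu\n", getBucketFromHash(hash));   // 0xAB == 171
    }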
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h
deleted file mode 100644
index 6bd8f74dbd6..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashMap.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/StringHashMap.h>
-#include <Common/HashTable/TwoLevelStringHashTable.h>
-
-template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap>
-class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>
-{
-public:
- using Key = StringRef;
- using Self = TwoLevelStringHashMap;
- using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
- using LookupResult = typename Base::LookupResult;
-
- using Base::Base;
-
- template <typename Func>
- void ALWAYS_INLINE forEachMapped(Func && func)
- {
- for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
- this->impls[i].forEachMapped(func);
- }
-
- TMapped & ALWAYS_INLINE operator[](const Key & x)
- {
- bool inserted;
- LookupResult it;
- this->emplace(x, it, inserted);
- if (inserted)
- new (&it->getMapped()) TMapped();
- return it->getMapped();
- }
-};
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h
deleted file mode 100644
index 93bbcb2835d..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/HashTable/TwoLevelStringHashTable.h
+++ /dev/null
@@ -1,235 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/StringHashTable.h>
-
-template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8>
-class TwoLevelStringHashTable : private boost::noncopyable
-{
-protected:
- using HashValue = size_t;
- using Self = TwoLevelStringHashTable;
-
-public:
- using Key = StringRef;
- using Impl = ImplTable;
-
- static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
- static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
-
- // TODO: currently hashing contains redundant computations when doing distributed or external aggregations
- size_t hash(const Key & x) const
- {
- return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; });
- }
-
- size_t operator()(const Key & x) const { return hash(x); }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
-
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
- Impl impls[NUM_BUCKETS];
-
- TwoLevelStringHashTable() {}
-
- template <typename Source>
- TwoLevelStringHashTable(const Source & src)
- {
- if (src.m0.hasZero())
- impls[0].m0.setHasZero(*src.m0.zeroValue());
-
- for (auto & v : src.m1)
- {
- size_t hash_value = v.getHash(src.m1);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].m1.insertUniqueNonZero(&v, hash_value);
- }
- for (auto & v : src.m2)
- {
- size_t hash_value = v.getHash(src.m2);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].m2.insertUniqueNonZero(&v, hash_value);
- }
- for (auto & v : src.m3)
- {
- size_t hash_value = v.getHash(src.m3);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].m3.insertUniqueNonZero(&v, hash_value);
- }
- for (auto & v : src.ms)
- {
- size_t hash_value = v.getHash(src.ms);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].ms.insertUniqueNonZero(&v, hash_value);
- }
- }
-
- // This function is mostly the same as StringHashTable::dispatch, but with
- // added bucket computation. See the comments there.
- template <typename Self, typename Func, typename KeyHolder>
- static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
- {
- StringHashTableHash hash;
- const StringRef & x = keyHolderGetKey(key_holder);
- const size_t sz = x.size;
- if (sz == 0)
- {
- keyHolderDiscardKey(key_holder);
- return func(self.impls[0].m0, VoidKey{}, 0);
- }
-
- if (x.data[x.size - 1] == 0)
- {
- // Strings with trailing zeros are not representable as fixed-size
- // string keys. Put them in the generic table.
- auto res = hash(x);
- auto buck = getBucketFromHash(res);
- return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder),
- res);
- }
-
- const char * p = x.data;
- // pending bits that need to be shifted out
- const char s = (-sz & 7) * 8;
- union
- {
- StringKey8 k8;
- StringKey16 k16;
- StringKey24 k24;
- UInt64 n[3];
- };
- switch ((sz - 1) >> 3)
- {
- case 0:
- {
- // first half page
- if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
- {
- memcpy(&n[0], p, 8);
- n[0] &= -1ul >> s;
- }
- else
- {
- const char * lp = x.data + x.size - 8;
- memcpy(&n[0], lp, 8);
- n[0] >>= s;
- }
- auto res = hash(k8);
- auto buck = getBucketFromHash(res);
- keyHolderDiscardKey(key_holder);
- return func(self.impls[buck].m1, k8, res);
- }
- case 1:
- {
- memcpy(&n[0], p, 8);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[1], lp, 8);
- n[1] >>= s;
- auto res = hash(k16);
- auto buck = getBucketFromHash(res);
- keyHolderDiscardKey(key_holder);
- return func(self.impls[buck].m2, k16, res);
- }
- case 2:
- {
- memcpy(&n[0], p, 16);
- const char * lp = x.data + x.size - 8;
- memcpy(&n[2], lp, 8);
- n[2] >>= s;
- auto res = hash(k24);
- auto buck = getBucketFromHash(res);
- keyHolderDiscardKey(key_holder);
- return func(self.impls[buck].m3, k24, res);
- }
- default:
- {
- auto res = hash(x);
- auto buck = getBucketFromHash(res);
- return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
- }
- }
- }
-
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted});
- }
-
- LookupResult ALWAYS_INLINE find(const Key x)
- {
- return dispatch(*this, x, typename Impl::FindCallable{});
- }
-
- ConstLookupResult ALWAYS_INLINE find(const Key x) const
- {
- return dispatch(*this, x, typename Impl::FindCallable{});
- }
-
- void write(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::writeChar(',', wb);
- impls[i].writeText(wb);
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- impls[i].read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::assertChar(',', rb);
- impls[i].readText(rb);
- }
- }
-
- size_t size() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].size();
-
- return res;
- }
-
- bool empty() const
- {
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- if (!impls[i].empty())
- return false;
-
- return true;
- }
-
- size_t getBufferSizeInBytes() const
- {
- size_t res = 0;
- for (size_t i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].getBufferSizeInBytes();
-
- return res;
- }
-};
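dispatch() above routes a string key by its length: 1-8 bytes go to m1 as a packed StringKey8, 9-16 bytes to m2, 17-24 bytes to m3, and everything else (including keys ending in a zero byte) to the generic sub-table ms. A hedged sketch of just that routing decision, leaving out the byte packing, hashing and callable forwarding:

    #include <cstdio>
    #include <string_view>

    // Sketch: which sub-table a key of a given length would be routed to.
    enum class SubTable { M0_Empty, M1_Key8, M2_Key16, M3_Key24, MS_Generic };

    static SubTable chooseSubTable(std::string_view key)
    {
        if (key.empty())
            return SubTable::M0_Empty;
        if (key.back() == '\0')                    // not representable as a fixed-size key
            return SubTable::MS_Generic;
        switch ((key.size() - 1) >> 3)
        {
            case 0:  return SubTable::M1_Key8;     // 1..8 bytes
            case 1:  return SubTable::M2_Key16;    // 9..16 bytes
            case 2:  return SubTable::M3_Key24;    // 17..24 bytes
            default: return SubTable::MS_Generic;  // longer keys
        }
    }

    int main()
    {
        std::printf("%d %d %d\n",
            static_cast<int>(chooseSubTable("short")),              // M1_Key8
            static_cast<int>(chooseSubTable("sixteen byte key")),   // M2_Key16
            static_cast<int>(chooseSubTable("a key that is much longer than twenty-four bytes")));   // MS_Generic
    }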
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h
deleted file mode 100644
index 6f7bf4a1f6b..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/PoolWithFailoverBase.h
+++ /dev/null
@@ -1,427 +0,0 @@
-#pragma once
-
-#include <time.h>
-#include <cstdlib>
-#include <climits>
-#include <random>
-#include <functional>
-#include <common/types.h>
-#include <common/scope_guard.h>
-#include <Common/PoolBase.h>
-#include <Common/ProfileEvents.h>
-#include <Common/NetException.h>
-#include <Common/Exception.h>
-#include <Common/randomSeed.h>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int ALL_CONNECTION_TRIES_FAILED;
- extern const int ALL_REPLICAS_ARE_STALE;
- extern const int LOGICAL_ERROR;
-}
-}
-
-namespace ProfileEvents
-{
- extern const Event DistributedConnectionFailTry;
- extern const Event DistributedConnectionFailAtAll;
-}
-
-/// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB.
-/// Initialized by several PoolBase objects.
-/// When a connection is requested, tries to create or choose an alive connection from one of the nested pools.
-/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples.
-/// Number of tries for a single pool is limited by max_tries parameter.
-/// The client can set nested pool priority by passing a GetPriority functor.
-///
-/// NOTE: if one of the nested pools blocks because it is empty, this pool will also block.
-///
-/// The client must provide a TryGetEntryFunc functor, which should perform a single try to get a connection from a nested pool.
-/// This functor can also check if the connection satisfies some eligibility criterion (e.g. check if
-/// the replica is up-to-date).
-
-template <typename TNestedPool>
-class PoolWithFailoverBase : private boost::noncopyable
-{
-public:
- using NestedPool = TNestedPool;
- using NestedPoolPtr = std::shared_ptr<NestedPool>;
- using Entry = typename NestedPool::Entry;
- using NestedPools = std::vector<NestedPoolPtr>;
-
- PoolWithFailoverBase(
- NestedPools nested_pools_,
- time_t decrease_error_period_,
- size_t max_error_cap_,
- Poco::Logger * log_)
- : nested_pools(std::move(nested_pools_))
- , decrease_error_period(decrease_error_period_)
- , max_error_cap(max_error_cap_)
- , shared_pool_states(nested_pools.size())
- , log(log_)
- {
- for (size_t i = 0; i < nested_pools.size(); ++i)
- shared_pool_states[i].config_priority = nested_pools[i]->getPriority();
- }
-
- struct TryResult
- {
- TryResult() = default;
-
- explicit TryResult(Entry entry_)
- : entry(std::move(entry_))
- , is_usable(true)
- , is_up_to_date(true)
- {
- }
-
- void reset()
- {
- entry = Entry();
- is_usable = false;
- is_up_to_date = false;
- staleness = 0.0;
- }
-
- Entry entry;
- bool is_usable = false; /// If false, the entry is unusable for current request
- /// (but may be usable for other requests, so error counts are not incremented)
- bool is_up_to_date = false; /// If true, the entry is a connection to up-to-date replica.
- double staleness = 0.0; /// Helps choosing the "least stale" option when all replicas are stale.
- };
-
- struct PoolState;
-
- using PoolStates = std::vector<PoolState>;
-
- struct ShuffledPool
- {
- NestedPool * pool{};
- const PoolState * state{};
- size_t index = 0;
- size_t error_count = 0;
- size_t slowdown_count = 0;
- };
-
- /// This functor must be provided by a client. It must perform a single try that takes a connection
- /// from the provided pool and checks that it is good.
- using TryGetEntryFunc = std::function<TryResult(NestedPool & pool, std::string & fail_message)>;
-
- /// The client can provide this functor to affect load balancing - the index of a pool is passed to
- /// this functor. Pools with a lower result value will be tried first.
- using GetPriorityFunc = std::function<size_t(size_t index)>;
-
-
- /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
- /// The method will throw if it is unable to get min_entries alive connections or
- /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas.
- std::vector<TryResult> getMany(
- size_t min_entries, size_t max_entries, size_t max_tries,
- size_t max_ignored_errors,
- bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry,
- const GetPriorityFunc & get_priority = GetPriorityFunc());
-
-protected:
-
- /// Returns a single connection.
- Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc());
-
- /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states.
- PoolStates updatePoolStates(size_t max_ignored_errors);
-
- void updateErrorCounts(PoolStates & states, time_t & last_decrease_time) const;
-
- std::vector<ShuffledPool> getShuffledPools(size_t max_ignored_errors, const GetPriorityFunc & get_priority);
-
- inline void updateSharedErrorCounts(std::vector<ShuffledPool> & shuffled_pools);
-
- auto getPoolExtendedStates() const
- {
- std::lock_guard lock(pool_states_mutex);
- return std::make_tuple(shared_pool_states, nested_pools, last_error_decrease_time);
- }
-
- NestedPools nested_pools;
-
- const time_t decrease_error_period;
- const size_t max_error_cap;
-
- mutable std::mutex pool_states_mutex;
- PoolStates shared_pool_states;
- /// The time when error counts were last decreased.
- time_t last_error_decrease_time = 0;
-
- Poco::Logger * log;
-};
-
-
-template <typename TNestedPool>
-std::vector<typename PoolWithFailoverBase<TNestedPool>::ShuffledPool>
-PoolWithFailoverBase<TNestedPool>::getShuffledPools(
- size_t max_ignored_errors, const PoolWithFailoverBase::GetPriorityFunc & get_priority)
-{
- /// Update random numbers and error counts.
- PoolStates pool_states = updatePoolStates(max_ignored_errors);
- if (get_priority)
- {
- for (size_t i = 0; i < pool_states.size(); ++i)
- pool_states[i].priority = get_priority(i);
- }
-
- /// Sort the pools into order in which they will be tried (based on respective PoolStates).
- std::vector<ShuffledPool> shuffled_pools;
- shuffled_pools.reserve(nested_pools.size());
- for (size_t i = 0; i < nested_pools.size(); ++i)
- shuffled_pools.push_back(ShuffledPool{nested_pools[i].get(), &pool_states[i], i, 0});
- std::sort(
- shuffled_pools.begin(), shuffled_pools.end(),
- [](const ShuffledPool & lhs, const ShuffledPool & rhs)
- {
- return PoolState::compare(*lhs.state, *rhs.state);
- });
-
- return shuffled_pools;
-}
-
-template <typename TNestedPool>
-inline void PoolWithFailoverBase<TNestedPool>::updateSharedErrorCounts(std::vector<ShuffledPool> & shuffled_pools)
-{
- std::lock_guard lock(pool_states_mutex);
- for (const ShuffledPool & pool: shuffled_pools)
- {
- auto & pool_state = shared_pool_states[pool.index];
- pool_state.error_count = std::min<UInt64>(max_error_cap, pool_state.error_count + pool.error_count);
- pool_state.slowdown_count += pool.slowdown_count;
- }
-}
-
-template <typename TNestedPool>
-typename TNestedPool::Entry
-PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
-{
- std::vector<TryResult> results = getMany(
- 1 /* min entries */, 1 /* max entries */, 1 /* max tries */,
- max_ignored_errors, fallback_to_stale_replicas,
- try_get_entry, get_priority);
- if (results.empty() || results[0].entry.isNull())
- throw DB::Exception(
- "PoolWithFailoverBase::getMany() returned less than min_entries entries.",
- DB::ErrorCodes::LOGICAL_ERROR);
- return results[0].entry;
-}
-
-template <typename TNestedPool>
-std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult>
-PoolWithFailoverBase<TNestedPool>::getMany(
- size_t min_entries, size_t max_entries, size_t max_tries,
- size_t max_ignored_errors,
- bool fallback_to_stale_replicas,
- const TryGetEntryFunc & try_get_entry,
- const GetPriorityFunc & get_priority)
-{
- std::vector<ShuffledPool> shuffled_pools = getShuffledPools(max_ignored_errors, get_priority);
-
- /// We will try to get a connection from each pool until a connection is produced or max_tries is reached.
- std::vector<TryResult> try_results(shuffled_pools.size());
- size_t entries_count = 0;
- size_t usable_count = 0;
- size_t up_to_date_count = 0;
- size_t failed_pools_count = 0;
-
- /// At exit, update the shared error counts with the error counts that occurred during this call.
- SCOPE_EXIT(
- {
- updateSharedErrorCounts(shuffled_pools);
- });
-
- std::string fail_messages;
- bool finished = false;
- while (!finished)
- {
- for (size_t i = 0; i < shuffled_pools.size(); ++i)
- {
- if (up_to_date_count >= max_entries /// Already enough good entries.
- || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced.
- {
- finished = true;
- break;
- }
-
- ShuffledPool & shuffled_pool = shuffled_pools[i];
- TryResult & result = try_results[i];
- if (max_tries && (shuffled_pool.error_count >= max_tries || !result.entry.isNull()))
- continue;
-
- std::string fail_message;
- result = try_get_entry(*shuffled_pool.pool, fail_message);
-
- if (!fail_message.empty())
- fail_messages += fail_message + '\n';
-
- if (!result.entry.isNull())
- {
- ++entries_count;
- if (result.is_usable)
- {
- ++usable_count;
- if (result.is_up_to_date)
- ++up_to_date_count;
- }
- }
- else
- {
- LOG_WARNING(log, "Connection failed at try â„–{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
- ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
-
- shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1);
-
- if (shuffled_pool.error_count >= max_tries)
- {
- ++failed_pools_count;
- ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll);
- }
- }
- }
- }
-
- if (usable_count < min_entries)
- throw DB::NetException(
- "All connection tries failed. Log: \n\n" + fail_messages + "\n",
- DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED);
-
- try_results.erase(
- std::remove_if(
- try_results.begin(), try_results.end(),
- [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; }),
- try_results.end());
-
- /// Sort so that preferred items are near the beginning.
- std::stable_sort(
- try_results.begin(), try_results.end(),
- [](const TryResult & left, const TryResult & right)
- {
- return std::forward_as_tuple(!left.is_up_to_date, left.staleness)
- < std::forward_as_tuple(!right.is_up_to_date, right.staleness);
- });
-
- if (fallback_to_stale_replicas)
- {
- /// There are not enough up-to-date entries, but we are allowed to return stale entries.
- /// Gather all up-to-date ones and least-bad stale ones.
-
- size_t size = std::min(try_results.size(), max_entries);
- try_results.resize(size);
- }
- else if (up_to_date_count >= min_entries)
- {
- /// There are enough up-to-date entries.
- try_results.resize(up_to_date_count);
- }
- else
- throw DB::Exception(
- "Could not find enough connections to up-to-date replicas. Got: " + std::to_string(up_to_date_count)
- + ", needed: " + std::to_string(min_entries),
- DB::ErrorCodes::ALL_REPLICAS_ARE_STALE);
-
- return try_results;
-}
-
-template <typename TNestedPool>
-struct PoolWithFailoverBase<TNestedPool>::PoolState
-{
- UInt64 error_count = 0;
- /// The number of slowdowns that led to changing replica in HedgedRequestsFactory
- UInt64 slowdown_count = 0;
- /// Priority from the <remote_server> configuration.
- Int64 config_priority = 1;
- /// Priority from the GetPriorityFunc.
- Int64 priority = 0;
- UInt32 random = 0;
-
- void randomize()
- {
- random = rng();
- }
-
- static bool compare(const PoolState & lhs, const PoolState & rhs)
- {
- return std::forward_as_tuple(lhs.error_count, lhs.slowdown_count, lhs.config_priority, lhs.priority, lhs.random)
- < std::forward_as_tuple(rhs.error_count, rhs.slowdown_count, rhs.config_priority, rhs.priority, rhs.random);
- }
-
-private:
- std::minstd_rand rng = std::minstd_rand(randomSeed());
-};
-
-template <typename TNestedPool>
-typename PoolWithFailoverBase<TNestedPool>::PoolStates
-PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors)
-{
- PoolStates result;
- result.reserve(nested_pools.size());
-
- {
- std::lock_guard lock(pool_states_mutex);
-
- for (auto & state : shared_pool_states)
- state.randomize();
-
- updateErrorCounts(shared_pool_states, last_error_decrease_time);
- result.assign(shared_pool_states.begin(), shared_pool_states.end());
- }
-
- /// distributed_replica_max_ignored_errors
- for (auto & state : result)
- state.error_count = std::max<UInt64>(0, state.error_count - max_ignored_errors);
-
- return result;
-}
-
-template <typename TNestedPool>
-void PoolWithFailoverBase<TNestedPool>::updateErrorCounts(PoolWithFailoverBase<TNestedPool>::PoolStates & states, time_t & last_decrease_time) const
-{
- time_t current_time = time(nullptr);
-
- if (last_decrease_time) //-V1051
- {
- time_t delta = current_time - last_decrease_time;
-
- if (delta >= 0)
- {
- const UInt64 MAX_BITS = sizeof(UInt64) * CHAR_BIT;
- size_t shift_amount = MAX_BITS;
- /// Divide error counts by 2 every decrease_error_period seconds.
- if (decrease_error_period)
- shift_amount = delta / decrease_error_period;
- /// Update the time, but don't do it more often than once per period.
- /// Otherwise, if the function is called often enough, the error count will never decrease.
- if (shift_amount)
- last_decrease_time = current_time;
-
- if (shift_amount >= MAX_BITS)
- {
- for (auto & state : states)
- {
- state.error_count = 0;
- state.slowdown_count = 0;
- }
- }
- else if (shift_amount)
- {
- for (auto & state : states)
- {
- state.error_count >>= shift_amount;
- state.slowdown_count >>= shift_amount;
- }
- }
- }
- }
- else
- last_decrease_time = current_time;
-}
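Two details of the pool above are easy to miss: replicas are tried in the order of their (error_count, slowdown_count, config_priority, priority, random) tuples, and updateErrorCounts() halves the error counters once per decrease_error_period seconds by right-shifting them. A hedged, standalone sketch of the decay rule only:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <ctime>
    #include <vector>

    // Sketch: counters are shifted right by (elapsed / period) and cleared
    // entirely when the shift would exceed the counter width, as above.
    struct State { std::uint64_t error_count = 0; };

    static void decayErrors(std::vector<State> & states, std::time_t & last_decrease_time,
                            std::time_t now, std::time_t decrease_error_period)
    {
        if (!last_decrease_time)
        {
            last_decrease_time = now;
            return;
        }

        std::time_t delta = now - last_decrease_time;
        if (delta < 0)
            return;

        const std::size_t MAX_BITS = sizeof(std::uint64_t) * 8;
        std::size_t shift_amount = decrease_error_period
            ? static_cast<std::size_t>(delta / decrease_error_period) : MAX_BITS;
        if (shift_amount)
            last_decrease_time = now;   // decay at most once per period

        for (auto & state : states)
            state.error_count = (shift_amount >= MAX_BITS) ? 0 : state.error_count >> shift_amount;
    }

    int main()
    {
        std::vector<State> states{{8}, {3}};
        std::time_t last = 100;
        decayErrors(states, last, /*now*/ 220, /*period*/ 60);   // two periods elapsed -> shift by 2
        std::printf("%llu %llu\n",
                    (unsigned long long) states[0].error_count,   // 2
                    (unsigned long long) states[1].error_count);  // 0
    }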
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/RWLock.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/RWLock.cpp
deleted file mode 100644
index 66c0c7c101c..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/RWLock.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-#include "RWLock.h"
-#include <Common/Stopwatch.h>
-#include <Common/Exception.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/ProfileEvents.h>
-
-
-namespace ProfileEvents
-{
- extern const Event RWLockAcquiredReadLocks;
- extern const Event RWLockAcquiredWriteLocks;
- extern const Event RWLockReadersWaitMilliseconds;
- extern const Event RWLockWritersWaitMilliseconds;
-}
-
-
-namespace CurrentMetrics
-{
- extern const Metric RWLockWaitingReaders;
- extern const Metric RWLockWaitingWriters;
- extern const Metric RWLockActiveReaders;
- extern const Metric RWLockActiveWriters;
-}
-
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-
-/** A one-time-use-object that represents lock ownership
- * For the purpose of exception-safety guarantees, LockHolder is to be used in two steps:
- * 1. Create an instance (allocating all the needed memory)
- * 2. Associate the instance with the lock (attach to the lock and locking request group)
- */
-class RWLockImpl::LockHolderImpl
-{
- bool bound{false};
- String query_id;
- CurrentMetrics::Increment active_client_increment;
- RWLock parent;
- GroupsContainer::iterator it_group;
-
-public:
- LockHolderImpl(const LockHolderImpl & other) = delete;
- LockHolderImpl& operator=(const LockHolderImpl & other) = delete;
-
- /// Implicit memory allocation for query_id is done here
- LockHolderImpl(const String & query_id_, Type type)
- : query_id{query_id_}
- , active_client_increment{
- type == Type::Read ? CurrentMetrics::RWLockActiveReaders : CurrentMetrics::RWLockActiveWriters}
- {
- }
-
- ~LockHolderImpl()
- {
- if (bound && parent != nullptr)
- parent->unlock(it_group, query_id);
- else
- active_client_increment.destroy();
- }
-
-private:
- /// A separate method which binds the lock holder to the owned lock
- /// N.B. It is very important that this method produces no allocations
- bool bindWith(RWLock && parent_, GroupsContainer::iterator it_group_) noexcept
- {
- if (bound || parent_ == nullptr)
- return false;
- it_group = it_group_;
- parent = std::move(parent_);
- ++it_group->requests;
- bound = true;
- return true;
- }
-
- friend class RWLockImpl;
-};
-
-
-/** General algorithm:
- * Step 1. Try the FastPath (for both Reads/Writes)
- * Step 2. Find ourselves request group: attach to existing or create a new one
- * Step 3. Wait/timed wait for ownership signal
- * Step 3a. Check if we must handle timeout and exit
- * Step 4. Persist lock ownership
- *
- * To guarantee that we do not get any piece of our data corrupted:
- * 1. Perform all actions that include allocations before changing lock's internal state
- * 2. Roll back any changes that make the state inconsistent
- *
- * Note: "SM" in the commentaries below stands for STATE MODIFICATION
- */
-RWLockImpl::LockHolder
-RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & lock_timeout_ms)
-{
- const auto lock_deadline_tp =
- (lock_timeout_ms == std::chrono::milliseconds(0))
- ? std::chrono::time_point<std::chrono::steady_clock>::max()
- : std::chrono::steady_clock::now() + lock_timeout_ms;
-
- const bool request_has_query_id = query_id != NO_QUERY;
-
- Stopwatch watch(CLOCK_MONOTONIC_COARSE);
- CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders
- : CurrentMetrics::RWLockWaitingWriters);
- auto finalize_metrics = [type, &watch] ()
- {
- ProfileEvents::increment((type == Read) ? ProfileEvents::RWLockAcquiredReadLocks
- : ProfileEvents::RWLockAcquiredWriteLocks);
- ProfileEvents::increment((type == Read) ? ProfileEvents::RWLockReadersWaitMilliseconds
- : ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds());
- };
-
- /// This object is placed above unique_lock, because it may lock in destructor.
- auto lock_holder = std::make_shared<LockHolderImpl>(query_id, type);
-
- std::unique_lock state_lock(internal_state_mtx);
-
- /// The FastPath:
- /// Check if the same query_id already holds the required lock, in which case we can proceed without waiting
- if (request_has_query_id)
- {
- const auto owner_query_it = owner_queries.find(query_id);
- if (owner_query_it != owner_queries.end())
- {
- if (wrlock_owner != writers_queue.end())
- throw Exception(
- "RWLockImpl::getLock(): RWLock is already locked in exclusive mode",
- ErrorCodes::LOGICAL_ERROR);
-
- /// Lock upgrading is not supported
- if (type == Write)
- throw Exception(
- "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked",
- ErrorCodes::LOGICAL_ERROR);
-
- /// N.B. Type is Read here, query_id is not empty and it_query is a valid iterator
- ++owner_query_it->second; /// SM1: nothrow
- lock_holder->bindWith(shared_from_this(), rdlock_owner); /// SM2: nothrow
-
- finalize_metrics();
- return lock_holder;
- }
- }
-
- if (type == Type::Write)
- {
- writers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back)
- }
- else if (readers_queue.empty() ||
- (rdlock_owner == readers_queue.begin() && readers_queue.size() == 1 && !writers_queue.empty()))
- {
- readers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back)
- }
- GroupsContainer::iterator it_group =
- (type == Type::Write) ? std::prev(writers_queue.end()) : std::prev(readers_queue.end());
-
- /// Lock is free to acquire
- if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end())
- {
- (type == Read ? rdlock_owner : wrlock_owner) = it_group; /// SM2: nothrow
- }
- else
- {
- /// Wait until our group becomes the lock owner
- const auto predicate = [&] () { return it_group == (type == Read ? rdlock_owner : wrlock_owner); };
-
- if (lock_deadline_tp == std::chrono::time_point<std::chrono::steady_clock>::max())
- {
- ++it_group->requests;
- it_group->cv.wait(state_lock, predicate);
- --it_group->requests;
- }
- else
- {
- ++it_group->requests;
- const auto wait_result = it_group->cv.wait_until(state_lock, lock_deadline_tp, predicate);
- --it_group->requests;
-
- /// Step 3a. Check if we must handle timeout and exit
- if (!wait_result) /// Wait timed out!
- {
- /// Rollback(SM1): nothrow
- if (it_group->requests == 0)
- {
- (type == Read ? readers_queue : writers_queue).erase(it_group);
- }
-
- return nullptr;
- }
- }
- }
-
- if (request_has_query_id)
- {
- try
- {
- const auto emplace_res =
- owner_queries.emplace(query_id, 1); /// SM2: may throw on insertion
- if (!emplace_res.second)
- ++emplace_res.first->second; /// SM3: nothrow
- }
- catch (...)
- {
- /// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety
- /// We only need to roll back the changes to these objects: owner_queries and the readers/writers queue
- if (it_group->requests == 0)
- dropOwnerGroupAndPassOwnership(it_group); /// Rollback(SM1): nothrow
-
- throw;
- }
- }
-
- lock_holder->bindWith(shared_from_this(), it_group); /// SM: nothrow
-
- finalize_metrics();
- return lock_holder;
-}
-
-
-/** The sequence points of acquiring lock ownership by an instance of LockHolderImpl:
- * 1. owner_queries is updated
- * 2. request group is updated by LockHolderImpl which in turn becomes "bound"
- *
- * If, by the time the destructor of LockHolderImpl is called, the instance has been "bound",
- * it is guaranteed that both steps have been executed successfully and the resulting state is consistent.
- * With the mutex locked, the order of steps to restore the lock's state can be arbitrary.
- *
- * We do not employ try-catch: if something bad happens, there is nothing we can do =(
- */
-void RWLockImpl::unlock(GroupsContainer::iterator group_it, const String & query_id) noexcept
-{
- std::lock_guard state_lock(internal_state_mtx);
-
- /// All of these are undefined behavior, and there is nothing we can do!
- if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end())
- return;
- if (rdlock_owner != readers_queue.end() && group_it != rdlock_owner)
- return;
- if (wrlock_owner != writers_queue.end() && group_it != wrlock_owner)
- return;
-
- /// If query_id is not empty it must be listed in parent->owner_queries
- if (query_id != NO_QUERY)
- {
- const auto owner_query_it = owner_queries.find(query_id);
- if (owner_query_it != owner_queries.end())
- {
- if (--owner_query_it->second == 0) /// SM: nothrow
- owner_queries.erase(owner_query_it); /// SM: nothrow
- }
- }
-
- /// If we are the last remaining referrer, remove this QNode and notify the next one
- if (--group_it->requests == 0) /// SM: nothrow
- dropOwnerGroupAndPassOwnership(group_it);
-}
-
-
-void RWLockImpl::dropOwnerGroupAndPassOwnership(GroupsContainer::iterator group_it) noexcept
-{
- rdlock_owner = readers_queue.end();
- wrlock_owner = writers_queue.end();
-
- if (group_it->type == Read)
- {
- readers_queue.erase(group_it);
- /// Prepare next phase
- if (!writers_queue.empty())
- {
- wrlock_owner = writers_queue.begin();
- }
- else
- {
- rdlock_owner = readers_queue.begin();
- }
- }
- else
- {
- writers_queue.erase(group_it);
- /// Prepare next phase
- if (!readers_queue.empty())
- {
- rdlock_owner = readers_queue.begin();
- }
- else
- {
- wrlock_owner = writers_queue.begin();
- }
- }
-
- if (rdlock_owner != readers_queue.end())
- {
- rdlock_owner->cv.notify_all();
- }
- else if (wrlock_owner != writers_queue.end())
- {
- wrlock_owner->cv.notify_one();
- }
-}
-}
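Step 3 of getLock() above waits on the group's condition variable until the group becomes the lock owner; a zero lock_timeout_ms means "wait forever", otherwise a deadline is computed and, on timeout, the queue entry is rolled back and nullptr is returned. A hedged sketch of just that wait pattern, with is_owner standing in for the real ownership predicate:

    #include <chrono>
    #include <condition_variable>
    #include <cstdio>
    #include <mutex>

    // Sketch: wait forever when the timeout is zero, otherwise wait until the
    // deadline and report failure so the caller can roll back its queue entry.
    static bool waitForOwnership(std::condition_variable & cv, std::unique_lock<std::mutex> & lock,
                                 bool & is_owner, std::chrono::milliseconds timeout)
    {
        const auto deadline = (timeout == std::chrono::milliseconds(0))
            ? std::chrono::steady_clock::time_point::max()
            : std::chrono::steady_clock::now() + timeout;

        const auto predicate = [&] { return is_owner; };

        if (deadline == std::chrono::steady_clock::time_point::max())
        {
            cv.wait(lock, predicate);
            return true;
        }
        return cv.wait_until(lock, deadline, predicate);   // false -> timed out
    }

    int main()
    {
        std::mutex mutex;
        std::condition_variable cv;
        bool is_owner = false;

        std::unique_lock<std::mutex> lock(mutex);
        bool acquired = waitForOwnership(cv, lock, is_owner, std::chrono::milliseconds(50));
        std::printf("%s\n", acquired ? "acquired" : "timed out");   // "timed out"
    }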
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h
deleted file mode 100644
index b1faac641af..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SensitiveDataMasker.h
+++ /dev/null
@@ -1,72 +0,0 @@
-#pragma once
-
-#include <memory>
-#include <vector>
-#include <cstdint>
-
-namespace Poco
-{
-namespace Util
-{
- class AbstractConfiguration;
-}
-}
-
-/// SensitiveDataMasker allows removing sensitive data from queries using a set of regexp-based rules
-
-/// It's used as a singleton via getInstance method
-
-/// Initially it is empty (nullptr), and after one-time manual initialization
-/// (done by a setInstance call) it takes the proper value, which
-/// is stored in a unique_ptr.
-
-/// It looks like the singleton is the best option here, as
-/// two users of that object (OwnSplitChannel & Interpreters/executeQuery)
-/// can't own/share that Masker properly without synchronization & locks,
-/// and we can't afford setting global locks for each logged line.
-
-/// I've considered singleton alternatives, but it's unclear who should own the object,
-/// and they introduce unnecessary complexity in the implementation (passing references back and forth):
-///
-/// The context can't own it, as Context is destroyed before the logger;
-/// the logger lives longer, and logging can still happen after Context destruction.
-/// Resetting the masker in the logger at the moment of
-/// context destruction can't be done safely w/o synchronization / locks.
-///
-/// The logger is Poco-derived and I didn't want to break its interface;
-/// also, the logger can be dynamically reconfigured without a server restart,
-/// and it actually recreates OwnSplitChannel when reconfiguration happens,
-/// which makes it quite tricky. So it is a bad candidate for owning the masker too.
-
-namespace NDB
-{
-class SensitiveDataMasker
-{
-private:
- class MaskingRule;
- std::vector<std::unique_ptr<MaskingRule>> all_masking_rules;
- static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker;
-
-public:
- SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
- ~SensitiveDataMasker();
-
- /// Returns the number of matched rules.
- size_t wipeSensitiveData(std::string & data) const;
-
- /// setInstance is not thread-safe and should be called once in single-thread mode.
- /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367
- static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_);
- static SensitiveDataMasker * getInstance();
-
- /// Used in tests.
- void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string);
-
-#ifndef NDEBUG
- void printStats();
-#endif
-
- size_t rulesCount() const;
-};
-
-};
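For illustration, the rule set behind wipeSensitiveData() can be pictured as an ordered list of (regexp, replacement) pairs, with the return value counting how many rules matched. The sketch below uses std::regex and hypothetical rule contents; the real class is configured from a Poco AbstractConfiguration section, which is not reproduced here:

    #include <cstddef>
    #include <cstdio>
    #include <regex>
    #include <string>
    #include <vector>

    // Sketch: apply every matching rule in order and count the matches.
    struct MaskingRule
    {
        std::string name;
        std::regex pattern;
        std::string replacement;
    };

    static std::size_t wipeSensitiveData(std::string & data, const std::vector<MaskingRule> & rules)
    {
        std::size_t matched_rules = 0;
        for (const auto & rule : rules)
        {
            if (std::regex_search(data, rule.pattern))
            {
                data = std::regex_replace(data, rule.pattern, rule.replacement);
                ++matched_rules;
            }
        }
        return matched_rules;
    }

    int main()
    {
        std::vector<MaskingRule> rules;
        rules.push_back({"hide passwords", std::regex("password='[^']*'"), "password='***'"});

        std::string query = "CREATE USER u IDENTIFIED WITH password='secret'";
        std::size_t matched = wipeSensitiveData(query, rules);
        std::printf("%zu rule(s) matched: %s\n", matched, query.c_str());
    }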
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SettingsChanges.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SettingsChanges.cpp
deleted file mode 100644
index 00af9c6bd58..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/SettingsChanges.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <Common/SettingsChanges.h>
-
-
-namespace NDB
-{
-namespace
-{
- SettingChange * find(SettingsChanges & changes, const std::string_view & name)
- {
- auto it = std::find_if(changes.begin(), changes.end(), [&name](const SettingChange & change) { return change.name == name; });
- if (it == changes.end())
- return nullptr;
- return &*it;
- }
-
- const SettingChange * find(const SettingsChanges & changes, const std::string_view & name)
- {
- auto it = std::find_if(changes.begin(), changes.end(), [&name](const SettingChange & change) { return change.name == name; });
- if (it == changes.end())
- return nullptr;
- return &*it;
- }
-}
-
-bool SettingsChanges::tryGet(const std::string_view & name, Field & out_value) const
-{
- const auto * change = find(*this, name);
- if (!change)
- return false;
- out_value = change->value;
- return true;
-}
-
-const Field * SettingsChanges::tryGet(const std::string_view & name) const
-{
- const auto * change = find(*this, name);
- if (!change)
- return nullptr;
- return &change->value;
-}
-
-Field * SettingsChanges::tryGet(const std::string_view & name)
-{
- auto * change = find(*this, name);
- if (!change)
- return nullptr;
- return &change->value;
-}
-
-}
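The pattern above is deliberately simple: setting changes are kept in a plain vector (order is preserved and the lists are short), and lookup is a linear std::find_if by name. A standalone sketch of the same tryGet() shape, with std::string standing in for Field:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <string_view>
    #include <vector>

    struct SettingChange
    {
        std::string name;
        std::string value;   // the real code stores a Field; a string stands in here
    };

    using SettingsChanges = std::vector<SettingChange>;

    // Sketch: return a pointer to the value, or nullptr if the name is absent.
    static const std::string * tryGet(const SettingsChanges & changes, std::string_view name)
    {
        auto it = std::find_if(changes.begin(), changes.end(),
            [&](const SettingChange & change) { return change.name == name; });
        return it == changes.end() ? nullptr : &it->value;
    }

    int main()
    {
        SettingsChanges changes{{"max_threads", "8"}, {"max_block_size", "65536"}};
        if (const auto * value = tryGet(changes, "max_threads"))
            std::printf("max_threads = %s\n", value->c_str());
    }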
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Stopwatch.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Stopwatch.cpp
deleted file mode 100644
index b17e343f1af..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Stopwatch.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <sys/resource.h>
-#include "Stopwatch.h"
-
-StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current()
-{
- StopwatchRUsage::Timestamp res;
-
- ::rusage rusage {};
-#if !defined(__APPLE__)
-#if defined(OS_SUNOS)
- ::getrusage(RUSAGE_LWP, &rusage);
-#else
- ::getrusage(RUSAGE_THREAD, &rusage);
-#endif // OS_SUNOS
-#endif // __APPLE__
- res.user_ns = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL;
- res.sys_ns = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL;
- return res;
-}
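The deleted Timestamp::current() reads per-thread CPU usage with getrusage (RUSAGE_THREAD on Linux, RUSAGE_LWP on Solaris, and a zeroed struct on macOS) and converts the result to nanoseconds. A hedged, Linux-only sketch of the same measurement:

    #include <cstdint>
    #include <cstdio>
    #include <sys/resource.h>

    int main()
    {
        // Burn a little CPU so there is something to measure.
        volatile std::uint64_t x = 0;
        for (std::uint64_t i = 0; i < 10000000; ++i)
            x += i;

        ::rusage usage {};
        ::getrusage(RUSAGE_THREAD, &usage);

        // Same conversion as the deleted code: seconds * 1e9 + microseconds * 1e3.
        std::uint64_t user_ns = usage.ru_utime.tv_sec * 1000000000UL + usage.ru_utime.tv_usec * 1000UL;
        std::uint64_t sys_ns  = usage.ru_stime.tv_sec * 1000000000UL + usage.ru_stime.tv_usec * 1000UL;
        std::printf("user: %llu ns, sys: %llu ns\n",
                    (unsigned long long) user_ns, (unsigned long long) sys_ns);
    }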
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h
deleted file mode 100644
index 33be2853068..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/Types.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include <common/types.h>
-#include <future>
-#include <memory>
-#include <vector>
-#include <Common/ZooKeeper/IKeeper.h>
-#include <Poco/Event.h>
-
-
-namespace zkutil
-{
-
-using Strings = std::vector<std::string>;
-
-
-namespace CreateMode
-{
- extern const int Persistent;
- extern const int Ephemeral;
- extern const int EphemeralSequential;
- extern const int PersistentSequential;
-}
-
-using EventPtr = std::shared_ptr<Poco::Event>;
-
-/// Gets multiple asynchronous results.
-/// In each pair, the first element is the path and the second is the response, e.g. CreateResponse, RemoveResponse.
-template <typename R>
-using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
-
-Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode);
-Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
-Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
-Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/UTF8Helpers.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/UTF8Helpers.cpp
deleted file mode 100644
index 0050f9fca1a..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/UTF8Helpers.cpp
+++ /dev/null
@@ -1,207 +0,0 @@
-#include <Common/UTF8Helpers.h>
-
-#include <widechar_width.h>
-
-
-namespace NDB
-{
-namespace UTF8
-{
-
-// based on https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
-// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions: The above copyright
-// notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-
-static const UInt8 TABLE[] =
-{
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
- 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
- 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
- 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
- 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
- 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
- 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-struct UTF8Decoder
-{
- enum
- {
- ACCEPT = 0,
- REJECT = 1
- };
-
- UInt32 decode(UInt8 byte)
- {
- UInt32 type = TABLE[byte];
- codepoint = (state != ACCEPT) ? (byte & 0x3fu) | (codepoint << 6) : (0xff >> type) & (byte);
- state = TABLE[256 + state * 16 + type];
- return state;
- }
-
- void reset()
- {
- state = ACCEPT;
- codepoint = 0xfffdU;
- }
-
- UInt8 state {ACCEPT};
- UInt32 codepoint {0};
-};
-
-static int wcwidth(wchar_t wc)
-{
- int width = widechar_wcwidth(wc);
- switch (width)
- {
- case widechar_nonprint:
- case widechar_combining:
- case widechar_unassigned:
- return 0;
- case widechar_ambiguous:
- case widechar_private_use:
- case widechar_widened_in_9:
- return 1;
- default:
- return width;
- }
-}
-
-
-namespace
-{
-
-enum ComputeWidthMode
-{
- Width, /// Calculate and return visible width
- BytesBeforLimit /// Calculate and return the maximum number of bytes when the substring fits in the visible width.
-};
-
-template <ComputeWidthMode mode>
-static size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
-{
- UTF8Decoder decoder;
- size_t width = 0;
- size_t rollback = 0;
- for (size_t i = 0; i < size; ++i)
- {
- /// Quickly skip regular ASCII
-
-#if defined(__SSE2__)
- const auto lower_bound = _mm_set1_epi8(32);
- const auto upper_bound = _mm_set1_epi8(126);
-
- while (i + 15 < size)
- {
- __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
-
- const uint16_t non_regular_width_mask = _mm_movemask_epi8(
- _mm_or_si128(
- _mm_cmplt_epi8(bytes, lower_bound),
- _mm_cmpgt_epi8(bytes, upper_bound)));
-
- if (non_regular_width_mask)
- {
- auto num_regular_chars = __builtin_ctz(non_regular_width_mask);
- width += num_regular_chars;
- i += num_regular_chars;
- break;
- }
- else
- {
- i += 16;
- width += 16;
- }
- }
-#endif
-
- while (i < size && isPrintableASCII(data[i]))
- {
- ++width;
- ++i;
- }
-
- /// Now i points to position in bytes after regular ASCII sequence
- /// and if width > limit, then (width - limit) is the number of extra ASCII characters after width limit.
- if (mode == BytesBeforLimit && width > limit)
- return i - (width - limit);
-
- switch (decoder.decode(data[i]))
- {
- case UTF8Decoder::REJECT:
- {
- decoder.reset();
- // invalid sequences seem to have zero width in modern terminals
- // tested in libvte-based, alacritty, urxvt and xterm
- i -= rollback;
- rollback = 0;
- break;
- }
- case UTF8Decoder::ACCEPT:
- {
- // there are special control characters that manipulate the terminal output.
- // (`0x08`, `0x09`, `0x0a`, `0x0b`, `0x0c`, `0x0d`, `0x1b`)
- // Since we don't touch the original column data, there is no easy way to escape them.
- // TODO: escape control characters
- // TODO: multiline support for '\n'
-
- // special treatment for '\t'
- size_t next_width = width;
- if (decoder.codepoint == '\t')
- next_width += 8 - (prefix + width) % 8;
- else
- next_width += wcwidth(decoder.codepoint);
-
- if (mode == BytesBeforLimit && next_width > limit)
- return i - rollback;
- width = next_width;
-
- rollback = 0;
- break;
- }
- // continue if we meet other values here
- default:
- ++rollback;
- }
- }
-
- // no need to handle trailing sequences as they have zero width
- return (mode == BytesBeforLimit) ? size : width;
-}
-
-}
-
-
-size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
-{
- return computeWidthImpl<Width>(data, size, prefix, 0);
-}
-
-size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
-{
- return computeWidthImpl<BytesBeforLimit>(data, size, prefix, limit);
-}
-
-}
-}
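computeWidthImpl() above combines an SSE2 fast path over printable ASCII, a UTF-8 DFA, wcwidth() for non-ASCII code points, and an 8-column tab-stop rule. A hedged, ASCII-only sketch of the width accounting (the SIMD path, the DFA and wcwidth are deliberately left out):

    #include <cstddef>
    #include <cstdio>
    #include <string_view>

    // Sketch: printable ASCII contributes one column, '\t' advances to the next
    // 8-column tab stop using the same formula as the deleted code.
    static std::size_t computeAsciiWidth(std::string_view s, std::size_t prefix = 0)
    {
        std::size_t width = 0;
        for (unsigned char c : s)
        {
            if (c == '\t')
                width += 8 - (prefix + width) % 8;
            else if (c >= 32 && c <= 126)
                ++width;
        }
        return width;
    }

    int main()
    {
        std::printf("%zu\n", computeAsciiWidth("ab\tc"));        // 2 + 6 + 1 = 9
        std::printf("%zu\n", computeAsciiWidth("12345678\tx"));  // 8 + 8 + 1 = 17
    }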
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/WeakHash.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/WeakHash.cpp
deleted file mode 100644
index 54d973b6296..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/WeakHash.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <Common/WeakHash.h>
-
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h
deleted file mode 100644
index 33be2853068..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/ZooKeeper/Types.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include <common/types.h>
-#include <future>
-#include <memory>
-#include <vector>
-#include <Common/ZooKeeper/IKeeper.h>
-#include <Poco/Event.h>
-
-
-namespace zkutil
-{
-
-using Strings = std::vector<std::string>;
-
-
-namespace CreateMode
-{
- extern const int Persistent;
- extern const int Ephemeral;
- extern const int EphemeralSequential;
- extern const int PersistentSequential;
-}
-
-using EventPtr = std::shared_ptr<Poco::Event>;
-
-/// Gets multiple asynchronous results.
-/// In each pair, the first element is the path and the second is the response, e.g. CreateResponse, RemoveResponse.
-template <typename R>
-using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
-
-Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode);
-Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
-Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
-Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.cpp
deleted file mode 100644
index e69de29bb2d..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.cpp
+++ /dev/null
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.h
deleted file mode 100644
index e69de29bb2d..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Common/filesystemHelpers.h
+++ /dev/null
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp
deleted file mode 100644
index 647f0c208ee..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.cpp
+++ /dev/null
@@ -1,324 +0,0 @@
-#include "BackgroundSchedulePool.h"
-#include <Common/Exception.h>
-#include <Common/setThreadName.h>
-#include <Common/Stopwatch.h>
-#include <Common/CurrentThread.h>
-#include <common/logger_useful.h>
-#include <chrono>
-#include <common/scope_guard.h>
-
-
-namespace NDB
-{
-
-
-class TaskNotification final : public Poco::Notification
-{
-public:
- explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task_) : task(task_) {}
- void execute() { task->execute(); }
-
-private:
- BackgroundSchedulePoolTaskInfoPtr task;
-};
-
-
-BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo(
- BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_)
- : pool(pool_), log_name(log_name_), function(function_)
-{
-}
-
-bool BackgroundSchedulePoolTaskInfo::schedule()
-{
- std::lock_guard lock(schedule_mutex);
-
- if (deactivated || scheduled)
- return false;
-
- scheduleImpl(lock);
- return true;
-}
-
-bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms, bool overwrite)
-{
- std::lock_guard lock(schedule_mutex);
-
- if (deactivated || scheduled)
- return false;
- if (delayed && !overwrite)
- return false;
-
- pool.scheduleDelayedTask(shared_from_this(), ms, lock);
- return true;
-}
-
-void BackgroundSchedulePoolTaskInfo::deactivate()
-{
- std::lock_guard lock_exec(exec_mutex);
- std::lock_guard lock_schedule(schedule_mutex);
-
- if (deactivated)
- return;
-
- deactivated = true;
- scheduled = false;
-
- if (delayed)
- pool.cancelDelayedTask(shared_from_this(), lock_schedule);
-}
-
-void BackgroundSchedulePoolTaskInfo::activate()
-{
- std::lock_guard lock(schedule_mutex);
- deactivated = false;
-}
-
-bool BackgroundSchedulePoolTaskInfo::activateAndSchedule()
-{
- std::lock_guard lock(schedule_mutex);
-
- deactivated = false;
- if (scheduled)
- return false;
-
- scheduleImpl(lock);
- return true;
-}
-
-void BackgroundSchedulePoolTaskInfo::execute()
-{
- Stopwatch watch;
- CurrentMetrics::Increment metric_increment{pool.tasks_metric};
-
- std::lock_guard lock_exec(exec_mutex);
-
- {
- std::lock_guard lock_schedule(schedule_mutex);
-
- if (deactivated)
- return;
-
- scheduled = false;
- executing = true;
- }
-
- function();
- UInt64 milliseconds = watch.elapsedMilliseconds();
-
- /// If the task execution takes longer than the specified time, it will be logged.
- static const int32_t slow_execution_threshold_ms = 200;
-
- if (milliseconds >= slow_execution_threshold_ms)
- LOG_TRACE(&Poco::Logger::get(log_name), "Execution took {} ms.", milliseconds);
-
- {
- std::lock_guard lock_schedule(schedule_mutex);
-
- executing = false;
-
- /// In case the task was scheduled while executing (including a scheduleAfter() that expired), we put it
- /// back on the queue. We don't call the function again here, so that all tasks
- /// will have their chance to execute.
-
- if (scheduled)
- pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
- }
-}
-
-void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
-{
- scheduled = true;
-
- if (delayed)
- pool.cancelDelayedTask(shared_from_this(), schedule_mutex_lock);
-
- /// If the task is not executing at the moment, enqueue it for immediate execution.
- /// But if it is currently executing, do nothing because it will be enqueued
- /// at the end of the execute() method.
- if (!executing)
- pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
-}
-
-Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
-{
- return [t = shared_from_this()](const Coordination::WatchResponse &)
- {
- t->schedule();
- };
-}
-
-
-BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_)
- : size(size_)
- , tasks_metric(tasks_metric_)
- , thread_name(thread_name_)
-{
- LOG_INFO(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Create BackgroundSchedulePool with {} threads", size);
-
- threads.resize(size);
- for (auto & thread : threads)
- thread = ThreadFromGlobalPool([this] { threadFunction(); });
-
- delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); });
-}
-
-
-BackgroundSchedulePool::~BackgroundSchedulePool()
-{
- try
- {
- {
- std::unique_lock lock(delayed_tasks_mutex);
- shutdown = true;
- wakeup_cond.notify_all();
- }
-
- queue.wakeUpAll();
- delayed_thread.join();
-
- LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish.");
- for (auto & thread : threads)
- thread.join();
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-
-BackgroundSchedulePool::TaskHolder BackgroundSchedulePool::createTask(const std::string & name, const TaskFunc & function)
-{
- return TaskHolder(std::make_shared<TaskInfo>(*this, name, function));
-}
-
-
-void BackgroundSchedulePool::scheduleDelayedTask(const TaskInfoPtr & task, size_t ms, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */)
-{
- Poco::Timestamp current_time;
-
- {
- std::lock_guard lock(delayed_tasks_mutex);
-
- if (task->delayed)
- delayed_tasks.erase(task->iterator);
-
- task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task);
- task->delayed = true;
- }
-
- wakeup_cond.notify_all();
-}
-
-
-void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lock_guard<std::mutex> & /* task_schedule_mutex_lock */)
-{
- {
- std::lock_guard lock(delayed_tasks_mutex);
- delayed_tasks.erase(task->iterator);
- task->delayed = false;
- }
-
- wakeup_cond.notify_all();
-}
-
-
-void BackgroundSchedulePool::attachToThreadGroup()
-{
- std::lock_guard lock(delayed_tasks_mutex);
-
- if (thread_group)
- {
- /// Put all threads into one thread group
- CurrentThread::attachTo(thread_group);
- }
- else
- {
- CurrentThread::initializeQuery();
- thread_group = CurrentThread::getGroup();
- }
-}
-
-
-void BackgroundSchedulePool::threadFunction()
-{
- setThreadName(thread_name.c_str());
-
- attachToThreadGroup();
- SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
-
- while (!shutdown)
- {
- /// We have to wait with timeout to prevent very rare deadlock, caused by the following race condition:
- /// 1. Background thread N: threadFunction(): checks for shutdown (it's false)
- /// 2. Main thread: ~BackgroundSchedulePool(): sets shutdown to true, calls queue.wakeUpAll(), it triggers
- /// all existing Poco::Events inside Poco::NotificationQueue which background threads are waiting on.
- /// 3. Background thread N: threadFunction(): calls queue.waitDequeueNotification(), it creates
- /// new Poco::Event inside Poco::NotificationQueue and starts to wait on it
- /// Background thread N will never be woken up.
- /// TODO Do we really need Poco::NotificationQueue? Why not use std::queue + mutex + condvar or maybe even DB::ThreadPool?
- constexpr size_t wait_timeout_ms = 500;
- if (Poco::AutoPtr<Poco::Notification> notification = queue.waitDequeueNotification(wait_timeout_ms))
- {
- TaskNotification & task_notification = static_cast<TaskNotification &>(*notification);
- task_notification.execute();
- }
- }
-}
-
-
-void BackgroundSchedulePool::delayExecutionThreadFunction()
-{
- setThreadName((thread_name + "/D").c_str());
-
- attachToThreadGroup();
- SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
-
- while (!shutdown)
- {
- TaskInfoPtr task;
- bool found = false;
-
- {
- std::unique_lock lock(delayed_tasks_mutex);
-
- while (!shutdown)
- {
- Poco::Timestamp min_time;
-
- if (!delayed_tasks.empty())
- {
- auto t = delayed_tasks.begin();
- min_time = t->first;
- task = t->second;
- }
-
- if (!task)
- {
- wakeup_cond.wait(lock);
- continue;
- }
-
- Poco::Timestamp current_time;
-
- if (min_time > current_time)
- {
- wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time));
- continue;
- }
- else
- {
- /// We have a task ready for execution
- found = true;
- break;
- }
- }
- }
-
- if (found)
- task->schedule();
- }
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h
deleted file mode 100644
index 614e6bc33e0..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/BackgroundSchedulePool.h
+++ /dev/null
@@ -1,173 +0,0 @@
-#pragma once
-
-#include <Poco/Notification.h>
-#include <Poco/NotificationQueue.h>
-#include <Poco/Timestamp.h>
-#include <thread>
-#include <atomic>
-#include <mutex>
-#include <condition_variable>
-#include <vector>
-#include <map>
-#include <functional>
-#include <boost/noncopyable.hpp>
-#include <Common/ZooKeeper/Types.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/CurrentThread.h>
-#include <Common/ThreadPool.h>
-
-
-namespace NDB
-{
-
-class TaskNotification;
-class BackgroundSchedulePoolTaskInfo;
-class BackgroundSchedulePoolTaskHolder;
-
-
-/** Executes functions scheduled at a specific point in time.
- * Basically all tasks are added to a queue and processed by worker threads.
- *
- * The most important difference between this and BackgroundProcessingPool
- * is that we have the guarantee that the same function is not executed by multiple workers at the same time.
- *
- * The usage scenario: instead of starting a separate thread for each task,
- * register a task in BackgroundSchedulePool and, when you need to run the task,
- * call its schedule() or scheduleAfter(duration) method.
- */
-class BackgroundSchedulePool
-{
-public:
- friend class BackgroundSchedulePoolTaskInfo;
-
- using TaskInfo = BackgroundSchedulePoolTaskInfo;
- using TaskInfoPtr = std::shared_ptr<TaskInfo>;
- using TaskFunc = std::function<void()>;
- using TaskHolder = BackgroundSchedulePoolTaskHolder;
- using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>;
-
- TaskHolder createTask(const std::string & log_name, const TaskFunc & function);
-
- size_t getNumberOfThreads() const { return size; }
-
- /// thread_name_ cannot be longer than 13 bytes (2 bytes are reserved for the "/D" suffix used by delayExecutionThreadFunction())
- BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_);
- ~BackgroundSchedulePool();
-
-private:
- using Threads = std::vector<ThreadFromGlobalPool>;
-
- void threadFunction();
- void delayExecutionThreadFunction();
-
- /// Schedule the task for execution after the specified delay from now.
- void scheduleDelayedTask(const TaskInfoPtr & task_info, size_t ms, std::lock_guard<std::mutex> & task_schedule_mutex_lock);
-
- /// Remove a task that was scheduled with a delay from the schedule.
- void cancelDelayedTask(const TaskInfoPtr & task_info, std::lock_guard<std::mutex> & task_schedule_mutex_lock);
-
- /// Number of worker threads.
- const size_t size;
- std::atomic<bool> shutdown {false};
- Threads threads;
- Poco::NotificationQueue queue;
-
- /// Delayed notifications.
-
- std::condition_variable wakeup_cond;
- std::mutex delayed_tasks_mutex;
- /// Thread waiting for next delayed task.
- ThreadFromGlobalPool delayed_thread;
- /// Tasks ordered by scheduled time.
- DelayedTasks delayed_tasks;
-
- /// Thread group used for profiling purposes
- ThreadGroupStatusPtr thread_group;
-
- CurrentMetrics::Metric tasks_metric;
- std::string thread_name;
-
- void attachToThreadGroup();
-};
-
-
-class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable
-{
-public:
- BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_);
-
- /// Schedule for execution as soon as possible (if not already scheduled).
- /// If the task was already scheduled with delay, the delay will be ignored.
- bool schedule();
-
- /// Schedule for execution after specified delay.
- /// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true).
- bool scheduleAfter(size_t ms, bool overwrite = true);
-
- /// Further attempts to schedule become a no-op. Waits until the current execution of the task finishes.
- void deactivate();
-
- void activate();
-
- /// Atomically activate task and schedule it for execution.
- bool activateAndSchedule();
-
- /// Get the Coordination::WatchCallback needed for notifications from ZooKeeper watches.
- Coordination::WatchCallback getWatchCallback();
-
-private:
- friend class TaskNotification;
- friend class BackgroundSchedulePool;
-
- void execute();
-
- void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
-
- BackgroundSchedulePool & pool;
- std::string log_name;
- BackgroundSchedulePool::TaskFunc function;
-
- std::mutex exec_mutex;
- std::mutex schedule_mutex;
-
- /// Invariants:
- /// * If deactivated is true then scheduled, delayed and executing are all false.
- /// * scheduled and delayed cannot be true at the same time.
- bool deactivated = false;
- bool scheduled = false;
- bool delayed = false;
- bool executing = false;
-
- /// If the task is scheduled with delay, points to element of delayed_tasks.
- BackgroundSchedulePool::DelayedTasks::iterator iterator;
-};
-
-using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>;
-
-
-class BackgroundSchedulePoolTaskHolder
-{
-public:
- BackgroundSchedulePoolTaskHolder() = default;
- explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {}
- BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete;
- BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
- BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete;
- BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
-
- ~BackgroundSchedulePoolTaskHolder()
- {
- if (task_info)
- task_info->deactivate();
- }
-
- operator bool() const { return task_info != nullptr; }
-
- BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
- const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }
-
-private:
- BackgroundSchedulePoolTaskInfoPtr task_info;
-};
-
-}
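
The usage scenario described in the header above fits in a few lines. A minimal sketch, assuming a suitable CurrentMetrics entry exists; the metric name, pool name and task body are illustrative placeholders:

    // Create a pool, register a task, and schedule it immediately or with a delay.
    BackgroundSchedulePool pool(/*size*/ 4, CurrentMetrics::BackgroundSchedulePoolTask, "SchedPool");
    auto task = pool.createTask("ExampleTask", [] { /* periodic work goes here */ });
    task->activateAndSchedule();   // run as soon as a worker thread is free
    task->scheduleAfter(1000);     // or re-run after a 1000 ms delay
    // The task is deactivated automatically when the holder goes out of scope.
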
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h
deleted file mode 100644
index 98db9f865cf..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/Protocol.h
+++ /dev/null
@@ -1,182 +0,0 @@
-#pragma once
-
-#include <common/types.h>
-
-
-namespace NDB
-{
-
-
-/// Client-server protocol.
-///
-/// Client opens a connection and sends Hello packet.
-/// If client version is incompatible, the server can terminate the connection.
-/// Server responds with Hello packet.
-/// If server version is incompatible, the client can terminate the connection.
-///
-/// The main loop follows:
-///
-/// 1. The client sends Query packet.
-///
-/// Starting from version 50263, immediately after sending the Query packet the client starts
-/// the transfer of external (temporary) tables (external storages) - one or several Data packets.
-/// End of transmission is marked by an empty block.
-/// At present, non-empty tables can be sent only along with a SELECT query.
-///
-/// If the query is an INSERT (and thus requires data transfer from the client), then the server transmits
-/// a Data packet containing an empty block that describes the table structure.
-/// Then the client sends one or several Data packets - data for insertion.
-/// End of data is marked by the transmission of an empty block.
-/// Then the server sends EndOfStream packet.
-///
-/// If the query is a SELECT or a query of other type, then the server transmits packets of
-/// one of the following types:
-/// - Data - data corresponding to one block of query results.
-/// - Progress - query execution progress.
-/// - Exception - error description.
-/// - EndOfStream - the end of data transmission.
-///
-/// The client should read packets until EndOfStream or Exception.
-///
-/// The client can also send Cancel packet - a request to cancel the query.
-/// In this case the server can stop executing the query and return incomplete data,
-/// but the client must still read until EndOfStream packet.
-///
-/// Also if there is profiling info and the client revision is recent enough, the server can
-/// send one of the following packets before EndOfStream:
-/// - Totals - a block with total values
-/// - ProfileInfo - serialized BlockStreamProfileInfo structure.
-///
-/// If a query returns data, the server sends an empty header block containing
-/// the description of resulting columns before executing the query.
-/// Using this block the client can initialize the output formatter and display the prefix of resulting table
-/// beforehand.
-
-/// Marker of the inter-server secret (passed in the user name)
-/// (a user name cannot start with whitespace anyway)
-const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET ";
-
-namespace Protocol
-{
- /// Packet types that server transmits.
- namespace Server
- {
- enum Enum
- {
- Hello = 0, /// Name, version, revision.
- Data = 1, /// A block of data (compressed or not).
- Exception = 2, /// The exception during query execution.
- Progress = 3, /// Query execution progress: rows read, bytes read.
- Pong = 4, /// Ping response
- EndOfStream = 5, /// All packets were transmitted
- ProfileInfo = 6, /// Packet with profiling info.
- Totals = 7, /// A block with totals (compressed or not).
- Extremes = 8, /// A block with minimums and maximums (compressed or not).
- TablesStatusResponse = 9, /// A response to TablesStatus request.
- Log = 10, /// System logs of the query execution
- TableColumns = 11, /// Columns' description for default values calculation
- PartUUIDs = 12, /// List of unique parts ids.
- ReadTaskRequest = 13, /// A String (UUID) describing the request for which the next task is needed.
- /// The logic here is inverted: the server sends requests
- /// and the client returns responses.
- MAX = ReadTaskRequest,
- };
-
- /// NOTE: If the type of the packet argument were Enum, the comparison packet >= 0 && packet < 10
- /// would always be true because of compiler optimisation. That would lead to an out-of-bounds error
- /// if the packet is invalid.
- /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values
- inline const char * toString(UInt64 packet)
- {
- static const char * data[] = {
- "Hello",
- "Data",
- "Exception",
- "Progress",
- "Pong",
- "EndOfStream",
- "ProfileInfo",
- "Totals",
- "Extremes",
- "TablesStatusResponse",
- "Log",
- "TableColumns",
- "PartUUIDs",
- "ReadTaskRequest"
- };
- return packet <= MAX
- ? data[packet]
- : "Unknown packet";
- }
-
- inline size_t stringsInMessage(UInt64 msg_type)
- {
- switch (msg_type)
- {
- case TableColumns:
- return 2;
- default:
- break;
- }
- return 0;
- }
- }
-
- /// Packet types that client transmits.
- namespace Client
- {
- enum Enum
- {
- Hello = 0, /// Name, version, revision, default DB
- Query = 1, /// Query id, query settings, stage up to which the query must be executed,
- /// whether the compression must be used,
- /// query text (without data for INSERTs).
- Data = 2, /// A block of data (compressed or not).
- Cancel = 3, /// Cancel the query execution.
- Ping = 4, /// Check that connection to the server is alive.
- TablesStatusRequest = 5, /// Check status of tables on the server.
- KeepAlive = 6, /// Keep the connection alive
- Scalar = 7, /// A block of data (compressed or not).
- IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing
- ReadTaskResponse = 9, /// TODO:
-
- MAX = ReadTaskResponse,
- };
-
- inline const char * toString(UInt64 packet)
- {
- static const char * data[] = {
- "Hello",
- "Query",
- "Data",
- "Cancel",
- "Ping",
- "TablesStatusRequest",
- "KeepAlive",
- "Scalar",
- "IgnoredPartUUIDs",
- "ReadTaskResponse",
- };
- return packet <= MAX
- ? data[packet]
- : "Unknown packet";
- }
- }
-
- /// Whether the compression must be used.
- enum class Compression
- {
- Disable = 0,
- Enable = 1,
- };
-
- /// Whether SSL must be used.
- enum class Secure
- {
- Disable = 0,
- Enable = 1,
- };
-
-}
-
-}
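
The protocol walkthrough above maps onto a simple packet-dispatch loop on the client side. A sketch, assuming a Connection/Packet pair shaped like the one used in RemoteQueryExecutor.cpp; consume() and onProgress() are hypothetical callbacks:

    // Illustrative receive loop: read packets until EndOfStream or Exception.
    while (true)
    {
        Packet packet = connection.receivePacket();
        switch (packet.type)
        {
            case Protocol::Server::Data:        consume(packet.block); break;      // one block of results
            case Protocol::Server::Progress:    onProgress(packet.progress); break;
            case Protocol::Server::Exception:   packet.exception->rethrow();       // error description
            case Protocol::Server::EndOfStream: return;                            // end of transmission
            default:
                throw Exception("Unknown packet " + std::to_string(packet.type), ErrorCodes::UNKNOWN_PACKET_FROM_SERVER);
        }
    }
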
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/QueryProcessingStage.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/QueryProcessingStage.cpp
deleted file mode 100644
index 856cfa787ea..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/QueryProcessingStage.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <Core/QueryProcessingStage.h>
-#include <Common/Exception.h>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int BAD_ARGUMENTS;
-}
-
-namespace QueryProcessingStage
-{
-
- Enum fromString(const std::string & stage_string)
- {
- Enum stage;
-
- if (stage_string == "complete")
- stage = Complete;
- else if (stage_string == "fetch_columns")
- stage = FetchColumns;
- else if (stage_string == "with_mergeable_state")
- stage = WithMergeableState;
- else if (stage_string == "with_mergeable_state_after_aggregation")
- stage = WithMergeableStateAfterAggregation;
- else if (stage_string == "with_mergeable_state_after_aggregation_and_limit")
- stage = WithMergeableStateAfterAggregationAndLimit;
- else
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown query processing stage: {}", stage_string);
-
- return stage;
- }
-
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/SortDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/SortDescription.cpp
deleted file mode 100644
index 4ff0e47c040..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/SortDescription.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <Core/SortDescription.h>
-#include <Core/Block.h>
-#include <IO/Operators.h>
-#include <Common/JSONBuilder.h>
-
-namespace NDB
-{
-
-void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out)
-{
- bool first = true;
-
- for (const auto & desc : description)
- {
- if (!first)
- out << ", ";
- first = false;
-
- if (!desc.column_name.empty())
- out << desc.column_name;
- else
- {
- if (desc.column_number < header.columns())
- out << header.getByPosition(desc.column_number).name;
- else
- out << "?";
-
- out << " (pos " << desc.column_number << ")";
- }
-
- if (desc.direction > 0)
- out << " ASC";
- else
- out << " DESC";
-
- if (desc.with_fill)
- out << " WITH FILL";
- }
-}
-
-void SortColumnDescription::explain(JSONBuilder::JSONMap & map, const Block & header) const
-{
- if (!column_name.empty())
- map.add("Column", column_name);
- else
- {
- if (column_number < header.columns())
- map.add("Column", header.getByPosition(column_number).name);
-
- map.add("Position", column_number);
- }
-
- map.add("Ascending", direction > 0);
- map.add("With Fill", with_fill);
-}
-
-std::string dumpSortDescription(const SortDescription & description)
-{
- WriteBufferFromOwnString wb;
- dumpSortDescription(description, Block{}, wb);
- return wb.str();
-}
-
-JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description, const Block & header)
-{
- auto json_array = std::make_unique<JSONBuilder::JSONArray>();
- for (const auto & descr : description)
- {
- auto json_map = std::make_unique<JSONBuilder::JSONMap>();
- descr.explain(*json_map, header);
- json_array->add(std::move(json_map));
- }
-
- return json_array;
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/UUID.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Core/UUID.cpp
deleted file mode 100644
index c18a80f7e54..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Core/UUID.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <Core/UUID.h>
-#include <Common/thread_local_rng.h>
-
-
-namespace NDB
-{
-
-namespace UUIDHelpers
-{
- UUID generateV4()
- {
- UInt128 res{thread_local_rng(), thread_local_rng()};
- res.items[0] = (res.items[0] & 0xffffffffffff0fffull) | 0x0000000000004000ull;
- res.items[1] = (res.items[1] & 0x3fffffffffffffffull) | 0x8000000000000000ull;
- return UUID{res};
- }
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/BlockIO.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/BlockIO.cpp
deleted file mode 100644
index 44daa32245d..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/BlockIO.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <DataStreams/BlockIO.h>
-#include <Interpreters/ProcessList.h>
-#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-BlockInputStreamPtr BlockIO::getInputStream()
-{
- if (out)
- throw Exception("Cannot get input stream from BlockIO because output stream is not empty",
- ErrorCodes::LOGICAL_ERROR);
-
- if (in)
- return in;
-
- if (pipeline.initialized())
- return std::make_shared<PipelineExecutingBlockInputStream>(std::move(pipeline));
-
- throw Exception("Cannot get input stream from BlockIO because query pipeline was not initialized",
- ErrorCodes::LOGICAL_ERROR);
-}
-
-void BlockIO::reset()
-{
- /** process_list_entry should be destroyed after in, after out and after pipeline,
- * since in, out and pipeline contain pointers to objects inside process_list_entry (the query-level MemoryTracker, for example),
- * which may still be used until in and out are destroyed.
- *
- * However, QueryStatus inside process_list_entry holds shared pointers to streams for some reason.
- * Streams must be destroyed before storage locks, storages and contexts inside pipeline,
- * so releaseQueryStreams() is required.
- */
- /// TODO simplify it all
-
- out.reset();
- in.reset();
- if (process_list_entry)
- process_list_entry->get().releaseQueryStreams();
- pipeline.reset();
- process_list_entry.reset();
-
- /// TODO Do we also need to reset the callbacks? In which order?
-}
-
-BlockIO & BlockIO::operator= (BlockIO && rhs)
-{
- if (this == &rhs)
- return *this;
-
- /// Explicitly reset fields, so everything is destructed in the right order
- reset();
-
- process_list_entry = std::move(rhs.process_list_entry);
- in = std::move(rhs.in);
- out = std::move(rhs.out);
- pipeline = std::move(rhs.pipeline);
-
- finish_callback = std::move(rhs.finish_callback);
- exception_callback = std::move(rhs.exception_callback);
-
- null_format = std::move(rhs.null_format);
-
- return *this;
-}
-
-BlockIO::~BlockIO()
-{
- reset();
-}
-
-}
-
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h
deleted file mode 100644
index 887a7ebb8f2..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/PushingToViewsBlockOutputStream.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#pragma once
-
-#include <DataStreams/IBlockOutputStream.h>
-#include <Interpreters/QueryViewsLog.h>
-#include <Parsers/IAST_fwd.h>
-#include <Storages/IStorage.h>
-#include <Common/Stopwatch.h>
-
-namespace Poco
-{
-class Logger;
-}
-
-namespace NDB
-{
-
-class ReplicatedMergeTreeSink;
-
-struct ViewRuntimeData
-{
- const ASTPtr query;
- StorageID table_id;
- BlockOutputStreamPtr out;
- std::exception_ptr exception;
- QueryViewsLogElement::ViewRuntimeStats runtime_stats;
-
- void setException(std::exception_ptr e)
- {
- exception = e;
- runtime_stats.setStatus(QueryViewsLogElement::ViewStatus::EXCEPTION_WHILE_PROCESSING);
- }
-};
-
-/** Writes data to the specified table and to all dependent materialized views.
- */
-class PushingToViewsBlockOutputStream : public IBlockOutputStream, WithContext
-{
-public:
- PushingToViewsBlockOutputStream(
- const StoragePtr & storage_,
- const StorageMetadataPtr & metadata_snapshot_,
- ContextPtr context_,
- const ASTPtr & query_ptr_,
- bool no_destination = false);
-
- Block getHeader() const override;
- void write(const Block & block) override;
-
- void flush() override;
- void writePrefix() override;
- void writeSuffix() override;
- void onProgress(const Progress & progress) override;
-
-private:
- StoragePtr storage;
- StorageMetadataPtr metadata_snapshot;
- BlockOutputStreamPtr output;
- ReplicatedMergeTreeSink * replicated_output = nullptr;
- Poco::Logger * log;
-
- ASTPtr query_ptr;
- Stopwatch main_watch;
-
- std::vector<ViewRuntimeData> views;
- ContextMutablePtr select_context;
- ContextMutablePtr insert_context;
-
- void process(const Block & block, ViewRuntimeData & view);
- void checkExceptionsInViews();
- void logQueryViews();
-};
-
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp
deleted file mode 100644
index 9d3a5ccf4dd..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#include <DataStreams/RemoteBlockInputStream.h>
-#include <Interpreters/Context.h>
-
-namespace NDB
-{
-
-RemoteBlockInputStream::RemoteBlockInputStream(
- Connection & connection,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
- : query_executor(connection, query_, header_, context_, throttler, scalars_, external_tables_, stage_)
-{
- init();
-}
-
-RemoteBlockInputStream::RemoteBlockInputStream(
- const ConnectionPoolWithFailoverPtr & pool,
- std::vector<IConnectionPool::Entry> && connections,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
- : query_executor(pool, std::move(connections), query_, header_, context_, throttler, scalars_, external_tables_, stage_)
-{
- init();
-}
-
-RemoteBlockInputStream::RemoteBlockInputStream(
- const ConnectionPoolWithFailoverPtr & pool,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_)
- : query_executor(pool, query_, header_, context_, throttler, scalars_, external_tables_, stage_)
-{
- init();
-}
-
-void RemoteBlockInputStream::init()
-{
- query_executor.setProgressCallback([this](const Progress & progress) { progressImpl(progress); });
- query_executor.setProfileInfoCallback([this](const BlockStreamProfileInfo & info_) { info.setFrom(info_, true); });
- query_executor.setLogger(log);
-}
-
-void RemoteBlockInputStream::cancel(bool kill)
-{
- if (kill)
- is_killed = true;
-
- bool old_val = false;
- if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
- return;
-
- query_executor.cancel();
-}
-
-Block RemoteBlockInputStream::readImpl()
-{
- auto block = query_executor.read();
-
- if (isCancelledOrThrowIfKilled())
- return Block();
-
- return block;
-}
-
-void RemoteBlockInputStream::readSuffixImpl()
-{
- query_executor.finish();
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h
deleted file mode 100644
index 043e09bbd0a..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteBlockInputStream.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#pragma once
-
-#include <optional>
-
-#include <common/logger_useful.h>
-
-#include <DataStreams/IBlockInputStream.h>
-#include <Common/Throttler.h>
-#include <Client/ConnectionPool.h>
-#include <Client/MultiplexedConnections.h>
-#include <Interpreters/Cluster.h>
-
-#include <DataStreams/RemoteQueryExecutor.h>
-
-namespace NDB
-{
-
-class Context;
-
-/** This class allows one to launch queries on remote replicas of one shard and get results
- */
-class RemoteBlockInputStream : public IBlockInputStream
-{
-public:
- /// Takes an already established connection.
- RemoteBlockInputStream(
- Connection & connection,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
- /// Accepts several connections already taken from pool.
- RemoteBlockInputStream(
- const ConnectionPoolWithFailoverPtr & pool,
- std::vector<IConnectionPool::Entry> && connections,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
- /// Takes a pool and gets one or several connections from it.
- RemoteBlockInputStream(
- const ConnectionPoolWithFailoverPtr & pool,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
-
- /// Set the query_id. For now, used by performance test to later find the query
- /// in the server query_log. Must be called before sending the query to the server.
- void setQueryId(const std::string & query_id) { query_executor.setQueryId(query_id); }
-
- /// Specify how we allocate connections on a shard.
- void setPoolMode(PoolMode pool_mode) { query_executor.setPoolMode(pool_mode); }
-
- void setMainTable(StorageID main_table_) { query_executor.setMainTable(std::move(main_table_)); }
-
- /// Prevent the default progress notification, because the progress callback is invoked on its own.
- void progress(const Progress & /*value*/) override {}
-
- void cancel(bool kill) override;
-
- String getName() const override { return "Remote"; }
-
- Block getHeader() const override { return query_executor.getHeader(); }
- Block getTotals() override { return query_executor.getTotals(); }
- Block getExtremes() override { return query_executor.getExtremes(); }
-
-protected:
- Block readImpl() override;
- void readSuffixImpl() override;
-
-private:
- RemoteQueryExecutor query_executor;
- Poco::Logger * log = &Poco::Logger::get("RemoteBlockInputStream");
-
- void init();
-};
-
-}
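
A minimal consumption sketch for the stream declared above, assuming the usual IBlockInputStream readPrefix()/read()/readSuffix() interface; the pool, sample_header and context objects and the process() consumer are assumed to exist elsewhere:

    // Illustrative only: read every result block from one remote shard.
    RemoteBlockInputStream stream(pool, "SELECT 1", sample_header, context);
    stream.readPrefix();
    while (Block block = stream.read())
        process(block);   // hypothetical consumer of each block
    stream.readSuffix();
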
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp
deleted file mode 100644
index 544a1027d48..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.cpp
+++ /dev/null
@@ -1,558 +0,0 @@
-#include <DataStreams/ConnectionCollector.h>
-#include <DataStreams/RemoteQueryExecutor.h>
-#include <DataStreams/RemoteQueryExecutorReadContext.h>
-
-#include <Columns/ColumnConst.h>
-#include <Common/CurrentThread.h>
-#include <Processors/Pipe.h>
-#include <Processors/Sources/SourceFromSingleChunk.h>
-#include <Storages/IStorage.h>
-#include <Storages/SelectQueryInfo.h>
-#include <Interpreters/castColumn.h>
-#include <Interpreters/Cluster.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/InternalTextLogsQueue.h>
-#include <IO/ConnectionTimeoutsContext.h>
-#include <Client/MultiplexedConnections.h>
-#include <Client/HedgedConnections.h>
-#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
-
-
-namespace CurrentMetrics
-{
- extern const Metric SyncDrainedConnections;
- extern const Metric ActiveSyncDrainedConnections;
-}
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int UNKNOWN_PACKET_FROM_SERVER;
- extern const int DUPLICATED_PART_UUIDS;
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
- const String & query_, const Block & header_, ContextPtr context_,
- const Scalars & scalars_, const Tables & external_tables_,
- QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
- : header(header_), query(query_), context(context_), scalars(scalars_)
- , external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
-{}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
- Connection & connection,
- const String & query_, const Block & header_, ContextPtr context_,
- ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
- QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
- : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_)
-{
- create_connections = [this, &connection, throttler]()
- {
- return std::make_shared<MultiplexedConnections>(connection, context->getSettingsRef(), throttler);
- };
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
- std::shared_ptr<Connection> connection_ptr,
- const String & query_, const Block & header_, ContextPtr context_,
- ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
- QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
- : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_)
-{
- create_connections = [this, connection_ptr, throttler]()
- {
- return std::make_shared<MultiplexedConnections>(connection_ptr, context->getSettingsRef(), throttler);
- };
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
- const ConnectionPoolWithFailoverPtr & pool_,
- std::vector<IConnectionPool::Entry> && connections_,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
- QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
- : header(header_), query(query_), context(context_)
- , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_)
-{
- create_connections = [this, connections_, throttler]() mutable {
- return std::make_shared<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
- };
-}
-
-RemoteQueryExecutor::RemoteQueryExecutor(
- const ConnectionPoolWithFailoverPtr & pool_,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
- QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
- : header(header_), query(query_), context(context_)
- , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_)
-{
- create_connections = [this, throttler]()->std::shared_ptr<IConnections>
- {
- const Settings & current_settings = context->getSettingsRef();
- auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
-
-#if defined(OS_LINUX)
- if (current_settings.use_hedged_requests)
- {
- std::shared_ptr<QualifiedTableName> table_to_check = nullptr;
- if (main_table)
- table_to_check = std::make_shared<QualifiedTableName>(main_table.getQualifiedName());
-
- return std::make_shared<HedgedConnections>(pool, context, timeouts, throttler, pool_mode, table_to_check);
- }
-#endif
-
- std::vector<IConnectionPool::Entry> connection_entries;
- if (main_table)
- {
- auto try_results = pool->getManyChecked(timeouts, &current_settings, pool_mode, main_table.getQualifiedName());
- connection_entries.reserve(try_results.size());
- for (auto & try_result : try_results)
- connection_entries.emplace_back(std::move(try_result.entry));
- }
- else
- connection_entries = pool->getMany(timeouts, &current_settings, pool_mode);
-
- return std::make_shared<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
- };
-}
-
-RemoteQueryExecutor::~RemoteQueryExecutor()
-{
- /** If interrupted in the middle of the communication loop with replicas, interrupt
- * all connections, then read and skip the remaining packets to make sure
- * these connections are not left hanging in an out-of-sync state.
- */
- if (established || isQueryPending())
- connections->disconnect();
-}
-
-/** If we receive a block with slightly different column types, or with extra columns,
- * we will adapt it to the expected structure.
- */
-static Block adaptBlockStructure(const Block & block, const Block & header)
-{
- /// Special case when reader doesn't care about result structure. Deprecated and used only in Benchmark, PerformanceTest.
- if (!header)
- return block;
-
- Block res;
- res.info = block.info;
-
- for (const auto & elem : header)
- {
- ColumnPtr column;
-
- if (elem.column && isColumnConst(*elem.column))
- {
- /// We expect a constant column in the block.
- /// If the block is not empty, get the value for the constant from it,
- /// because it may differ on the remote server for functions like version(), uptime(), ...
- if (block.rows() > 0 && block.has(elem.name))
- {
- /// Const column is passed as materialized. Get first value from it.
- ///
- /// TODO: check that column contains the same value.
- /// TODO: serialize const columns.
- auto col = block.getByName(elem.name);
- col.column = block.getByName(elem.name).column->cut(0, 1);
-
- column = castColumn(col, elem.type);
-
- if (!isColumnConst(*column))
- column = ColumnConst::create(column, block.rows());
- else
- /// It is not possible now. Just in case we support const columns serialization.
- column = column->cloneResized(block.rows());
- }
- else
- column = elem.column->cloneResized(block.rows());
- }
- else
- column = castColumn(block.getByName(elem.name), elem.type);
-
- res.insert({column, elem.type, elem.name});
- }
- return res;
-}
-
-void RemoteQueryExecutor::sendQuery()
-{
- if (sent_query)
- return;
-
- connections = create_connections();
-
- const auto & settings = context->getSettingsRef();
- if (settings.skip_unavailable_shards && 0 == connections->size())
- return;
-
- /// Query cannot be canceled in the middle of the send query,
- /// since there are multiple packets:
- /// - Query
- /// - Data (multiple times)
- ///
- /// And after the Cancel packet no Data packet can be sent, otherwise the remote side will throw:
- ///
- /// Unexpected packet Data received from client
- ///
- std::lock_guard guard(was_cancelled_mutex);
-
- established = true;
- was_cancelled = false;
-
- auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
- ClientInfo modified_client_info = context->getClientInfo();
- modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
- if (CurrentThread::isInitialized())
- {
- modified_client_info.client_trace_context = CurrentThread::get().thread_trace_context;
- }
-
- {
- std::lock_guard lock(duplicated_part_uuids_mutex);
- if (!duplicated_part_uuids.empty())
- connections->sendIgnoredPartUUIDs(duplicated_part_uuids);
- }
-
- connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true);
-
- established = false;
- sent_query = true;
-
- if (settings.enable_scalar_subquery_optimization)
- sendScalars();
- sendExternalTables();
-}
-
-Block RemoteQueryExecutor::read()
-{
- if (!sent_query)
- {
- sendQuery();
-
- if (context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()))
- return {};
- }
-
- while (true)
- {
- if (was_cancelled)
- return Block();
-
- Packet packet = connections->receivePacket();
-
- if (auto block = processPacket(std::move(packet)))
- return *block;
- else if (got_duplicated_part_uuids)
- return std::get<Block>(restartQueryWithoutDuplicatedUUIDs());
- }
-}
-
-std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext> & read_context [[maybe_unused]])
-{
-
-#if defined(OS_LINUX)
- if (!sent_query)
- {
- sendQuery();
-
- if (context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()))
- return Block();
- }
-
- if (!read_context || resent_query)
- {
- std::lock_guard lock(was_cancelled_mutex);
- if (was_cancelled)
- return Block();
-
- read_context = std::make_unique<ReadContext>(*connections);
- }
-
- do
- {
- if (!read_context->resumeRoutine())
- return Block();
-
- if (read_context->is_read_in_progress.load(std::memory_order_relaxed))
- {
- read_context->setTimer();
- return read_context->epoll.getFileDescriptor();
- }
- else
- {
- if (auto data = processPacket(std::move(read_context->packet)))
- return std::move(*data);
- else if (got_duplicated_part_uuids)
- return restartQueryWithoutDuplicatedUUIDs(&read_context);
- }
- }
- while (true);
-#else
- return read();
-#endif
-}
-
-
-std::variant<Block, int> RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context)
-{
- /// Cancel previous query and disconnect before retry.
- cancel(read_context);
- connections->disconnect();
-
- /// Only resend once, otherwise throw an exception
- if (!resent_query)
- {
- if (log)
- LOG_DEBUG(log, "Found duplicate UUIDs, will retry query without those parts");
-
- resent_query = true;
- sent_query = false;
- got_duplicated_part_uuids = false;
- /// Consecutive read will implicitly send query first.
- if (!read_context)
- return read();
- else
- return read(*read_context);
- }
- throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
-}
-
-std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
-{
- switch (packet.type)
- {
- case Protocol::Server::ReadTaskRequest:
- processReadTaskRequest();
- break;
- case Protocol::Server::PartUUIDs:
- if (!setPartUUIDs(packet.part_uuids))
- got_duplicated_part_uuids = true;
- break;
- case Protocol::Server::Data:
- /// If the block is not empty and is not a header block
- if (packet.block && (packet.block.rows() > 0))
- return adaptBlockStructure(packet.block, header);
- break; /// If the block is empty - we will receive other packets before EndOfStream.
-
- case Protocol::Server::Exception:
- got_exception_from_replica = true;
- packet.exception->rethrow();
- break;
-
- case Protocol::Server::EndOfStream:
- if (!connections->hasActiveConnections())
- {
- finished = true;
- return Block();
- }
- break;
-
- case Protocol::Server::Progress:
- /** We use the progress reported by the remote server.
- * We also account for it in the ProcessList,
- * and we use it to check
- * constraints (for example, the minimum speed of query execution)
- * and quotas (for example, the number of rows to read).
- */
- if (progress_callback)
- progress_callback(packet.progress);
- break;
-
- case Protocol::Server::ProfileInfo:
- /// Use own (client-side) info about read bytes, it is more correct info than server-side one.
- if (profile_info_callback)
- profile_info_callback(packet.profile_info);
- break;
-
- case Protocol::Server::Totals:
- totals = packet.block;
- break;
-
- case Protocol::Server::Extremes:
- extremes = packet.block;
- break;
-
- case Protocol::Server::Log:
- /// Pass logs from remote server to client
- if (auto log_queue = CurrentThread::getInternalTextLogsQueue())
- log_queue->pushBlock(std::move(packet.block));
- break;
-
- default:
- got_unknown_packet_from_replica = true;
- throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}",
- toString(packet.type),
- connections->dumpAddresses());
- }
-
- return {};
-}
-
-bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids)
-{
- auto query_context = context->getQueryContext();
- auto duplicates = query_context->getPartUUIDs()->add(uuids);
-
- if (!duplicates.empty())
- {
- std::lock_guard lock(duplicated_part_uuids_mutex);
- duplicated_part_uuids.insert(duplicated_part_uuids.begin(), duplicates.begin(), duplicates.end());
- return false;
- }
- return true;
-}
-
-void RemoteQueryExecutor::processReadTaskRequest()
-{
- if (!task_iterator)
- throw Exception("Distributed task iterator is not initialized", ErrorCodes::LOGICAL_ERROR);
- auto response = (*task_iterator)();
- connections->sendReadTaskResponse(response);
-}
-
-void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context)
-{
- /** If one of the following holds:
- * - nothing has been started yet;
- * - all packets up to EndOfStream have been received;
- * - an exception was received from one replica;
- * - an unknown packet was received from one replica;
- * then there is nothing left to read.
- */
- if (!isQueryPending() || hasThrownException())
- return;
-
- /** Not all the data has been read yet, but it is no longer needed.
- * This may happen because the data already read is sufficient (for example, when using LIMIT).
- */
-
- /// Send a request to abort query execution, if not already sent.
- tryCancel("Cancelling query because enough data has been read", read_context);
- /// Try to drain connections asynchronously.
- if (auto conn = ConnectionCollector::enqueueConnectionCleanup(pool, connections))
- {
- /// Drain connections synchronously.
- CurrentMetrics::Increment metric_increment(CurrentMetrics::ActiveSyncDrainedConnections);
- ConnectionCollector::drainConnections(*conn);
- CurrentMetrics::add(CurrentMetrics::SyncDrainedConnections, 1);
- }
- finished = true;
-}
-
-void RemoteQueryExecutor::cancel(std::unique_ptr<ReadContext> * read_context)
-{
- {
- std::lock_guard lock(external_tables_mutex);
-
- /// Stop sending external data.
- for (auto & vec : external_tables_data)
- for (auto & elem : vec)
- elem->is_cancelled = true;
- }
-
- if (!isQueryPending() || hasThrownException())
- return;
-
- tryCancel("Cancelling query", read_context);
-}
-
-void RemoteQueryExecutor::sendScalars()
-{
- connections->sendScalarsData(scalars);
-}
-
-void RemoteQueryExecutor::sendExternalTables()
-{
- size_t count = connections->size();
-
- {
- std::lock_guard lock(external_tables_mutex);
-
- external_tables_data.clear();
- external_tables_data.reserve(count);
-
- for (size_t i = 0; i < count; ++i)
- {
- ExternalTablesData res;
- for (const auto & table : external_tables)
- {
- StoragePtr cur = table.second;
-
- auto data = std::make_unique<ExternalTableData>();
- data->table_name = table.first;
- data->creating_pipe_callback = [cur, context = this->context]()
- {
- SelectQueryInfo query_info;
- auto metadata_snapshot = cur->getInMemoryMetadataPtr();
- QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
- context, QueryProcessingStage::Complete, metadata_snapshot, query_info);
-
- Pipe pipe = cur->read(
- metadata_snapshot->getColumns().getNamesOfPhysical(),
- metadata_snapshot, query_info, context,
- read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
-
- if (pipe.empty())
- return std::make_unique<Pipe>(
- std::make_shared<SourceFromSingleChunk>(metadata_snapshot->getSampleBlock(), Chunk()));
-
- return std::make_unique<Pipe>(std::move(pipe));
- };
-
- data->pipe = data->creating_pipe_callback();
- res.emplace_back(std::move(data));
- }
- external_tables_data.push_back(std::move(res));
- }
- }
-
- connections->sendExternalTablesData(external_tables_data);
-}
-
-void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context)
-{
- /// Flag was_cancelled is atomic because it is checked in read().
- std::lock_guard guard(was_cancelled_mutex);
-
- if (was_cancelled)
- return;
-
- was_cancelled = true;
-
- if (read_context && *read_context)
- {
- /// The timer should be set for query cancellation to keep the cancellation from hanging.
- ///
- /// If the remote server terminates abnormally, neither a
- /// FIN nor an RST packet will be sent, and the initiator will not know that
- /// the connection died (unless tcp_keep_alive_timeout > 0).
- ///
- /// Also note that it is possible to get into this situation even when
- /// enough data has already been read.
- (*read_context)->setTimer();
- (*read_context)->cancel();
- }
-
- connections->sendCancel();
-
- if (log)
- LOG_TRACE(log, "({}) {}", connections->dumpAddresses(), reason);
-}
-
-bool RemoteQueryExecutor::isQueryPending() const
-{
- return sent_query && !finished;
-}
-
-bool RemoteQueryExecutor::hasThrownException() const
-{
- return got_exception_from_replica || got_unknown_packet_from_replica;
-}
-
-}
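
The asynchronous read() overload above returns either a ready Block or a file descriptor to poll. A caller-side sketch, with the polling primitive and the block consumer left as assumptions:

    // Illustrative polling loop for RemoteQueryExecutor::read(read_context).
    std::unique_ptr<RemoteQueryExecutor::ReadContext> read_context;
    while (true)
    {
        auto result = executor.read(read_context);
        if (const int * fd = std::get_if<int>(&result))
        {
            waitUntilReadable(*fd);   // hypothetical: poll/epoll the descriptor until it is readable
            continue;                 // then resume by calling read() again with the same context
        }
        Block block = std::get<Block>(std::move(result));
        if (!block)
            break;                    // empty block: the query has finished
        consume(block);               // hypothetical consumer
    }
    executor.finish(&read_context);
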
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h
deleted file mode 100644
index ad0af68bc0f..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutor.h
+++ /dev/null
@@ -1,227 +0,0 @@
-#pragma once
-
-#include <Client/ConnectionPool.h>
-#include <Client/IConnections.h>
-#include <Client/ConnectionPoolWithFailover.h>
-#include <Storages/IStorage_fwd.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/StorageID.h>
-#include <Common/TimerDescriptor.h>
-#include <variant>
-
-
-namespace NDB
-{
-
-class Context;
-
-class Throttler;
-using ThrottlerPtr = std::shared_ptr<Throttler>;
-
-struct Progress;
-using ProgressCallback = std::function<void(const Progress & progress)>;
-
-struct BlockStreamProfileInfo;
-using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & info)>;
-
-class RemoteQueryExecutorReadContext;
-
-/// This is the same type as StorageS3Source::IteratorWrapper
-using TaskIterator = std::function<String()>;
-
-/// This class allows one to launch queries on remote replicas of one shard and get results
-class RemoteQueryExecutor
-{
-public:
- using ReadContext = RemoteQueryExecutorReadContext;
-
- /// Takes an already established connection.
- /// We don't own the connection, so we have to drain it synchronously.
- RemoteQueryExecutor(
- Connection & connection,
- const String & query_, const Block & header_, ContextPtr context_,
- ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
- /// Takes an already established connection.
- RemoteQueryExecutor(
- std::shared_ptr<Connection> connection,
- const String & query_, const Block & header_, ContextPtr context_,
- ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
- /// Accepts several connections already taken from pool.
- RemoteQueryExecutor(
- const ConnectionPoolWithFailoverPtr & pool,
- std::vector<IConnectionPool::Entry> && connections_,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
- /// Takes a pool and gets one or several connections from it.
- RemoteQueryExecutor(
- const ConnectionPoolWithFailoverPtr & pool,
- const String & query_, const Block & header_, ContextPtr context_,
- const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
- QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
-
- ~RemoteQueryExecutor();
-
- /// Create connection and send query, external tables and scalars.
- void sendQuery();
-
- /// Query is resent to a replica, the query itself can be modified.
- std::atomic<bool> resent_query { false };
-
- /// Read next block of data. Returns empty block if query is finished.
- Block read();
-
- /// Async variant of read(). Returns a ready block or a file descriptor that may be used for polling.
- /// ReadContext is internal read state. Pass an empty pointer the first time, then reuse the created one for every subsequent call.
- std::variant<Block, int> read(std::unique_ptr<ReadContext> & read_context);
-
- /// Receive all remaining packets and finish the query.
- /// It should be cancelled after read() returned an empty block.
- void finish(std::unique_ptr<ReadContext> * read_context = nullptr);
-
- /// Cancel query execution. Sends the Cancel packet and ignores other packets.
- /// This method may be called from a separate thread.
- void cancel(std::unique_ptr<ReadContext> * read_context = nullptr);
-
- /// Get totals and extremes if any.
- Block getTotals() { return std::move(totals); }
- Block getExtremes() { return std::move(extremes); }
-
- /// Set callback for progress. It will be called on Progress packet.
- void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); }
-
- /// Set callback for profile info. It will be called on ProfileInfo packet.
- void setProfileInfoCallback(ProfileInfoCallback callback) { profile_info_callback = std::move(callback); }
-
- /// Set the query_id. For now, used by performance test to later find the query
- /// in the server query_log. Must be called before sending the query to the server.
- void setQueryId(const std::string& query_id_) { assert(!sent_query); query_id = query_id_; }
-
- /// Specify how we allocate connections on a shard.
- void setPoolMode(PoolMode pool_mode_) { pool_mode = pool_mode_; }
-
- void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); }
-
- void setLogger(Poco::Logger * logger) { log = logger; }
-
- const Block & getHeader() const { return header; }
-
-private:
- RemoteQueryExecutor(
- const String & query_, const Block & header_, ContextPtr context_,
- const Scalars & scalars_, const Tables & external_tables_,
- QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_);
-
- Block header;
- Block totals;
- Block extremes;
-
- const String query;
- String query_id;
- ContextPtr context;
-
- ProgressCallback progress_callback;
- ProfileInfoCallback profile_info_callback;
-
- /// Scalars that need to be sent to remote servers
- Scalars scalars;
- /// Temporary tables that need to be sent to remote servers
- Tables external_tables;
- QueryProcessingStage::Enum stage;
- /// Initiator identifier for distributed task processing
- std::shared_ptr<TaskIterator> task_iterator;
-
- std::function<std::shared_ptr<IConnections>()> create_connections;
- /// Hold a shared reference to the connection pool so that asynchronous connection draining will
- /// work safely. Make sure it's the first member so that we don't destruct it too early.
- const ConnectionPoolWithFailoverPtr pool;
- std::shared_ptr<IConnections> connections;
-
- /// Streams for reading from temporary tables and subsequently sending the data
- /// to remote servers for GLOBAL subqueries
- std::vector<ExternalTablesData> external_tables_data;
- std::mutex external_tables_mutex;
-
- /// Connections to replicas are established, but no queries are sent yet
- std::atomic<bool> established { false };
-
- /// Query is sent (used before getting first block)
- std::atomic<bool> sent_query { false };
-
- /** All data from all replicas has been received, up to the EndOfStream packet.
- * To prevent desynchronization, if not all data is read before the object is
- * destroyed, it is required to send a cancel request to the replicas and to
- * read all packets up to EndOfStream.
- */
- std::atomic<bool> finished { false };
-
- /** A cancel request was sent to all replicas because the data is not needed anymore.
- * This behaviour may occur when:
- * - the data size is already satisfactory (when using LIMIT, for example)
- * - an exception was thrown on the client side
- */
- std::atomic<bool> was_cancelled { false };
- std::mutex was_cancelled_mutex;
-
- /** An exception was received from a replica. There is no need to receive more packets
- * or to request cancellation of the query execution.
- */
- std::atomic<bool> got_exception_from_replica { false };
-
- /** An unknown packet was received from a replica. There is no need to receive more packets
- * or to request cancellation of the query execution.
- */
- std::atomic<bool> got_unknown_packet_from_replica { false };
-
- /** Got duplicated part UUIDs from a replica.
- */
- std::atomic<bool> got_duplicated_part_uuids{ false };
-
- /// Part UUIDs collected from remote replicas
- std::mutex duplicated_part_uuids_mutex;
- std::vector<UUID> duplicated_part_uuids;
-
- PoolMode pool_mode = PoolMode::GET_MANY;
- StorageID main_table = StorageID::createEmpty();
-
- Poco::Logger * log = nullptr;
-
- /// Send all scalars to remote servers
- void sendScalars();
-
- /// Send all temporary tables to remote servers
- void sendExternalTables();
-
- /// Set part UUIDs, collected from remote replicas, in the query context.
- /// Returns true if duplicates were found.
- bool setPartUUIDs(const std::vector<UUID> & uuids);
-
- void processReadTaskRequest();
-
- /// Cancel the query and restart it with info about duplicated UUIDs;
- /// used only for `allow_experimental_query_deduplication`.
- std::variant<Block, int> restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context = nullptr);
-
- /// If it wasn't sent yet, send a cancel request to all replica connections
- void tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context);
-
- /// Returns true if the query was sent
- bool isQueryPending() const;
-
- /// Returns true if an exception was thrown
- bool hasThrownException() const;
-
- /// Process a received packet and return a data block if possible.
- std::optional<Block> processPacket(Packet packet);
-
- /// Reads packet by packet
- Block readPackets();
-
-};
-
-}
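
For orientation, a minimal sketch of how the removed RemoteQueryExecutor was typically driven; `connection`, `query`, `header`, `context` and `process` are assumed to exist in the caller:

    NDB::RemoteQueryExecutor executor(connection, query, header, context);
    executor.setQueryId("example-query-id");    // optional; must precede sendQuery()
    executor.sendQuery();                       // establish connections, send query, scalars, external tables

    while (true)
    {
        NDB::Block block = executor.read();     // synchronous variant
        if (block.columns() == 0)               // an empty block means the query has finished
            break;
        process(block);                         // hypothetical consumer of the result blocks
    }
    executor.finish();                          // drain the remaining packets up to EndOfStream
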
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.cpp
deleted file mode 100644
index d195654621e..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-#if defined(OS_LINUX)
-
-#include <DataStreams/RemoteQueryExecutorReadContext.h>
-#include <Common/Exception.h>
-#include <Common/NetException.h>
-#include <Client/IConnections.h>
-#include <sys/epoll.h>
-
-namespace NDB
-{
-
-struct RemoteQueryExecutorRoutine
-{
- IConnections & connections;
- RemoteQueryExecutorReadContext & read_context;
-
- struct ReadCallback
- {
- RemoteQueryExecutorReadContext & read_context;
- Fiber & fiber;
-
- void operator()(int fd, Poco::Timespan timeout = 0, const std::string fd_description = "")
- {
- try
- {
- read_context.setConnectionFD(fd, timeout, fd_description);
- }
- catch (DB::Exception & e)
- {
- e.addMessage(" while reading from {}", fd_description);
- throw;
- }
-
- read_context.is_read_in_progress.store(true, std::memory_order_relaxed);
- fiber = std::move(fiber).resume();
- read_context.is_read_in_progress.store(false, std::memory_order_relaxed);
- }
- };
-
- Fiber operator()(Fiber && sink) const
- {
- try
- {
- while (true)
- {
- read_context.packet = connections.receivePacketUnlocked(ReadCallback{read_context, sink}, false /* is_draining */);
- sink = std::move(sink).resume();
- }
- }
- catch (const boost::context::detail::forced_unwind &)
- {
- /// This exception is thrown by the fiber implementation when the fiber is being deleted but hasn't exited.
- /// It must not be caught, or it will segfault.
- /// All other exceptions must be caught.
- throw;
- }
- catch (...)
- {
- read_context.exception = std::current_exception();
- }
-
- return std::move(sink);
- }
-};
-
-namespace ErrorCodes
-{
- extern const int CANNOT_READ_FROM_SOCKET;
- extern const int CANNOT_OPEN_FILE;
- extern const int SOCKET_TIMEOUT;
-}
-
-RemoteQueryExecutorReadContext::RemoteQueryExecutorReadContext(IConnections & connections_)
- : connections(connections_)
-{
-
- if (-1 == pipe2(pipe_fd, O_NONBLOCK))
- throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE);
-
- {
- epoll.add(pipe_fd[0]);
- }
-
- {
- epoll.add(timer.getDescriptor());
- }
-
- auto routine = RemoteQueryExecutorRoutine{connections, *this};
- fiber = boost::context::fiber(std::allocator_arg_t(), stack, std::move(routine));
-}
-
-void RemoteQueryExecutorReadContext::setConnectionFD(int fd, Poco::Timespan timeout, const std::string & fd_description)
-{
- if (fd == connection_fd)
- return;
-
- if (connection_fd != -1)
- epoll.remove(connection_fd);
-
- connection_fd = fd;
- epoll.add(connection_fd);
-
- receive_timeout_usec = timeout.totalMicroseconds();
- connection_fd_description = fd_description;
-}
-
-bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking)
-{
- try
- {
- return checkTimeoutImpl(blocking);
- }
- catch (DB::Exception & e)
- {
- if (last_used_socket)
- e.addMessage(" while reading from socket ({})", last_used_socket->peerAddress().toString());
- throw;
- }
-}
-
-bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking)
-{
- /// Waiting on epoll will not block if it was polled externally.
- epoll_event events[3];
- events[0].data.fd = events[1].data.fd = events[2].data.fd = -1;
-
- int num_events = epoll.getManyReady(3, events, blocking);
-
- bool is_socket_ready = false;
- bool is_pipe_alarmed = false;
-
- for (int i = 0; i < num_events; ++i)
- {
- if (events[i].data.fd == connection_fd)
- is_socket_ready = true;
- if (events[i].data.fd == timer.getDescriptor())
- is_timer_alarmed = true;
- if (events[i].data.fd == pipe_fd[0])
- is_pipe_alarmed = true;
- }
-
- if (is_pipe_alarmed)
- return false;
-
- if (is_timer_alarmed && !is_socket_ready)
- {
- /// Socket receive timeout. Drain it in case of error, or it may be hidden by the timeout exception.
- timer.drain();
- throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT);
- }
-
- return true;
-}
-
-void RemoteQueryExecutorReadContext::setTimer() const
-{
- /// Did not get a packet yet. Initialize the timeout for the next async read.
- timer.reset();
-
- if (receive_timeout_usec)
- timer.setRelative(receive_timeout_usec);
-}
-
-bool RemoteQueryExecutorReadContext::resumeRoutine()
-{
- if (is_read_in_progress.load(std::memory_order_relaxed) && !checkTimeout())
- return false;
-
- {
- std::lock_guard guard(fiber_lock);
- if (!fiber)
- return false;
-
- fiber = std::move(fiber).resume();
- }
-
- if (exception)
- std::rethrow_exception(std::move(exception));
-
- return true;
-}
-
-void RemoteQueryExecutorReadContext::cancel()
-{
- std::lock_guard guard(fiber_lock);
-
- /// It is safe to just destroy fiber - we are not in the process of reading from socket.
- boost::context::fiber to_destroy = std::move(fiber);
-
- /// One should not try to wait for the current packet here in case of a
- /// timeout, because this would exceed the timeout.
- /// Anyway, if the timeout is exceeded, the connection will be shut down
- /// (disconnected), so it will not be left in an unsynchronised state.
- if (!is_timer_alarmed)
- {
- /// Wait for the current pending packet, to avoid leaving the connection in an unsynchronised state.
- while (is_read_in_progress.load(std::memory_order_relaxed))
- {
- checkTimeout(/* blocking= */ true);
- to_destroy = std::move(to_destroy).resume();
- }
- }
-
- /// Send something to pipe to cancel executor waiting.
- uint64_t buf = 0;
- while (-1 == write(pipe_fd[1], &buf, sizeof(buf)))
- {
- if (errno == EAGAIN)
- break;
-
- if (errno != EINTR)
- throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET);
- }
-}
-
-RemoteQueryExecutorReadContext::~RemoteQueryExecutorReadContext()
-{
- /// connection_fd is closed by Poco::Net::Socket or Epoll
- if (pipe_fd[0] != -1)
- close(pipe_fd[0]);
- if (pipe_fd[1] != -1)
- close(pipe_fd[1]);
-}
-
-}
-#endif
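
The read context above multiplexes three descriptors (connection fd, timerfd, cancellation pipe) in one epoll set; writing a byte to the pipe wakes any poller. A self-contained sketch of just that wake-up pattern, using plain Linux APIs rather than the removed classes:

    #include <sys/epoll.h>
    #include <unistd.h>
    #include <fcntl.h>

    // Returns an epoll fd that becomes readable when either `watched_fd` has data
    // or one byte is written to pipe_out[1] (the cancellation signal).
    int makeCancelableEpoll(int watched_fd, int pipe_out[2])
    {
        if (-1 == pipe2(pipe_out, O_NONBLOCK))
            return -1;

        int ep = epoll_create1(0);
        if (ep == -1)
            return -1;

        epoll_event ev{};
        ev.events = EPOLLIN;

        ev.data.fd = watched_fd;
        epoll_ctl(ep, EPOLL_CTL_ADD, watched_fd, &ev);

        ev.data.fd = pipe_out[0];
        epoll_ctl(ep, EPOLL_CTL_ADD, pipe_out[0], &ev);

        return ep;    // epoll_wait() on this fd returns as soon as either source is readable
    }
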
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.h b/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.h
deleted file mode 100644
index a022536dff8..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/DataStreams/RemoteQueryExecutorReadContext.h
+++ /dev/null
@@ -1,83 +0,0 @@
-#pragma once
-
-#if defined(OS_LINUX)
-
-#include <mutex>
-#include <atomic>
-#include <Common/Fiber.h>
-#include <Common/FiberStack.h>
-#include <Common/TimerDescriptor.h>
-#include <Common/Epoll.h>
-#include <Client/Connection.h>
-#include <Client/IConnections.h>
-#include <Poco/Timespan.h>
-
-namespace Poco::Net
-{
-class Socket;
-}
-
-namespace NDB
-{
-
-class MultiplexedConnections;
-
-class RemoteQueryExecutorReadContext
-{
-public:
- std::atomic_bool is_read_in_progress = false;
- Packet packet;
-
- std::exception_ptr exception;
- FiberStack stack;
- boost::context::fiber fiber;
- /// This mutex for the fiber is needed because the fiber could be destroyed by the cancel method from another thread.
- std::mutex fiber_lock;
-
- /// atomic is required due to data-race between setConnectionFD() and setTimer() from the cancellation path.
- std::atomic<uint64_t> receive_timeout_usec = 0;
- IConnections & connections;
- Poco::Net::Socket * last_used_socket = nullptr;
-
- /// Here we have three descriptors we are going to wait on:
- /// * connection_fd is the descriptor of the connection. It may change when reading from several replicas.
- /// * timer is a timerfd descriptor to manually check the socket timeout
- /// * pipe_fd is a pipe we use to cancel the query and the socket polling by the executor.
- /// We put those descriptors into our own epoll which is used by external executor.
- TimerDescriptor timer{CLOCK_MONOTONIC, 0};
- bool is_timer_alarmed = false;
- int connection_fd = -1;
- int pipe_fd[2] = { -1, -1 };
-
- Epoll epoll;
-
- std::string connection_fd_description;
-
- explicit RemoteQueryExecutorReadContext(IConnections & connections_);
- ~RemoteQueryExecutorReadContext();
-
- bool checkTimeout(bool blocking = false);
- bool checkTimeoutImpl(bool blocking);
-
- void setConnectionFD(int fd, Poco::Timespan timeout = 0, const std::string & fd_description = "");
- void setTimer() const;
-
- bool resumeRoutine();
- void cancel();
-};
-
-}
-
-#else
-
-namespace NDB
-{
-class RemoteQueryExecutorReadContext
-{
-public:
- void cancel() {}
- void setTimer() {}
-};
-
-}
-#endif
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h
deleted file mode 100644
index d4a062ee17b..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskFactory.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#pragma once
-
-#include <Disks/IDisk.h>
-#include <Interpreters/Context_fwd.h>
-#include <common/types.h>
-
-#include <boost/noncopyable.hpp>
-#include <Poco/Util/AbstractConfiguration.h>
-
-#include <functional>
-#include <map>
-#include <unordered_map>
-
-
-namespace NDB
-{
-
-using DisksMap = std::map<String, DiskPtr>;
-/**
- * Disk factory. Responsible for creating new disk objects.
- */
-class DiskFactory final : private boost::noncopyable
-{
-public:
- using Creator = std::function<DiskPtr(
- const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- ContextPtr context,
- const DisksMap & map)>;
-
- static DiskFactory & instance();
-
- void registerDiskType(const String & disk_type, Creator creator);
-
- DiskPtr create(
- const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- ContextPtr context,
- const DisksMap & map) const;
-
-private:
- using DiskTypeRegistry = std::unordered_map<String, Creator>;
- DiskTypeRegistry registry;
-};
-
-}
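
A sketch of how a disk was resolved through the removed factory; `config`, `context` and `disks` are assumed to be provided by the caller, and "fast_ssd" is a hypothetical disk name:

    auto & factory = NDB::DiskFactory::instance();
    NDB::DiskPtr disk = factory.create(
        "fast_ssd",                              // disk name as it appears in the config
        config,                                  // Poco::Util::AbstractConfiguration
        "storage_configuration.disks.fast_ssd",  // config prefix for this disk
        context,
        disks);                                  // DisksMap of already created disks
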
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp
deleted file mode 100644
index c37628da439..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.cpp
+++ /dev/null
@@ -1,442 +0,0 @@
-#include "DiskLocal.h"
-#include <Common/createHardLink.h>
-#include "DiskFactory.h"
-
-#include <Disks/LocalDirectorySyncGuard.h>
-#include <Interpreters/Context.h>
-#include <Common/filesystemHelpers.h>
-#include <Common/quoteString.h>
-#include <IO/createReadBufferFromFileBase.h>
-
-#include <fstream>
-#include <unistd.h>
-
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int UNKNOWN_ELEMENT_IN_CONFIG;
- extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
- extern const int PATH_ACCESS_DENIED;
- extern const int INCORRECT_DISK_INDEX;
- extern const int CANNOT_TRUNCATE_FILE;
- extern const int CANNOT_UNLINK;
- extern const int CANNOT_RMDIR;
-}
-
-std::mutex DiskLocal::reservation_mutex;
-
-
-using DiskLocalPtr = std::shared_ptr<DiskLocal>;
-
-static void loadDiskLocalConfig(const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- ContextPtr context,
- String & path,
- UInt64 & keep_free_space_bytes)
-{
- path = config.getString(config_prefix + ".path", "");
- if (name == "default")
- {
- if (!path.empty())
- throw Exception(
- "\"default\" disk path should be provided in <path> not it <storage_configuration>",
- ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
- path = context->getPath();
- }
- else
- {
- if (path.empty())
- throw Exception("Disk path can not be empty. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
- if (path.back() != '/')
- throw Exception("Disk path must end with /. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
- }
-
- if (!FS::canRead(path) || !FS::canWrite(path))
- throw Exception("There is no RW access to the disk " + name + " (" + path + ")", ErrorCodes::PATH_ACCESS_DENIED);
-
- bool has_space_ratio = config.has(config_prefix + ".keep_free_space_ratio");
-
- if (config.has(config_prefix + ".keep_free_space_bytes") && has_space_ratio)
- throw Exception(
- "Only one of 'keep_free_space_bytes' and 'keep_free_space_ratio' can be specified",
- ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
- keep_free_space_bytes = config.getUInt64(config_prefix + ".keep_free_space_bytes", 0);
-
- if (has_space_ratio)
- {
- auto ratio = config.getDouble(config_prefix + ".keep_free_space_ratio");
- if (ratio < 0 || ratio > 1)
- throw Exception("'keep_free_space_ratio' have to be between 0 and 1", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
- String tmp_path = path;
- if (tmp_path.empty())
- tmp_path = context->getPath();
-
- // Create tmp disk for getting total disk space.
- keep_free_space_bytes = static_cast<UInt64>(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio);
- }
-}
-
-class DiskLocalReservation : public IReservation
-{
-public:
- DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_)
- : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_)
- {
- }
-
- UInt64 getSize() const override { return size; }
-
- DiskPtr getDisk(size_t i) const override;
-
- Disks getDisks() const override { return {disk}; }
-
- void update(UInt64 new_size) override;
-
- ~DiskLocalReservation() override;
-
-private:
- DiskLocalPtr disk;
- UInt64 size;
- CurrentMetrics::Increment metric_increment;
-};
-
-
-class DiskLocalDirectoryIterator : public IDiskDirectoryIterator
-{
-public:
- explicit DiskLocalDirectoryIterator(const String & disk_path_, const String & dir_path_)
- : dir_path(dir_path_), entry(fs::path(disk_path_) / dir_path_)
- {
- }
-
- void next() override { ++entry; }
-
- bool isValid() const override { return entry != fs::directory_iterator(); }
-
- String path() const override
- {
- if (entry->is_directory())
- return dir_path / entry->path().filename() / "";
- else
- return dir_path / entry->path().filename();
- }
-
-
- String name() const override { return entry->path().filename(); }
-
-private:
- fs::path dir_path;
- fs::directory_iterator entry;
-};
-
-
-ReservationPtr DiskLocal::reserve(UInt64 bytes)
-{
- if (!tryReserve(bytes))
- return {};
- return std::make_unique<DiskLocalReservation>(std::static_pointer_cast<DiskLocal>(shared_from_this()), bytes);
-}
-
-bool DiskLocal::tryReserve(UInt64 bytes)
-{
- std::lock_guard lock(DiskLocal::reservation_mutex);
- if (bytes == 0)
- {
- LOG_DEBUG(log, "Reserving 0 bytes on disk {}", backQuote(name));
- ++reservation_count;
- return true;
- }
-
- auto available_space = getAvailableSpace();
- UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes);
- if (unreserved_space >= bytes)
- {
- LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.",
- ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space));
- ++reservation_count;
- reserved_bytes += bytes;
- return true;
- }
- return false;
-}
-
-UInt64 DiskLocal::getTotalSpace() const
-{
- struct statvfs fs;
- if (name == "default") /// for default disk we get space from path/data/
- fs = getStatVFS((fs::path(disk_path) / "data/").string());
- else
- fs = getStatVFS(disk_path);
- UInt64 total_size = fs.f_blocks * fs.f_bsize;
- if (total_size < keep_free_space_bytes)
- return 0;
- return total_size - keep_free_space_bytes;
-}
-
-UInt64 DiskLocal::getAvailableSpace() const
-{
- /// we use f_bavail, because part of the f_bfree space is
- /// available only to the superuser and for system purposes
- struct statvfs fs;
- if (name == "default") /// for default disk we get space from path/data/
- fs = getStatVFS((fs::path(disk_path) / "data/").string());
- else
- fs = getStatVFS(disk_path);
- UInt64 total_size = fs.f_bavail * fs.f_bsize;
- if (total_size < keep_free_space_bytes)
- return 0;
- return total_size - keep_free_space_bytes;
-}
-
-UInt64 DiskLocal::getUnreservedSpace() const
-{
- std::lock_guard lock(DiskLocal::reservation_mutex);
- auto available_space = getAvailableSpace();
- available_space -= std::min(available_space, reserved_bytes);
- return available_space;
-}
-
-bool DiskLocal::exists(const String & path) const
-{
- return fs::exists(fs::path(disk_path) / path);
-}
-
-bool DiskLocal::isFile(const String & path) const
-{
- return fs::is_regular_file(fs::path(disk_path) / path);
-}
-
-bool DiskLocal::isDirectory(const String & path) const
-{
- return fs::is_directory(fs::path(disk_path) / path);
-}
-
-size_t DiskLocal::getFileSize(const String & path) const
-{
- return fs::file_size(fs::path(disk_path) / path);
-}
-
-void DiskLocal::createDirectory(const String & path)
-{
- fs::create_directory(fs::path(disk_path) / path);
-}
-
-void DiskLocal::createDirectories(const String & path)
-{
- fs::create_directories(fs::path(disk_path) / path);
-}
-
-void DiskLocal::clearDirectory(const String & path)
-{
- for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path))
- fs::remove(entry.path());
-}
-
-void DiskLocal::moveDirectory(const String & from_path, const String & to_path)
-{
- fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
-}
-
-DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path)
-{
- return std::make_unique<DiskLocalDirectoryIterator>(disk_path, path);
-}
-
-void DiskLocal::moveFile(const String & from_path, const String & to_path)
-{
- fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
-}
-
-void DiskLocal::replaceFile(const String & from_path, const String & to_path)
-{
- fs::path from_file = fs::path(disk_path) / from_path;
- fs::path to_file = fs::path(disk_path) / to_path;
- fs::rename(from_file, to_file);
-}
-
-std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, size_t estimated_size) const
-{
- return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, estimated_size);
-}
-
-std::unique_ptr<WriteBufferFromFileBase>
-DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode)
-{
- int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
- return std::make_unique<WriteBufferFromFile>(fs::path(disk_path) / path, buf_size, flags);
-}
-
-void DiskLocal::removeFile(const String & path)
-{
- auto fs_path = fs::path(disk_path) / path;
- if (0 != unlink(fs_path.c_str()))
- throwFromErrnoWithPath("Cannot unlink file " + fs_path.string(), fs_path, ErrorCodes::CANNOT_UNLINK);
-}
-
-void DiskLocal::removeFileIfExists(const String & path)
-{
- auto fs_path = fs::path(disk_path) / path;
- if (0 != unlink(fs_path.c_str()) && errno != ENOENT)
- throwFromErrnoWithPath("Cannot unlink file " + fs_path.string(), fs_path, ErrorCodes::CANNOT_UNLINK);
-}
-
-void DiskLocal::removeDirectory(const String & path)
-{
- auto fs_path = fs::path(disk_path) / path;
- if (0 != rmdir(fs_path.c_str()))
- throwFromErrnoWithPath("Cannot rmdir " + fs_path.string(), fs_path, ErrorCodes::CANNOT_RMDIR);
-}
-
-void DiskLocal::removeRecursive(const String & path)
-{
- fs::remove_all(fs::path(disk_path) / path);
-}
-
-void DiskLocal::listFiles(const String & path, std::vector<String> & file_names)
-{
- file_names.clear();
- for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path))
- file_names.emplace_back(entry.path().filename());
-}
-
-void DiskLocal::setLastModified(const String & path, const Poco::Timestamp & timestamp)
-{
- FS::setModificationTime(fs::path(disk_path) / path, timestamp.epochTime());
-}
-
-Poco::Timestamp DiskLocal::getLastModified(const String & path)
-{
- return FS::getModificationTimestamp(fs::path(disk_path) / path);
-}
-
-void DiskLocal::createHardLink(const String & src_path, const String & dst_path)
-{
- DB::createHardLink(fs::path(disk_path) / src_path, fs::path(disk_path) / dst_path);
-}
-
-void DiskLocal::truncateFile(const String & path, size_t size)
-{
- int res = truncate((fs::path(disk_path) / path).string().data(), size);
- if (-1 == res)
- throwFromErrnoWithPath("Cannot truncate file " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE);
-}
-
-void DiskLocal::createFile(const String & path)
-{
- FS::createFile(fs::path(disk_path) / path);
-}
-
-void DiskLocal::setReadOnly(const String & path)
-{
- fs::permissions(fs::path(disk_path) / path,
- fs::perms::owner_write | fs::perms::group_write | fs::perms::others_write,
- fs::perm_options::remove);
-}
-
-bool inline isSameDiskType(const IDisk & one, const IDisk & another)
-{
- return typeid(one) == typeid(another);
-}
-
-void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
- if (isSameDiskType(*this, *to_disk))
- {
- fs::path to = fs::path(to_disk->getPath()) / to_path;
- fs::path from = fs::path(disk_path) / from_path;
- if (from_path.ends_with('/'))
- from = from.parent_path();
- if (fs::is_directory(from))
- to /= from.filename();
-
- fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
- }
- else
- copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation.
-}
-
-SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
-{
- return std::make_unique<LocalDirectorySyncGuard>(fs::path(disk_path) / path);
-}
-
-
-void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &)
-{
- String new_disk_path;
- UInt64 new_keep_free_space_bytes;
-
- loadDiskLocalConfig(name, config, config_prefix, context, new_disk_path, new_keep_free_space_bytes);
-
- if (disk_path != new_disk_path)
- throw Exception("Disk path can't be updated from config " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
-
- if (keep_free_space_bytes != new_keep_free_space_bytes)
- keep_free_space_bytes = new_keep_free_space_bytes;
-}
-
-DiskPtr DiskLocalReservation::getDisk(size_t i) const
-{
- if (i != 0)
- {
- throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX);
- }
- return disk;
-}
-
-void DiskLocalReservation::update(UInt64 new_size)
-{
- std::lock_guard lock(DiskLocal::reservation_mutex);
- disk->reserved_bytes -= size;
- size = new_size;
- disk->reserved_bytes += size;
-}
-
-DiskLocalReservation::~DiskLocalReservation()
-{
- try
- {
- std::lock_guard lock(DiskLocal::reservation_mutex);
- if (disk->reserved_bytes < size)
- {
- disk->reserved_bytes = 0;
- LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName());
- }
- else
- {
- disk->reserved_bytes -= size;
- }
-
- if (disk->reservation_count == 0)
- LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName());
- else
- --disk->reservation_count;
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-
-void registerDiskLocal(DiskFactory & factory)
-{
- auto creator = [](const String & name,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- ContextPtr context,
- const DisksMap & /*map*/) -> DiskPtr {
- String path;
- UInt64 keep_free_space_bytes;
- loadDiskLocalConfig(name, config, config_prefix, context, path, keep_free_space_bytes);
- return std::make_shared<DiskLocal>(name, path, keep_free_space_bytes);
- };
- factory.registerDiskType("local", creator);
-}
-
-}
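
As a rough illustration of loadDiskLocalConfig above: with keep_free_space_ratio set to 0.1 on a filesystem whose statvfs reports about 1000 GB of total space, keep_free_space_bytes is computed as roughly 100 GB; getTotalSpace() then reports about 900 GB, and getAvailableSpace() subtracts the same 100 GB reserve from the free-space figure.
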
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h
deleted file mode 100644
index 030b1614228..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskLocal.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#pragma once
-
-#include <common/logger_useful.h>
-#include <Disks/IDisk.h>
-#include <IO/ReadBufferFromFile.h>
-#include <IO/ReadBufferFromFileBase.h>
-#include <IO/WriteBufferFromFile.h>
-#include <Poco/Util/AbstractConfiguration.h>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-class DiskLocalReservation;
-
-class DiskLocal : public IDisk
-{
-public:
- friend class DiskLocalReservation;
-
- DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_)
- : name(name_), disk_path(path_), keep_free_space_bytes(keep_free_space_bytes_)
- {
- if (disk_path.back() != '/')
- throw Exception("Disk path must end with '/', but '" + disk_path + "' doesn't.", ErrorCodes::LOGICAL_ERROR);
- }
-
- const String & getName() const override { return name; }
-
- const String & getPath() const override { return disk_path; }
-
- ReservationPtr reserve(UInt64 bytes) override;
-
- UInt64 getTotalSpace() const override;
-
- UInt64 getAvailableSpace() const override;
-
- UInt64 getUnreservedSpace() const override;
-
- UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; }
-
- bool exists(const String & path) const override;
-
- bool isFile(const String & path) const override;
-
- bool isDirectory(const String & path) const override;
-
- size_t getFileSize(const String & path) const override;
-
- void createDirectory(const String & path) override;
-
- void createDirectories(const String & path) override;
-
- void clearDirectory(const String & path) override;
-
- void moveDirectory(const String & from_path, const String & to_path) override;
-
- DiskDirectoryIteratorPtr iterateDirectory(const String & path) override;
-
- void createFile(const String & path) override;
-
- void moveFile(const String & from_path, const String & to_path) override;
-
- void replaceFile(const String & from_path, const String & to_path) override;
-
- void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
-
- void listFiles(const String & path, std::vector<String> & file_names) override;
-
- std::unique_ptr<ReadBufferFromFileBase> readFile(
- const String & path,
- const ReadSettings & settings,
- size_t estimated_size) const override;
-
- std::unique_ptr<WriteBufferFromFileBase> writeFile(
- const String & path,
- size_t buf_size,
- WriteMode mode) override;
-
- void removeFile(const String & path) override;
- void removeFileIfExists(const String & path) override;
- void removeDirectory(const String & path) override;
- void removeRecursive(const String & path) override;
-
- void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
-
- Poco::Timestamp getLastModified(const String & path) override;
-
- void setReadOnly(const String & path) override;
-
- void createHardLink(const String & src_path, const String & dst_path) override;
-
- void truncateFile(const String & path, size_t size) override;
-
- DiskType getType() const override { return DiskType::Local; }
- bool isRemote() const override { return false; }
-
- bool supportZeroCopyReplication() const override { return false; }
-
- SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
-
- void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) override;
-
-private:
- bool tryReserve(UInt64 bytes);
-
-private:
- const String name;
- const String disk_path;
- std::atomic<UInt64> keep_free_space_bytes;
-
- UInt64 reserved_bytes = 0;
- UInt64 reservation_count = 0;
-
- static std::mutex reservation_mutex;
-
- Poco::Logger * log = &Poco::Logger::get("DiskLocal");
-};
-
-
-}
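
A minimal construction sketch for the removed DiskLocal; the name and path are hypothetical, and note that the constructor requires the path to end with '/':

    // Throws LOGICAL_ERROR if the path does not end with '/'.
    auto disk = std::make_shared<NDB::DiskLocal>(
        "scratch",                   // hypothetical disk name
        "/var/tmp/app-scratch/",     // must end with '/'
        0);                          // keep_free_space_bytes

    if (auto reservation = disk->reserve(64 * 1024 * 1024))
    {
        // 64 MiB stays accounted as reserved until `reservation` is destroyed.
    }
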
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp
deleted file mode 100644
index 66439f41f96..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-#include "DiskLocal.h"
-#include "DiskSelector.h"
-
-#include <IO/WriteHelpers.h>
-#include <Common/escapeForFileName.h>
-#include <Common/quoteString.h>
-#include <Common/StringUtils/StringUtils.h>
-#include <common/logger_useful.h>
-#include <Interpreters/Context.h>
-
-#include <set>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
- extern const int UNKNOWN_DISK;
-}
-
-DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
-{
- Poco::Util::AbstractConfiguration::Keys keys;
- config.keys(config_prefix, keys);
-
- auto & factory = DiskFactory::instance();
-
- constexpr auto default_disk_name = "default";
- bool has_default_disk = false;
- for (const auto & disk_name : keys)
- {
- if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
- throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
- if (disk_name == default_disk_name)
- has_default_disk = true;
-
- auto disk_config_prefix = config_prefix + "." + disk_name;
-
- disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
- }
- if (!has_default_disk)
- disks.emplace(default_disk_name, std::make_shared<DiskLocal>(default_disk_name, context->getPath(), 0));
-}
-
-
-DiskSelectorPtr DiskSelector::updateFromConfig(
- const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) const
-{
- Poco::Util::AbstractConfiguration::Keys keys;
- config.keys(config_prefix, keys);
-
- auto & factory = DiskFactory::instance();
-
- std::shared_ptr<DiskSelector> result = std::make_shared<DiskSelector>(*this);
-
- constexpr auto default_disk_name = "default";
- DisksMap old_disks_minus_new_disks (result->getDisksMap());
-
- for (const auto & disk_name : keys)
- {
- if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
- throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
- auto disk_config_prefix = config_prefix + "." + disk_name;
- if (result->getDisksMap().count(disk_name) == 0)
- {
- result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap()));
- }
- else
- {
- auto disk = old_disks_minus_new_disks[disk_name];
-
- disk->applyNewSettings(config, context, disk_config_prefix, result->getDisksMap());
-
- old_disks_minus_new_disks.erase(disk_name);
- }
- }
-
- old_disks_minus_new_disks.erase(default_disk_name);
-
- if (!old_disks_minus_new_disks.empty())
- {
- WriteBufferFromOwnString warning;
- if (old_disks_minus_new_disks.size() == 1)
- writeString("Disk ", warning);
- else
- writeString("Disks ", warning);
-
- int index = 0;
- for (const auto & [name, _] : old_disks_minus_new_disks)
- {
- if (index++ > 0)
- writeString(", ", warning);
- writeBackQuotedString(name, warning);
- }
-
- writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning);
- LOG_WARNING(&Poco::Logger::get("DiskSelector"), warning.str());
- }
-
- return result;
-}
-
-
-DiskPtr DiskSelector::get(const String & name) const
-{
- auto it = disks.find(name);
- if (it == disks.end())
- throw Exception("Unknown disk " + name, ErrorCodes::UNKNOWN_DISK);
- return it->second;
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h
deleted file mode 100644
index 58b83f1ebe7..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskSelector.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#pragma once
-
-#include <Disks/DiskFactory.h>
-#include <Disks/IDisk.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-#include <map>
-
-namespace NDB
-{
-
-class DiskSelector;
-using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
-
-/// Parse .xml configuration and store information about disks
-/// Mostly used for introspection.
-class DiskSelector
-{
-public:
- DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
- DiskSelector(const DiskSelector & from) : disks(from.disks) { }
-
- DiskSelectorPtr updateFromConfig(
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- ContextPtr context
- ) const;
-
- /// Get disk by name
- DiskPtr get(const String & name) const;
-
- /// Get all disks with names
- const DisksMap & getDisksMap() const { return disks; }
- void addToDiskMap(const String & name, DiskPtr disk)
- {
- disks.emplace(name, disk);
- }
-
-private:
- DisksMap disks;
-};
-
-}
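
A usage sketch for the removed DiskSelector; `config` and `context` are assumed to come from the application:

    NDB::DiskSelector selector(config, "storage_configuration.disks", context);
    NDB::DiskPtr disk = selector.get("default");          // throws UNKNOWN_DISK for unknown names
    const NDB::DisksMap & all = selector.getDisksMap();   // name -> DiskPtr; "default" is always present
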
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskType.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskType.h
deleted file mode 100644
index a3e1c80b44f..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/DiskType.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#pragma once
-
-#include <common/types.h>
-
-namespace NDB
-{
-
-enum class DiskType
-{
- Local,
- RAM,
- S3,
- HDFS,
- Encrypted,
- WebServer,
-};
-
-inline String toString(DiskType disk_type)
-{
- switch (disk_type)
- {
- case DiskType::Local:
- return "local";
- case DiskType::RAM:
- return "memory";
- case DiskType::S3:
- return "s3";
- case DiskType::HDFS:
- return "hdfs";
- case DiskType::Encrypted:
- return "encrypted";
- case DiskType::WebServer:
- return "web";
- }
- __builtin_unreachable();
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/Executor.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/Executor.h
deleted file mode 100644
index c024bdfccd5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/Executor.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#pragma once
-
-#include <future>
-#include <functional>
-
-namespace NDB
-{
-
-/// Interface to run a task asynchronously with the possibility to wait for its execution.
-class Executor
-{
-public:
- virtual ~Executor() = default;
- virtual std::future<void> execute(std::function<void()> task) = 0;
-};
-
-/// Executes the task synchronously when the disk doesn't support async operations.
-class SyncExecutor : public Executor
-{
-public:
- SyncExecutor() = default;
- std::future<void> execute(std::function<void()> task) override
- {
- auto promise = std::make_shared<std::promise<void>>();
- try
- {
- task();
- promise->set_value();
- }
- catch (...)
- {
- try
- {
- promise->set_exception(std::current_exception());
- }
- catch (...) { }
- }
- return promise->get_future();
- }
-};
-
-}
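
The file above ships only a synchronous implementation; an asynchronous counterpart is a natural extension. A hypothetical sketch (not part of the removed code) based on std::async:

    #include <future>
    #include <functional>

    class AsyncExecutor : public NDB::Executor
    {
    public:
        // Runs the task on a separate thread; the returned future also carries any exception.
        std::future<void> execute(std::function<void()> task) override
        {
            return std::async(std::launch::async, std::move(task));
        }
    };
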
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp
deleted file mode 100644
index 91c3299ec9d..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "IDisk.h"
-#include "Disks/Executor.h"
-#include <IO/ReadBufferFromFileBase.h>
-#include <IO/WriteBufferFromFileBase.h>
-#include <IO/copyData.h>
-#include <Poco/Logger.h>
-#include <common/logger_useful.h>
-#include <Common/setThreadName.h>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int NOT_IMPLEMENTED;
-}
-
-bool IDisk::isDirectoryEmpty(const String & path)
-{
- return !iterateDirectory(path)->isValid();
-}
-
-void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & to_path)
-{
- LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.",
- from_disk.getName(), from_disk.getPath(), from_path, to_disk.getName(), to_disk.getPath(), to_path);
-
- auto in = from_disk.readFile(from_path);
- auto out = to_disk.writeFile(to_path);
- copyData(*in, *out);
- out->finalize();
-}
-
-
-using ResultsCollector = std::vector<std::future<void>>;
-
-void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results)
-{
- if (from_disk.isFile(from_path))
- {
- auto result = exec.execute(
- [&from_disk, from_path, &to_disk, to_path]()
- {
- setThreadName("DiskCopier");
- DB::copyFile(from_disk, from_path, to_disk, fs::path(to_path) / fileName(from_path));
- });
-
- results.push_back(std::move(result));
- }
- else
- {
- fs::path dir_name = fs::path(from_path).parent_path().filename();
- fs::path dest(fs::path(to_path) / dir_name);
- to_disk.createDirectories(dest);
-
- for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next())
- asyncCopy(from_disk, it->path(), to_disk, dest, exec, results);
- }
-}
-
-void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
- auto & exec = to_disk->getExecutor();
- ResultsCollector results;
-
- asyncCopy(*this, from_path, *to_disk, to_path, exec, results);
-
- for (auto & result : results)
- result.wait();
- for (auto & result : results)
- result.get();
-}
-
-void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
- copyThroughBuffers(from_path, to_disk, to_path);
-}
-
-void IDisk::truncateFile(const String &, size_t)
-{
- throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate operation is not implemented for disk of type {}", getType());
-}
-
-SyncGuardPtr IDisk::getDirectorySyncGuard(const String & /* path */) const
-{
- return nullptr;
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h
deleted file mode 100644
index bdbe186bb65..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IDisk.h
+++ /dev/null
@@ -1,342 +0,0 @@
-#pragma once
-
-#include <Interpreters/Context_fwd.h>
-#include <Interpreters/Context.h>
-#include <Core/Defines.h>
-#include <common/types.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/Exception.h>
-#include <Disks/Executor.h>
-#include <Disks/DiskType.h>
-#include <IO/ReadSettings.h>
-
-#include <memory>
-#include <mutex>
-#include <utility>
-#include <boost/noncopyable.hpp>
-#include <Poco/Timestamp.h>
-#include <filesystem>
-
-
-namespace fs = std::filesystem;
-
-namespace Poco
-{
- namespace Util
- {
- class AbstractConfiguration;
- }
-}
-
-namespace CurrentMetrics
-{
- extern const Metric DiskSpaceReservedForMerge;
-}
-
-namespace NDB
-{
-
-class IDiskDirectoryIterator;
-using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>;
-
-class IReservation;
-using ReservationPtr = std::unique_ptr<IReservation>;
-using Reservations = std::vector<ReservationPtr>;
-
-class ReadBufferFromFileBase;
-class WriteBufferFromFileBase;
-class MMappedFileCache;
-
-/**
- * Mode of opening a file for write.
- */
-enum class WriteMode
-{
- Rewrite,
- Append
-};
-
-/**
- * Provide interface for reservation.
- */
-class Space : public std::enable_shared_from_this<Space>
-{
-public:
- /// Return the name of the space object.
- virtual const String & getName() const = 0;
-
- /// Reserve the specified number of bytes.
- virtual ReservationPtr reserve(UInt64 bytes) = 0;
-
- virtual ~Space() = default;
-};
-
-using SpacePtr = std::shared_ptr<Space>;
-
-/**
- * A guard that should synchronize a file's or directory's state
- * with the storage device (e.g. fsync in POSIX) in its destructor.
- */
-class ISyncGuard
-{
-public:
- ISyncGuard() = default;
- virtual ~ISyncGuard() = default;
-};
-
-using SyncGuardPtr = std::unique_ptr<ISyncGuard>;
-
-/**
- * A unit of storage persisting data and metadata.
- * Abstract underlying storage technology.
- * Responsible for:
- * - file management;
- * - space accounting and reservation.
- */
-class IDisk : public Space
-{
-public:
- /// Default constructor.
- explicit IDisk(std::unique_ptr<Executor> executor_ = std::make_unique<SyncExecutor>()) : executor(std::move(executor_)) { }
-
- /// Root path for all files stored on the disk.
- /// It's not required to be a local filesystem path.
- virtual const String & getPath() const = 0;
-
- /// Total available space on the disk.
- virtual UInt64 getTotalSpace() const = 0;
-
- /// Space currently available on the disk.
- virtual UInt64 getAvailableSpace() const = 0;
-
- /// Space available for reservation (available space minus reserved space).
- virtual UInt64 getUnreservedSpace() const = 0;
-
- /// Amount of bytes which should be kept free on the disk.
- virtual UInt64 getKeepingFreeSpace() const { return 0; }
-
- /// Return `true` if the specified file exists.
- virtual bool exists(const String & path) const = 0;
-
- /// Return `true` if the specified file exists and it's a regular file (not a directory or special file type).
- virtual bool isFile(const String & path) const = 0;
-
- /// Return `true` if the specified file exists and it's a directory.
- virtual bool isDirectory(const String & path) const = 0;
-
- /// Return size of the specified file.
- virtual size_t getFileSize(const String & path) const = 0;
-
- /// Create directory.
- virtual void createDirectory(const String & path) = 0;
-
- /// Create directory and all parent directories if necessary.
- virtual void createDirectories(const String & path) = 0;
-
- /// Remove all files from the directory. Directories are not removed.
- virtual void clearDirectory(const String & path) = 0;
-
- /// Move directory from `from_path` to `to_path`.
- virtual void moveDirectory(const String & from_path, const String & to_path) = 0;
-
- /// Return iterator to the contents of the specified directory.
- virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0;
-
- /// Return `true` if the specified directory is empty.
- bool isDirectoryEmpty(const String & path);
-
- /// Create empty file at `path`.
- virtual void createFile(const String & path) = 0;
-
- /// Move the file from `from_path` to `to_path`.
- /// If a file with the `to_path` path already exists, an exception will be thrown.
- virtual void moveFile(const String & from_path, const String & to_path) = 0;
-
- /// Move the file from `from_path` to `to_path`.
- /// If a file with `to_path` path already exists, it will be replaced.
- virtual void replaceFile(const String & from_path, const String & to_path) = 0;
-
- /// Recursively copy the data contained at `from_path` to `to_path` located on `to_disk`.
- virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
-
- /// List files at `path` and add their names to `file_names`
- virtual void listFiles(const String & path, std::vector<String> & file_names) = 0;
-
- /// Open the file for read and return ReadBufferFromFileBase object.
- virtual std::unique_ptr<ReadBufferFromFileBase> readFile(
- const String & path,
- const ReadSettings & settings = ReadSettings{},
- size_t estimated_size = 0) const = 0;
-
- /// Open the file for write and return WriteBufferFromFileBase object.
- virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(
- const String & path,
- size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
- WriteMode mode = WriteMode::Rewrite) = 0;
-
- /// Remove file. Throws an exception if the file doesn't exist or is a directory.
- virtual void removeFile(const String & path) = 0;
-
- /// Remove file if it exists.
- virtual void removeFileIfExists(const String & path) = 0;
-
- /// Remove directory. Throws an exception if it's not a directory or if the directory is not empty.
- virtual void removeDirectory(const String & path) = 0;
-
- /// Remove file or directory with all children. Use with extra caution. Throws an exception if the file doesn't exist.
- virtual void removeRecursive(const String & path) = 0;
-
- /// Remove file. Throws an exception if the file doesn't exist or if the directory is not empty.
- /// Differs from removeFile for S3/HDFS disks.
- /// The second bool param is a flag to remove (true) or keep (false) shared data on S3.
- virtual void removeSharedFile(const String & path, bool) { removeFile(path); }
-
- /// Remove file or directory with all children. Use with extra caution. Throws an exception if the file doesn't exist.
- /// Differs from removeRecursive for S3/HDFS disks.
- /// The second bool param is a flag to remove (true) or keep (false) shared data on S3.
- virtual void removeSharedRecursive(const String & path, bool) { removeRecursive(path); }
-
- /// Remove file or directory if it exists.
- /// Differs from removeFileIfExists for S3/HDFS disks.
- /// The second bool param is a flag to remove (true) or keep (false) shared data on S3.
- virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); }
-
- /// Set last modified time to file or directory at `path`.
- virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0;
-
- /// Get last modified time of file or directory at `path`.
- virtual Poco::Timestamp getLastModified(const String & path) = 0;
-
- /// Set file at `path` as read-only.
- virtual void setReadOnly(const String & path) = 0;
-
- /// Create hardlink from `src_path` to `dst_path`.
- virtual void createHardLink(const String & src_path, const String & dst_path) = 0;
-
- /// Truncate file to specified size.
- virtual void truncateFile(const String & path, size_t size);
-
- /// Return disk type - "local", "s3", etc.
- virtual DiskType getType() const = 0;
-
- /// Involves network interaction.
- virtual bool isRemote() const = 0;
-
- /// Whether this disk supports zero-copy replication.
- /// Overridden in remote FS disks.
- virtual bool supportZeroCopyReplication() const = 0;
-
- virtual bool isReadOnly() const { return false; }
-
- /// Invoked when Global Context is shutdown.
- virtual void shutdown() {}
-
- /// Performs action on disk startup.
- virtual void startup() {}
-
- /// Return some unique string for the file; overridden for IDiskRemote.
- /// Required to distinguish different copies of the same part on a remote disk.
- virtual String getUniqueId(const String & path) const { return path; }
-
- /// Check that the file exists and ClickHouse has access to it.
- /// Overridden in remote FS disks (s3/hdfs).
- /// Required for remote disks to ensure that a replica has access to data written by another node.
- virtual bool checkUniqueId(const String & id) const { return exists(id); }
-
- /// Invoked on partitions freeze query.
- virtual void onFreeze(const String &) { }
-
- /// Returns a guard that ensures synchronization of directory metadata with the storage device.
- virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const;
-
- /// Applies new settings for the disk at runtime.
- virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) {}
-
-protected:
- friend class DiskDecorator;
-
- /// Returns executor to perform asynchronous operations.
- virtual Executor & getExecutor() { return *executor; }
-
- /// Base implementation of the function copy().
- /// It just opens two files, reads data by portions from the first file, and writes it to the second one.
- /// A derived class may override copy() to provide a faster implementation.
- void copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
-
-private:
- std::unique_ptr<Executor> executor;
-};
-
-using DiskPtr = std::shared_ptr<IDisk>;
-using Disks = std::vector<DiskPtr>;
-
-/**
- * Iterator of directory contents on particular disk.
- */
-class IDiskDirectoryIterator
-{
-public:
- /// Iterate to the next file.
- virtual void next() = 0;
-
- /// Return `true` if the iterator points to a valid element.
- virtual bool isValid() const = 0;
-
- /// Path to the file that the iterator currently points to.
- virtual String path() const = 0;
-
- /// Name of the file that the iterator currently points to.
- virtual String name() const = 0;
-
- virtual ~IDiskDirectoryIterator() = default;
-};
-
-/**
- * Information about reserved size on particular disk.
- */
-class IReservation : boost::noncopyable
-{
-public:
- /// Get reservation size.
- virtual UInt64 getSize() const = 0;
-
- /// Get the i-th disk where the reservation takes place.
- virtual DiskPtr getDisk(size_t i = 0) const = 0;
-
- /// Get all disks used in the reservation.
- virtual Disks getDisks() const = 0;
-
- /// Changes amount of reserved space.
- virtual void update(UInt64 new_size) = 0;
-
- /// Unreserves reserved space.
- virtual ~IReservation() = default;
-};
-
-/// Return full path to a file on disk.
-inline String fullPath(const DiskPtr & disk, const String & path)
-{
- return fs::path(disk->getPath()) / path;
-}
-
-/// Return parent path for the specified path.
-inline String parentPath(const String & path)
-{
- if (path.ends_with('/'))
- return fs::path(path).parent_path().parent_path() / "";
- return fs::path(path).parent_path() / "";
-}
-
-/// Return file name for the specified path.
-inline String fileName(const String & path)
-{
- return fs::path(path).filename();
-}
-
-/// Return directory path for the specified path.
-inline String directoryPath(const String & path)
-{
- return fs::path(path).parent_path() / "";
-}
-
-}
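
A small usage sketch for the interface and the path helpers above; `disk` may be any DiskPtr (for example a DiskLocal), and the paths are hypothetical:

    void touchMarker(const NDB::DiskPtr & disk)
    {
        disk->createDirectories("tmp/markers");
        disk->createFile("tmp/markers/ready");

        // Path helpers declared above: fullPath() prepends the disk root,
        // fileName()/directoryPath() decompose a path.
        auto full = NDB::fullPath(disk, "tmp/markers/ready");    // "<disk root>/tmp/markers/ready"

        if (disk->isFile("tmp/markers/" + NDB::fileName(full)))  // fileName(full) == "ready"
            disk->setLastModified("tmp/markers/ready", Poco::Timestamp());
    }
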
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp
deleted file mode 100644
index 1d57f28584c..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#include "IVolume.h"
-
-#include <Common/quoteString.h>
-
-#include <memory>
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int NO_ELEMENTS_IN_CONFIG;
- extern const int INCONSISTENT_RESERVATIONS;
- extern const int NO_RESERVATIONS_PROVIDED;
- extern const int UNKNOWN_VOLUME_TYPE;
-}
-
-String volumeTypeToString(VolumeType type)
-{
- switch (type)
- {
- case VolumeType::JBOD:
- return "JBOD";
- case VolumeType::RAID1:
- return "RAID1";
- case VolumeType::SINGLE_DISK:
- return "SINGLE_DISK";
- case VolumeType::UNKNOWN:
- return "UNKNOWN";
- }
- throw Exception("Unknown volume type, please add it to DB::volumeTypeToString", ErrorCodes::UNKNOWN_VOLUME_TYPE);
-}
-
-IVolume::IVolume(
- String name_,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- DiskSelectorPtr disk_selector)
- : name(std::move(name_))
-{
- Poco::Util::AbstractConfiguration::Keys keys;
- config.keys(config_prefix, keys);
-
- for (const auto & disk : keys)
- {
- if (disk.starts_with("disk"))
- {
- auto disk_name = config.getString(config_prefix + "." + disk);
- disks.push_back(disk_selector->get(disk_name));
- }
- }
-
- if (disks.empty())
- throw Exception("Volume must contain at least one disk", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
-}
-
-UInt64 IVolume::getMaxUnreservedFreeSpace() const
-{
- UInt64 res = 0;
- for (const auto & disk : disks)
- res = std::max(res, disk->getUnreservedSpace());
- return res;
-}
-
-MultiDiskReservation::MultiDiskReservation(Reservations & reservations_, UInt64 size_)
- : reservations(std::move(reservations_))
- , size(size_)
-{
- if (reservations.empty())
- {
- throw Exception("At least one reservation must be provided to MultiDiskReservation", ErrorCodes::NO_RESERVATIONS_PROVIDED);
- }
-
- for (auto & reservation : reservations)
- {
- if (reservation->getSize() != size_)
- {
- throw Exception("Reservations must have same size", ErrorCodes::INCONSISTENT_RESERVATIONS);
- }
- }
-}
-
-Disks MultiDiskReservation::getDisks() const
-{
- Disks res;
- res.reserve(reservations.size());
- for (const auto & reservation : reservations)
- {
- res.push_back(reservation->getDisk());
- }
- return res;
-}
-
-void MultiDiskReservation::update(UInt64 new_size)
-{
- for (auto & reservation : reservations)
- {
- reservation->update(new_size);
- }
- size = new_size;
-}
-
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h
deleted file mode 100644
index f6b7e6147b1..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/IVolume.h
+++ /dev/null
@@ -1,103 +0,0 @@
-#pragma once
-
-#include <Disks/IDisk.h>
-#include <Disks/DiskSelector.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-namespace NDB
-{
-
-enum class VolumeType
-{
- JBOD,
- RAID1,
- SINGLE_DISK,
- UNKNOWN
-};
-
-String volumeTypeToString(VolumeType t);
-
-class IVolume;
-using VolumePtr = std::shared_ptr<IVolume>;
-using Volumes = std::vector<VolumePtr>;
-
-/**
- * A group of disks selected by some (user) criteria. For example,
- * - VolumeJBOD("slow_disks", [d1, d2], 100)
- * - VolumeJBOD("fast_disks", [d3, d4], 200)
- *
- * Here VolumeJBOD is one of implementations of IVolume.
- *
- * Different implementations of this interface implement different reservation behaviour:
- * VolumeJBOD reserves space on the next disk after the last one used; other future implementations
- * may reserve, for example, equal space on all disks.
- */
-class IVolume : public Space
-{
-public:
- IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0, bool perform_ttl_move_on_insert_ = true)
- : disks(std::move(disks_))
- , name(name_)
- , max_data_part_size(max_data_part_size_)
- , perform_ttl_move_on_insert(perform_ttl_move_on_insert_)
- {
- }
-
- IVolume(
- String name_,
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- DiskSelectorPtr disk_selector
- );
-
- virtual ReservationPtr reserve(UInt64 bytes) override = 0;
-
- /// Volume name from config
- const String & getName() const override { return name; }
- virtual VolumeType getType() const = 0;
-
- /// Return biggest unreserved space across all disks
- UInt64 getMaxUnreservedFreeSpace() const;
-
- DiskPtr getDisk() const { return getDisk(0); }
- virtual DiskPtr getDisk(size_t i) const { return disks[i]; }
- const Disks & getDisks() const { return disks; }
-
-    /// Returns the effective value of whether merges are avoided on this volume (true) or allowed (false).
- virtual bool areMergesAvoided() const { return false; }
-
- /// User setting for enabling and disabling merges on volume.
- virtual void setAvoidMergesUserOverride(bool /*avoid*/) {}
-
-protected:
- Disks disks;
- const String name;
-
-public:
- /// Max size of reservation, zero means unlimited size
- UInt64 max_data_part_size = 0;
-    /// Whether a new data part should be moved to this volume synchronously on insert according to TTL,
-    /// or moved asynchronously by a background task after the insert.
- bool perform_ttl_move_on_insert = true;
-};
-
-/// Reservation for multiple disks at once. Can be used in RAID1 implementation.
-class MultiDiskReservation : public IReservation
-{
-public:
- MultiDiskReservation(Reservations & reservations, UInt64 size);
-
- UInt64 getSize() const override { return size; }
-
- DiskPtr getDisk(size_t i) const override { return reservations[i]->getDisk(); }
-
- Disks getDisks() const override;
-
- void update(UInt64 new_size) override;
-private:
- Reservations reservations;
- UInt64 size;
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.cpp
deleted file mode 100644
index 9cf98c5aa6e..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <Disks/LocalDirectorySyncGuard.h>
-#include <Common/Exception.h>
-#include <Disks/IDisk.h>
-#include <fcntl.h> // O_RDWR
-
-/// OSX does not have O_DIRECTORY
-#ifndef O_DIRECTORY
-#define O_DIRECTORY O_RDWR
-#endif
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int CANNOT_FSYNC;
- extern const int FILE_DOESNT_EXIST;
- extern const int CANNOT_OPEN_FILE;
- extern const int CANNOT_CLOSE_FILE;
-}
-
-LocalDirectorySyncGuard::LocalDirectorySyncGuard(const String & full_path)
- : fd(::open(full_path.c_str(), O_DIRECTORY))
-{
- if (-1 == fd)
- throwFromErrnoWithPath("Cannot open file " + full_path, full_path,
- errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
-}
-
-LocalDirectorySyncGuard::~LocalDirectorySyncGuard()
-{
- try
- {
-#if defined(OS_DARWIN)
- if (fcntl(fd, F_FULLFSYNC, 0))
- throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC);
-#endif
- if (-1 == ::fsync(fd))
- throw Exception("Cannot fsync", ErrorCodes::CANNOT_FSYNC);
-
- if (-1 == ::close(fd))
- throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.h
deleted file mode 100644
index d740a8b2fef..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/LocalDirectorySyncGuard.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-#include <Disks/IDisk.h>
-
-namespace NDB
-{
-
-class IDisk;
-using DiskPtr = std::shared_ptr<IDisk>;
-
-/// Helper class that receives a file descriptor and fsyncs it in its destructor.
-/// It is used to keep the descriptor open while performing some operations on it, and to fsync at the end.
-/// Guarantees of the sequence 'close-reopen-fsync' may depend on the kernel version.
-/// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496
-class LocalDirectorySyncGuard final : public ISyncGuard
-{
-public:
-    /// NOTE: If you already have an opened descriptor, prefer this constructor
-    /// to the constructor taking a path.
- LocalDirectorySyncGuard(int fd_) : fd(fd_) {}
- LocalDirectorySyncGuard(const String & full_path);
- ~LocalDirectorySyncGuard() override;
-
-private:
- int fd = -1;
-};
-
-}
-
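
A minimal usage sketch of the guard declared above; the path is hypothetical, and the behaviour noted in the comments follows the removed implementation (fsync in the destructor, F_FULLFSYNC on Darwin, errors logged rather than thrown).

    // Hypothetical usage: keep the directory descriptor open across metadata
    // changes and sync it when the guard leaves scope.
    {
        LocalDirectorySyncGuard guard("/path/to/table/dir");
        // ... create, rename or remove files inside the directory ...
    }   // destructor: fsync(fd) (F_FULLFSYNC on Darwin), then close(fd);
        // failures are logged via tryLogCurrentException, not rethrown.
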
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h
deleted file mode 100644
index 923e6611723..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Disks/SingleDiskVolume.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include <Disks/IVolume.h>
-
-namespace NDB
-{
-
-class SingleDiskVolume : public IVolume
-{
-public:
- SingleDiskVolume(const String & name_, DiskPtr disk, size_t max_data_part_size_ = 0): IVolume(name_, {disk}, max_data_part_size_)
- {
- }
-
- ReservationPtr reserve(UInt64 bytes) override
- {
- return disks[0]->reserve(bytes);
- }
-
- VolumeType getType() const override { return VolumeType::SINGLE_DISK; }
-
-};
-
-using VolumeSingleDiskPtr = std::shared_ptr<SingleDiskVolume>;
-using VolumesSingleDiskPtr = std::vector<VolumeSingleDiskPtr>;
-
-}
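
A small usage sketch, assuming `disk` is a DiskPtr obtained elsewhere (for example from a DiskSelector); it only illustrates that reservations on this volume delegate to its single disk.

    VolumeSingleDiskPtr volume = std::make_shared<SingleDiskVolume>("tmp", disk);
    ReservationPtr reservation = volume->reserve(64 * 1024 * 1024);  // forwarded to disks[0]->reserve()
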
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ConnectionTimeoutsContext.h b/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ConnectionTimeoutsContext.h
deleted file mode 100644
index 06de1de2942..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/IO/ConnectionTimeoutsContext.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#pragma once
-
-#include <IO/ConnectionTimeouts.h>
-#include <Poco/Util/AbstractConfiguration.h>
-#include <Interpreters/Context.h>
-
-namespace NDB
-{
-
-/// Timeouts for the case when we have just a single attempt to connect.
-inline ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithoutFailover(const Settings & settings)
-{
- return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout);
-}
-
-/// Timeouts for the case when we will try many addresses in a loop.
-inline ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithFailover(const Settings & settings)
-{
- return ConnectionTimeouts(
- settings.connect_timeout_with_failover_ms,
- settings.send_timeout,
- settings.receive_timeout,
- settings.tcp_keep_alive_timeout,
- 0,
- settings.connect_timeout_with_failover_secure_ms,
- settings.hedged_connection_timeout_ms,
- settings.receive_data_timeout_ms);
-}
-
-inline ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(ContextPtr context)
-{
- const auto & settings = context->getSettingsRef();
- const auto & config = context->getConfigRef();
- Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 10), 0};
- return ConnectionTimeouts(settings.http_connection_timeout, settings.http_send_timeout, settings.http_receive_timeout, settings.tcp_keep_alive_timeout, http_keep_alive_timeout);
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h
deleted file mode 100644
index bf3f4d1e929..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/AggregationCommon.h
+++ /dev/null
@@ -1,339 +0,0 @@
-#pragma once
-
-#include <array>
-
-#include <Common/SipHash.h>
-#include <Common/Arena.h>
-#include <Common/HashTable/Hash.h>
-#include <Common/memcpySmall.h>
-#include <Common/assert_cast.h>
-#include <Core/Defines.h>
-#include <common/StringRef.h>
-#include <Columns/IColumn.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnLowCardinality.h>
-
-#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
-#include <tmmintrin.h>
-#endif
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-using Sizes = std::vector<size_t>;
-
-/// When packing the values of nullable columns at a given row, we have to
-/// store the fact that these values are nullable or not. This is achieved
-/// by encoding this information as a bitmap. Let S be the size in bytes of
-/// a packed values binary blob and T the number of bytes we may place into
-/// this blob, the size that the bitmap shall occupy in the blob is equal to:
-/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for
-/// each value of S, the corresponding value of T, and the bitmap size:
-///
-/// 32,28,4
-/// 16,14,2
-/// 8,7,1
-/// 4,3,1
-/// 2,1,1
-///
-
-namespace
-{
-
-template <typename T>
-constexpr auto getBitmapSize()
-{
- return
- (sizeof(T) == 32) ?
- 4 :
- (sizeof(T) == 16) ?
- 2 :
- ((sizeof(T) == 8) ?
- 1 :
- ((sizeof(T) == 4) ?
- 1 :
- ((sizeof(T) == 2) ?
- 1 :
- 0)));
-}
-
-}
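
A short worked check of the S = T + ceil(T/8) relation from the comment above, assuming the usual ClickHouse-style integer typedefs (UInt128 is 16 bytes, UInt64 is 8 bytes):

    // 16-byte blob: 14 bytes of packed values + 2 bitmap bytes (16 = 14 + 2)
    static_assert(getBitmapSize<UInt128>() == 2, "unexpected bitmap size for a 16-byte blob");
    //  8-byte blob:  7 bytes of packed values + 1 bitmap byte  ( 8 =  7 + 1)
    static_assert(getBitmapSize<UInt64>() == 1, "unexpected bitmap size for an 8-byte blob");
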
-
-template<typename T, size_t step>
-void fillFixedBatch(size_t num_rows, const T * source, T * dest)
-{
- for (size_t i = 0; i < num_rows; ++i)
- {
- *dest = *source;
- ++source;
- dest += step;
- }
-}
-
-/// Move keys of size T into binary blob, starting from offset.
-/// It is assumed that offset is aligned to sizeof(T).
-/// Example: sizeof(key) = 16, sizeof(T) = 4, offset = 8
-/// out[0] : [--------****----]
-/// out[1] : [--------****----]
-/// ...
-template<typename T, typename Key>
-void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, PaddedPODArray<Key> & out, size_t & offset)
-{
- for (size_t i = 0; i < keys_size; ++i)
- {
- if (key_sizes[i] == sizeof(T))
- {
- const auto * column = key_columns[i];
- size_t num_rows = column->size();
- out.resize_fill(num_rows);
-
- /// Note: here we violate strict aliasing.
-            /// It should be OK as long as we do not refer to any value from `out` before filling.
- const char * source = static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<sizeof(T)>();
- T * dest = reinterpret_cast<T *>(reinterpret_cast<char *>(out.data()) + offset);
- fillFixedBatch<T, sizeof(Key) / sizeof(T)>(num_rows, reinterpret_cast<const T *>(source), dest);
- offset += sizeof(T);
- }
- }
-}
-
-/// Pack a set of fixed-size keys into a binary blob of type T. It is assumed that all the keys fit into the
-/// binary blob. Keys are placed starting from the longest ones.
-template <typename T>
-void packFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, PaddedPODArray<T> & out)
-{
- size_t offset = 0;
- fillFixedBatch<UInt128>(keys_size, key_columns, key_sizes, out, offset);
- fillFixedBatch<UInt64>(keys_size, key_columns, key_sizes, out, offset);
- fillFixedBatch<UInt32>(keys_size, key_columns, key_sizes, out, offset);
- fillFixedBatch<UInt16>(keys_size, key_columns, key_sizes, out, offset);
- fillFixedBatch<UInt8>(keys_size, key_columns, key_sizes, out, offset);
-}
-
-template <typename T>
-using KeysNullMap = std::array<UInt8, getBitmapSize<T>()>;
-
-/// Pack a set of fixed-size keys into a binary blob of type T. It is assumed that all the keys fit into the
-/// binary blob; they are laid out in it consecutively.
-template <typename T, bool has_low_cardinality = false>
-static inline T ALWAYS_INLINE packFixed(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
- const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr,
- const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr)
-{
- T key{};
- char * bytes = reinterpret_cast<char *>(&key);
- size_t offset = 0;
-
- for (size_t j = 0; j < keys_size; ++j)
- {
- size_t index = i;
- const IColumn * column = key_columns[j];
- if constexpr (has_low_cardinality)
- {
- if (const IColumn * positions = (*low_cardinality_positions)[j])
- {
- switch ((*low_cardinality_sizes)[j])
- {
- case sizeof(UInt8): index = assert_cast<const ColumnUInt8 *>(positions)->getElement(i); break;
- case sizeof(UInt16): index = assert_cast<const ColumnUInt16 *>(positions)->getElement(i); break;
- case sizeof(UInt32): index = assert_cast<const ColumnUInt32 *>(positions)->getElement(i); break;
- case sizeof(UInt64): index = assert_cast<const ColumnUInt64 *>(positions)->getElement(i); break;
- default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
- }
- }
- }
-
- switch (key_sizes[j])
- {
- case 1:
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1);
- offset += 1;
- }
- break;
- case 2:
- if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size.
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2);
- offset += 2;
- }
- break;
- case 4:
- if constexpr (sizeof(T) >= 4)
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4);
- offset += 4;
- }
- break;
- case 8:
- if constexpr (sizeof(T) >= 8)
- {
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8);
- offset += 8;
- }
- break;
- default:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]);
- offset += key_sizes[j];
- }
- }
-
- return key;
-}
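
To make the byte layout concrete, a hedged standalone sketch (not from the removed file) of the same packing idea for two keys of sizes 4 and 2 placed into an 8-byte blob; packFixed performs the equivalent memcpy per row from ColumnVectorHelper data.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Pack a 4-byte key and a 2-byte key back to back into one 8-byte blob;
    // the remaining two bytes stay zero, as with the zero-initialized `T key{}` above.
    uint64_t packTwoKeys(uint32_t a, uint16_t b)
    {
        uint64_t key = 0;
        char * bytes = reinterpret_cast<char *>(&key);
        size_t offset = 0;
        std::memcpy(bytes + offset, &a, sizeof(a)); offset += sizeof(a);
        std::memcpy(bytes + offset, &b, sizeof(b)); offset += sizeof(b);
        return key;
    }
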
-
-/// Similar to the above, but supports nullable values.
-template <typename T>
-static inline T ALWAYS_INLINE packFixed(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
- const KeysNullMap<T> & bitmap)
-{
- union
- {
- T key;
- char bytes[sizeof(key)] = {};
- };
-
- size_t offset = 0;
-
- static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
- static constexpr bool has_bitmap = bitmap_size > 0;
-
- if (has_bitmap)
- {
- memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8));
- offset += bitmap_size;
- }
-
- for (size_t j = 0; j < keys_size; ++j)
- {
- bool is_null;
-
- if (!has_bitmap)
- is_null = false;
- else
- {
- size_t bucket = j / 8;
- size_t off = j % 8;
- is_null = ((bitmap[bucket] >> off) & 1) == 1;
- }
-
- if (is_null)
- continue;
-
- switch (key_sizes[j])
- {
- case 1:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1);
- offset += 1;
- break;
- case 2:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2);
- offset += 2;
- break;
- case 4:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4);
- offset += 4;
- break;
- case 8:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8);
- offset += 8;
- break;
- default:
- memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]);
- offset += key_sizes[j];
- }
- }
-
- return key;
-}
-
-
-/// Hash a set of keys into a UInt128 value.
-static inline UInt128 ALWAYS_INLINE hash128(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns)
-{
- UInt128 key;
- SipHash hash;
-
- for (size_t j = 0; j < keys_size; ++j)
- key_columns[j]->updateHashWithValue(i, hash);
-
- hash.get128(key);
-
- return key;
-}
-
-
-/// Copy keys to the pool. Then put StringRefs to them into the pool and return a pointer to the first one.
-static inline StringRef * ALWAYS_INLINE placeKeysInPool(
- size_t keys_size, StringRefs & keys, Arena & pool)
-{
- for (size_t j = 0; j < keys_size; ++j)
- {
- char * place = pool.alloc(keys[j].size);
- memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size);
- keys[j].data = place;
- }
-
- /// Place the StringRefs on the newly copied keys in the pool.
- char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef));
- memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef));
-
- return reinterpret_cast<StringRef *>(res);
-}
-
-
-/** Serialize keys into a contiguous chunk of memory.
- */
-static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous(
- size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool)
-{
- const char * begin = nullptr;
-
- size_t sum_size = 0;
- for (size_t j = 0; j < keys_size; ++j)
- sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size;
-
- return {begin, sum_size};
-}
-
-
-/** Pack elements with shuffle instruction.
- * See the explanation in ColumnsHashing.h
- */
-#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
-template <typename T>
-static T inline packFixedShuffle(
- const char * __restrict * __restrict srcs,
- size_t num_srcs,
- const size_t * __restrict elem_sizes,
- size_t idx,
- const uint8_t * __restrict masks)
-{
- assert(num_srcs > 0);
-
- __m128i res = _mm_shuffle_epi8(
- _mm_loadu_si128(reinterpret_cast<const __m128i *>(srcs[0] + elem_sizes[0] * idx)),
- _mm_loadu_si128(reinterpret_cast<const __m128i *>(masks)));
-
- for (size_t i = 1; i < num_srcs; ++i)
- {
- res = _mm_xor_si128(res,
- _mm_shuffle_epi8(
- _mm_loadu_si128(reinterpret_cast<const __m128i *>(srcs[i] + elem_sizes[i] * idx)),
- _mm_loadu_si128(reinterpret_cast<const __m128i *>(&masks[i * sizeof(T)]))));
- }
-
- T out;
- __builtin_memcpy(&out, &res, sizeof(T));
- return out;
-}
-#endif
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h
deleted file mode 100644
index fc65a046315..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Aggregator.h
+++ /dev/null
@@ -1,1335 +0,0 @@
-#pragma once
-
-#include <mutex>
-#include <memory>
-#include <functional>
-
-#include <common/logger_useful.h>
-
-#include <common/StringRef.h>
-#include <Common/Arena.h>
-#include <Common/HashTable/FixedHashMap.h>
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/TwoLevelHashMap.h>
-#include <Common/HashTable/StringHashMap.h>
-#include <Common/HashTable/TwoLevelStringHashMap.h>
-
-#include <Common/ThreadPool.h>
-#include <Common/ColumnsHashing.h>
-#include <Common/assert_cast.h>
-#include <Common/filesystemHelpers.h>
-
-#include <DataStreams/IBlockStream_fwd.h>
-#include <DataStreams/SizeLimits.h>
-
-#include <Disks/SingleDiskVolume.h>
-
-#include <Interpreters/AggregateDescription.h>
-#include <Interpreters/AggregationCommon.h>
-//#include <Interpreters/JIT/compileFunction.h>
-
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnAggregateFunction.h>
-#include <Columns/ColumnVector.h>
-#include <Columns/ColumnNullable.h>
-#include <Columns/ColumnLowCardinality.h>
-
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
-}
-
-class IBlockOutputStream;
-
-/** Different data structures that can be used for aggregation
- * For efficiency, the aggregation data itself is put into the pool.
- * Data and pool ownership (states of aggregate functions)
- * is acquired later - in `convertToBlocks` function, by the ColumnAggregateFunction object.
- *
- * Most data structures exist in two versions: normal and two-level (TwoLevel).
- * A two-level hash table works a little slower with a small number of different keys,
- * but with a large number of different keys it scales better, because it allows
- * parallelizing some operations (merging, post-processing) in a natural way.
- *
- * To ensure efficient work over a wide range of conditions,
- * first single-level hash tables are used,
- * and when the number of different keys is large enough,
- * they are converted to two-level ones.
- *
- * PS. There are many different approaches to the effective implementation of parallel and distributed aggregation,
- * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons.
- */
-
-using AggregatedDataWithoutKey = AggregateDataPtr;
-
-using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize<UInt8, AggregateDataPtr>;
-using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap<UInt16, AggregateDataPtr>;
-
-using AggregatedDataWithUInt32Key = HashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
-using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
-
-using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>;
-
-using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
-
-using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
-using AggregatedDataWithKeys256 = HashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;
-
-using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
-using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
-
-using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>;
-
-using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>;
-
-using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
-using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;
-
-/** Variants with a better hash function, using more than 32 bits for the hash.
- * Used for the merging phase of external aggregation, where the number of keys may be far greater than 4 billion,
- * but we keep in memory and merge only a sub-partition of them at a time.
- * TODO We need to switch to a better hash function, not only for external aggregation,
- * but also for huge aggregation results on machines with terabytes of RAM.
- */
-
-using AggregatedDataWithUInt64KeyHash64 = HashMap<UInt64, AggregateDataPtr, DefaultHash<UInt64>>;
-using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash<StringRef, AggregateDataPtr, StringRefHash64>;
-using AggregatedDataWithKeys128Hash64 = HashMap<UInt128, AggregateDataPtr, UInt128Hash>;
-using AggregatedDataWithKeys256Hash64 = HashMap<UInt256, AggregateDataPtr, UInt256Hash>;
-
-template <typename Base>
-struct AggregationDataWithNullKey : public Base
-{
- using Base::Base;
-
- bool & hasNullKeyData() { return has_null_key_data; }
- AggregateDataPtr & getNullKeyData() { return null_key_data; }
- bool hasNullKeyData() const { return has_null_key_data; }
- const AggregateDataPtr & getNullKeyData() const { return null_key_data; }
- size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); }
- bool empty() const { return Base::empty() && !has_null_key_data; }
- void clear()
- {
- Base::clear();
- has_null_key_data = false;
- }
- void clearAndShrink()
- {
- Base::clearAndShrink();
- has_null_key_data = false;
- }
-
-private:
- bool has_null_key_data = false;
- AggregateDataPtr null_key_data = nullptr;
-};
-
-template <typename Base>
-struct AggregationDataWithNullKeyTwoLevel : public Base
-{
- using Base::impls;
-
- AggregationDataWithNullKeyTwoLevel() = default;
-
- template <typename Other>
- explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other)
- {
- impls[0].hasNullKeyData() = other.hasNullKeyData();
- impls[0].getNullKeyData() = other.getNullKeyData();
- }
-
- bool & hasNullKeyData() { return impls[0].hasNullKeyData(); }
- AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); }
- bool hasNullKeyData() const { return impls[0].hasNullKeyData(); }
- const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); }
-};
-
-template <typename ... Types>
-using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>;
-template <typename ... Types>
-using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>;
-
-using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
-using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
-
-using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>;
-using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>;
-
-using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
- TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
- TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
-
-using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
- TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>;
-
-using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
- TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
- TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
-
-
-/// For the case where there is one numeric key.
-/// FieldType is UInt8/16/32/64 for any type with corresponding bit width.
-template <typename FieldType, typename TData,
- bool consecutive_keys_optimization = true>
-struct AggregationMethodOneNumber
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
- AggregationMethodOneNumber() = default;
-
- template <typename Other>
- AggregationMethodOneNumber(const Other & other) : data(other.data) {}
-
- /// To use one `Method` in different threads, use different `State`.
- using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type,
- Mapped, FieldType, consecutive_keys_optimization>;
-
- /// Use optimization for low cardinality.
- static const bool low_cardinality_optimization = false;
-
- /// Shuffle key columns before `insertKeyIntoColumns` call if needed.
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- // Insert the key from the hash table into columns.
- static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/)
- {
- const auto * key_holder = reinterpret_cast<const char *>(&key);
- auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]);
- column->insertRawData<sizeof(FieldType)>(key_holder);
- }
-};
-
-
-/// For the case where there is one string key.
-template <typename TData>
-struct AggregationMethodString
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
- AggregationMethodString() = default;
-
- template <typename Other>
- AggregationMethodString(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
-
- static const bool low_cardinality_optimization = false;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
- static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-
-/// Same as above but without cache
-template <typename TData>
-struct AggregationMethodStringNoCache
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
- AggregationMethodStringNoCache() = default;
-
- template <typename Other>
- AggregationMethodStringNoCache(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
-
- static const bool low_cardinality_optimization = false;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
- static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-
-/// For the case where there is one fixed-length string key.
-template <typename TData>
-struct AggregationMethodFixedString
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
- AggregationMethodFixedString() = default;
-
- template <typename Other>
- AggregationMethodFixedString(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>;
-
- static const bool low_cardinality_optimization = false;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
- static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-/// Same as above but without cache
-template <typename TData>
-struct AggregationMethodFixedStringNoCache
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
- AggregationMethodFixedStringNoCache() = default;
-
- template <typename Other>
- AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
-
- static const bool low_cardinality_optimization = false;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
- static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
- }
-};
-
-
-/// Single low cardinality column.
-template <typename SingleColumnMethod>
-struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
-{
- using Base = SingleColumnMethod;
- using BaseState = typename Base::State;
-
- using Data = typename Base::Data;
- using Key = typename Base::Key;
- using Mapped = typename Base::Mapped;
-
- using Base::data;
-
- AggregationMethodSingleLowCardinalityColumn() = default;
-
- template <typename Other>
- explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {}
-
- using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn<BaseState, Mapped, true>;
-
- static const bool low_cardinality_optimization = true;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- static void insertKeyIntoColumns(const Key & key,
- std::vector<IColumn *> & key_columns_low_cardinality, const Sizes & /*key_sizes*/)
- {
- auto * col = assert_cast<ColumnLowCardinality *>(key_columns_low_cardinality[0]);
-
- if constexpr (std::is_same_v<Key, StringRef>)
- {
- col->insertData(key.data, key.size);
- }
- else
- {
- col->insertData(reinterpret_cast<const char *>(&key), sizeof(key));
- }
- }
-};
-
-
-/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits.
-template <typename TData, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
-struct AggregationMethodKeysFixed
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
- static constexpr bool has_nullable_keys = has_nullable_keys_;
- static constexpr bool has_low_cardinality = has_low_cardinality_;
-
- Data data;
-
- AggregationMethodKeysFixed() = default;
-
- template <typename Other>
- AggregationMethodKeysFixed(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodKeysFixed<
- typename Data::value_type,
- Key,
- Mapped,
- has_nullable_keys,
- has_low_cardinality,
- use_cache>;
-
- static const bool low_cardinality_optimization = false;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
- {
- return State::shuffleKeyColumns(key_columns, key_sizes);
- }
-
- static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
- {
- size_t keys_size = key_columns.size();
-
- static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
- /// In any hash key value, column values to be read start just after the bitmap, if it exists.
- size_t pos = bitmap_size;
-
- for (size_t i = 0; i < keys_size; ++i)
- {
- IColumn * observed_column;
- ColumnUInt8 * null_map;
-
- bool column_nullable = false;
- if constexpr (has_nullable_keys)
- column_nullable = isColumnNullable(*key_columns[i]);
-
- /// If we have a nullable column, get its nested column and its null map.
- if (column_nullable)
- {
- ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[i]);
- observed_column = &nullable_col.getNestedColumn();
- null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn());
- }
- else
- {
- observed_column = key_columns[i];
- null_map = nullptr;
- }
-
- bool is_null = false;
- if (column_nullable)
- {
-                /// The current column is nullable. Check whether the value of the
-                /// corresponding key is null. Update the null map accordingly.
- size_t bucket = i / 8;
- size_t offset = i % 8;
- UInt8 val = (reinterpret_cast<const UInt8 *>(&key)[bucket] >> offset) & 1;
- null_map->insertValue(val);
- is_null = val == 1;
- }
-
- if (has_nullable_keys && is_null)
- observed_column->insertDefault();
- else
- {
- size_t size = key_sizes[i];
- observed_column->insertData(reinterpret_cast<const char *>(&key) + pos, size);
- pos += size;
- }
- }
- }
-};
-
-
-/** Aggregates by concatenating serialized key values.
- * The serialized value is such that it can be deserialized unambiguously given only the position at which it starts.
- * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
- * Therefore, when aggregating by several strings, there is no ambiguity.
- */
-template <typename TData>
-struct AggregationMethodSerialized
-{
- using Data = TData;
- using Key = typename Data::key_type;
- using Mapped = typename Data::mapped_type;
-
- Data data;
-
- AggregationMethodSerialized() = default;
-
- template <typename Other>
- AggregationMethodSerialized(const Other & other) : data(other.data) {}
-
- using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>;
-
- static const bool low_cardinality_optimization = false;
-
- std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
-
- static void insertKeyIntoColumns(const StringRef & key, std::vector<IColumn *> & key_columns, const Sizes &)
- {
- const auto * pos = key.data;
- for (auto & column : key_columns)
- pos = column->deserializeAndInsertFromArena(pos);
- }
-};
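
As a hedged illustration of why length-prefixed serialization is unambiguous (the actual byte format produced by IColumn::serializeValueIntoArena is an implementation detail and may differ), a standalone sketch:

    #include <cstdint>
    #include <initializer_list>
    #include <string>

    // Length-prefix each part: ("ab", "c") and ("a", "bc") produce different
    // byte sequences, so concatenated keys never collide by accident.
    std::string serializeLengthPrefixed(std::initializer_list<std::string> parts)
    {
        std::string out;
        for (const auto & s : parts)
        {
            const uint32_t len = static_cast<uint32_t>(s.size());
            out.append(reinterpret_cast<const char *>(&len), sizeof(len));  // length first
            out.append(s);                                                  // then the bytes
        }
        return out;
    }
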
-
-
-class Aggregator;
-
-using ColumnsHashing::HashMethodContext;
-using ColumnsHashing::HashMethodContextPtr;
-
-struct AggregatedDataVariants : private boost::noncopyable
-{
- /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way:
- * - when aggregating, states are created in the pool using IAggregateFunction::create (inside - `placement new` of arbitrary structure);
- * - they must then be destroyed using IAggregateFunction::destroy (inside - calling the destructor of arbitrary structure);
- * - if aggregation is complete, then, in the Aggregator::convertToBlocks function, pointers to the states of aggregate functions
- * are written to ColumnAggregateFunction; ColumnAggregateFunction "acquires ownership" of them, that is - calls `destroy` in its destructor.
- * - if during the aggregation, before call to Aggregator::convertToBlocks, an exception was thrown,
- * then the states of aggregate functions must still be destroyed,
-     * otherwise, for complex states (e.g. AggregateFunctionUniq), there will be memory leaks;
- * - in this case, to destroy states, the destructor calls Aggregator::destroyAggregateStates method,
- * but only if the variable aggregator (see below) is not nullptr;
-     * - that is, until you transfer ownership of the aggregate function states to the ColumnAggregateFunction, set the variable `aggregator`,
- * so that when an exception occurs, the states are correctly destroyed.
- *
- * PS. This can be corrected by making a pool that knows about which states of aggregate functions and in which order are put in it, and knows how to destroy them.
- * But this can hardly be done simply because it is planned to put variable-length strings into the same pool.
- * In this case, the pool will not be able to know with what offsets objects are stored.
- */
- const Aggregator * aggregator = nullptr;
-
- size_t keys_size{}; /// Number of keys. NOTE do we need this field?
- Sizes key_sizes; /// Dimensions of keys, if keys of fixed length
-
- /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction.
- Arenas aggregates_pools;
- Arena * aggregates_pool{}; /// The pool that is currently used for allocation.
-
- /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by.
- */
- AggregatedDataWithoutKey without_key = nullptr;
-
-    // Disable consecutive key optimization for UInt8/16, because they use a FixedHashMap
- // and the lookup there is almost free, so we don't need to cache the last lookup result
- std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>> key8;
- std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>> key16;
-
- std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>> key32;
- std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>> key64;
- std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>> key_string;
- std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>> key_fixed_string;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt16Key, false, false, false>> keys16;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32Key>> keys32;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64Key>> keys64;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>> keys128;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>> keys256;
- std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>> serialized;
-
- std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level;
- std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level;
- std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_string_two_level;
- std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_fixed_string_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt32KeyTwoLevel>> keys32_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithUInt64KeyTwoLevel>> keys64_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>> keys128_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>> keys256_two_level;
- std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level;
-
- std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyHash64>> key64_hash64;
- std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyHash64>> key_string_hash64;
- std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyHash64>> key_fixed_string_hash64;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128Hash64>> keys128_hash64;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256Hash64>> keys256_hash64;
- std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64;
-
- /// Support for nullable keys.
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
-
- /// Support for low cardinality.
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>> low_cardinality_key8;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>> low_cardinality_key16;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key32;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>> low_cardinality_key64;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_string;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKey>>> low_cardinality_key_fixed_string;
-
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key32_two_level;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel>>> low_cardinality_key64_two_level;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_string_two_level;
- std::unique_ptr<AggregationMethodSingleLowCardinalityColumn<AggregationMethodFixedString<AggregatedDataWithNullableStringKeyTwoLevel>>> low_cardinality_key_fixed_string_two_level;
-
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, false, true>> low_cardinality_keys128;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, false, true>> low_cardinality_keys256;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, false, true>> low_cardinality_keys128_two_level;
- std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, false, true>> low_cardinality_keys256_two_level;
-
- /// In this and similar macros, the option without_key is not considered.
- #define APPLY_FOR_AGGREGATED_VARIANTS(M) \
- M(key8, false) \
- M(key16, false) \
- M(key32, false) \
- M(key64, false) \
- M(key_string, false) \
- M(key_fixed_string, false) \
- M(keys16, false) \
- M(keys32, false) \
- M(keys64, false) \
- M(keys128, false) \
- M(keys256, false) \
- M(serialized, false) \
- M(key32_two_level, true) \
- M(key64_two_level, true) \
- M(key_string_two_level, true) \
- M(key_fixed_string_two_level, true) \
- M(keys32_two_level, true) \
- M(keys64_two_level, true) \
- M(keys128_two_level, true) \
- M(keys256_two_level, true) \
- M(serialized_two_level, true) \
- M(key64_hash64, false) \
- M(key_string_hash64, false) \
- M(key_fixed_string_hash64, false) \
- M(keys128_hash64, false) \
- M(keys256_hash64, false) \
- M(serialized_hash64, false) \
- M(nullable_keys128, false) \
- M(nullable_keys256, false) \
- M(nullable_keys128_two_level, true) \
- M(nullable_keys256_two_level, true) \
- M(low_cardinality_key8, false) \
- M(low_cardinality_key16, false) \
- M(low_cardinality_key32, false) \
- M(low_cardinality_key64, false) \
- M(low_cardinality_keys128, false) \
- M(low_cardinality_keys256, false) \
- M(low_cardinality_key_string, false) \
- M(low_cardinality_key_fixed_string, false) \
- M(low_cardinality_key32_two_level, true) \
- M(low_cardinality_key64_two_level, true) \
- M(low_cardinality_keys128_two_level, true) \
- M(low_cardinality_keys256_two_level, true) \
- M(low_cardinality_key_string_two_level, true) \
- M(low_cardinality_key_fixed_string_two_level, true) \
-
- enum class Type
- {
- EMPTY = 0,
- without_key,
-
- #define M(NAME, IS_TWO_LEVEL) NAME,
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- };
- Type type = Type::EMPTY;
-
- AggregatedDataVariants() : aggregates_pools(1, std::make_shared<Arena>()), aggregates_pool(aggregates_pools.back().get()) {}
- bool empty() const { return type == Type::EMPTY; }
- void invalidate() { type = Type::EMPTY; }
-
- ~AggregatedDataVariants();
-
- void init(Type type_)
- {
- switch (type_)
- {
- case Type::EMPTY: break;
- case Type::without_key: break;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break;
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- type = type_;
- }
-
- /// Number of rows (different keys).
- size_t size() const
- {
- switch (type)
- {
- case Type::EMPTY: return 0;
- case Type::without_key: return 1;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return NAME->data.size() + (without_key != nullptr);
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- /// The size without taking into account the row in which data is written for the calculation of TOTALS.
- size_t sizeWithoutOverflowRow() const
- {
- switch (type)
- {
- case Type::EMPTY: return 0;
- case Type::without_key: return 1;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return NAME->data.size();
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- const char * getMethodName() const
- {
- switch (type)
- {
- case Type::EMPTY: return "EMPTY";
- case Type::without_key: return "without_key";
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return #NAME;
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- bool isTwoLevel() const
- {
- switch (type)
- {
- case Type::EMPTY: return false;
- case Type::without_key: return false;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: return IS_TWO_LEVEL;
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
- }
-
- __builtin_unreachable();
- }
-
- #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
- M(key32) \
- M(key64) \
- M(key_string) \
- M(key_fixed_string) \
- M(keys32) \
- M(keys64) \
- M(keys128) \
- M(keys256) \
- M(serialized) \
- M(nullable_keys128) \
- M(nullable_keys256) \
- M(low_cardinality_key32) \
- M(low_cardinality_key64) \
- M(low_cardinality_keys128) \
- M(low_cardinality_keys256) \
- M(low_cardinality_key_string) \
- M(low_cardinality_key_fixed_string) \
-
- #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
- M(key8) \
- M(key16) \
- M(keys16) \
- M(key64_hash64) \
- M(key_string_hash64)\
- M(key_fixed_string_hash64) \
- M(keys128_hash64) \
- M(keys256_hash64) \
- M(serialized_hash64) \
- M(low_cardinality_key8) \
- M(low_cardinality_key16) \
-
- #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \
- APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
- APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
-
- bool isConvertibleToTwoLevel() const
- {
- switch (type)
- {
- #define M(NAME) \
- case Type::NAME: return true;
-
- APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
-
- #undef M
- default:
- return false;
- }
- }
-
- void convertToTwoLevel();
-
- #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \
- M(key32_two_level) \
- M(key64_two_level) \
- M(key_string_two_level) \
- M(key_fixed_string_two_level) \
- M(keys32_two_level) \
- M(keys64_two_level) \
- M(keys128_two_level) \
- M(keys256_two_level) \
- M(serialized_two_level) \
- M(nullable_keys128_two_level) \
- M(nullable_keys256_two_level) \
- M(low_cardinality_key32_two_level) \
- M(low_cardinality_key64_two_level) \
- M(low_cardinality_keys128_two_level) \
- M(low_cardinality_keys256_two_level) \
- M(low_cardinality_key_string_two_level) \
- M(low_cardinality_key_fixed_string_two_level) \
-
- #define APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) \
- M(low_cardinality_key8) \
- M(low_cardinality_key16) \
- M(low_cardinality_key32) \
- M(low_cardinality_key64) \
- M(low_cardinality_keys128) \
- M(low_cardinality_keys256) \
- M(low_cardinality_key_string) \
- M(low_cardinality_key_fixed_string) \
- M(low_cardinality_key32_two_level) \
- M(low_cardinality_key64_two_level) \
- M(low_cardinality_keys128_two_level) \
- M(low_cardinality_keys256_two_level) \
- M(low_cardinality_key_string_two_level) \
- M(low_cardinality_key_fixed_string_two_level)
-
- bool isLowCardinality() const
- {
- switch (type)
- {
- #define M(NAME) \
- case Type::NAME: return true;
-
- APPLY_FOR_LOW_CARDINALITY_VARIANTS(M)
- #undef M
- default:
- return false;
- }
- }
-
- static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings)
- {
- switch (type)
- {
- case Type::without_key: return nullptr;
-
- #define M(NAME, IS_TWO_LEVEL) \
- case Type::NAME: \
- { \
- using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \
- using T ## NAME = typename TPtr ## NAME ::element_type; \
- return T ## NAME ::State::createContext(settings); \
- }
-
- APPLY_FOR_AGGREGATED_VARIANTS(M)
- #undef M
-
- default:
- throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
- }
- }
-};
-
-using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
-using ManyAggregatedDataVariants = std::vector<AggregatedDataVariantsPtr>;
-using ManyAggregatedDataVariantsPtr = std::shared_ptr<ManyAggregatedDataVariants>;
-
-class CompiledAggregateFunctionsHolder;
-
-/** How are "total" values calculated with WITH TOTALS?
- * (For more details, see TotalsHavingTransform.)
- *
- * In the absence of group_by_overflow_mode = 'any', the data is aggregated as usual, but the states of the aggregate functions are not finalized.
- * Later, the aggregate function states for all rows (passed through HAVING) are merged into one - this will be TOTALS.
- *
- * If there is group_by_overflow_mode = 'any', the data is aggregated as usual, except for the keys that did not fit in max_rows_to_group_by.
- * For these keys, the data is aggregated into one additional row - see below under the names `overflow_row`, `overflows`...
- * Later, the aggregate function states for all rows (passed through HAVING) are merged into one,
- * and overflow_row is added or not (depending on the totals_mode setting) - this will be TOTALS.
- */
-
-
-/** Aggregates the source of the blocks.
- */
-class Aggregator final
-{
-public:
- struct Params
- {
- /// Data structure of source blocks.
- Block src_header;
- /// Data structure of intermediate blocks before merge.
- Block intermediate_header;
-
- /// What to count.
- const ColumnNumbers keys;
- const AggregateDescriptions aggregates;
- const size_t keys_size;
- const size_t aggregates_size;
-
- /// The settings of approximate calculation of GROUP BY.
-        const bool overflow_row;    /// Whether to put aggregates for keys that did not fit into max_rows_to_group_by into AggregatedDataVariants::without_key.
- const size_t max_rows_to_group_by;
- const OverflowMode group_by_overflow_mode;
-
- /// Two-level aggregation settings (used for a large number of keys).
-        /** At how many keys, or at what size of the aggregation state in bytes,
-          * two-level aggregation begins to be used. Reaching at least one of the thresholds is enough.
- * 0 - the corresponding threshold is not specified.
- */
- size_t group_by_two_level_threshold;
- size_t group_by_two_level_threshold_bytes;
-
- /// Settings to flush temporary data to the filesystem (external aggregation).
- const size_t max_bytes_before_external_group_by; /// 0 - do not use external aggregation.
-
- /// Return empty result when aggregating without keys on empty set.
- bool empty_result_for_aggregation_by_empty_set;
-
- VolumePtr tmp_volume;
-
-        /// This setting is used to determine the cache size. No threads are created.
- size_t max_threads;
-
- const size_t min_free_disk_space;
-
- bool compile_aggregate_expressions;
- size_t min_count_to_compile_aggregate_expression;
-
- Params(
- const Block & src_header_,
- const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
- bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
- size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
- size_t max_bytes_before_external_group_by_,
- bool empty_result_for_aggregation_by_empty_set_,
- VolumePtr tmp_volume_, size_t max_threads_,
- size_t min_free_disk_space_,
- bool compile_aggregate_expressions_,
- size_t min_count_to_compile_aggregate_expression_,
- const Block & intermediate_header_ = {})
- : src_header(src_header_),
- intermediate_header(intermediate_header_),
- keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
- overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
- group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
- max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
- empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
- tmp_volume(tmp_volume_), max_threads(max_threads_),
- min_free_disk_space(min_free_disk_space_),
- compile_aggregate_expressions(compile_aggregate_expressions_),
- min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
- {
- }
-
- /// Only parameters that matter during merge.
- Params(const Block & intermediate_header_,
- const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_)
- : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0)
- {
- intermediate_header = intermediate_header_;
- }
-
- static Block getHeader(
- const Block & src_header,
- const Block & intermediate_header,
- const ColumnNumbers & keys,
- const AggregateDescriptions & aggregates,
- bool final);
-
- Block getHeader(bool final) const
- {
- return getHeader(src_header, intermediate_header, keys, aggregates, final);
- }
-
-        /// Returns keys and aggregates for the EXPLAIN query.
- void explain(WriteBuffer & out, size_t indent) const;
- void explain(JSONBuilder::JSONMap & map) const;
- };
-
- explicit Aggregator(const Params & params_);
-
- using AggregateColumns = std::vector<ColumnRawPtrs>;
- using AggregateColumnsData = std::vector<ColumnAggregateFunction::Container *>;
- using AggregateColumnsConstData = std::vector<const ColumnAggregateFunction::Container *>;
- using AggregateFunctionsPlainPtrs = std::vector<const IAggregateFunction *>;
-
- /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break').
- bool executeOnBlock(const Block & block, AggregatedDataVariants & result,
-        ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns,    /// Passed so as not to create them anew for each block
- bool & no_more_keys) const;
-
- bool executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result,
-        ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns,    /// Passed so as not to create them anew for each block
- bool & no_more_keys) const;
-
- /// Used for aggregate projection.
- bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const;
-
- /** Convert the aggregation data structure into a block.
- * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block.
- *
- * If final = false, then ColumnAggregateFunction is created as the aggregation columns with the state of the calculations,
- * which can then be combined with other states (for distributed query processing).
- * If final = true, then columns with ready values are created as aggregate columns.
- */
- BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const;
-
- ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const;
-
- using BucketToBlocks = std::map<Int32, BlocksList>;
-    /// Merge partially aggregated blocks separated into buckets into one data structure.
- void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads);
-
- /// Merge several partially aggregated blocks into one.
- /// Precondition: for all blocks, the block.info.is_overflows flag must be the same
- /// (either all blocks are from overflow data, or none of them are).
- /// The resulting block has the same value of the is_overflows flag.
- Block mergeBlocks(BlocksList & blocks, bool final);
-
- /** Split a block with partially-aggregated data into many blocks, as if the two-level method of aggregation had been used.
- * This is needed to simplify merging of that data with other results that are already two-level.
- */
- std::vector<Block> convertBlockToTwoLevel(const Block & block) const;
-
- /// For external aggregation.
- void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) const;
- void writeToTemporaryFile(AggregatedDataVariants & data_variants) const;
-
- bool hasTemporaryFiles() const { return !temporary_files.empty(); }
-
- struct TemporaryFiles
- {
- std::vector<std::unique_ptr<Poco::TemporaryFile>> files;
- size_t sum_size_uncompressed = 0;
- size_t sum_size_compressed = 0;
- mutable std::mutex mutex;
-
- bool empty() const
- {
- std::lock_guard lock(mutex);
- return files.empty();
- }
- };
-
- const TemporaryFiles & getTemporaryFiles() const { return temporary_files; }
-
- /// Get data structure of the result.
- Block getHeader(bool final) const;
-
-private:
-
- friend struct AggregatedDataVariants;
- friend class ConvertingAggregatedToChunksTransform;
- friend class ConvertingAggregatedToChunksSource;
- friend class AggregatingInOrderTransform;
-
- Params params;
-
- AggregatedDataVariants::Type method_chosen;
- Sizes key_sizes;
-
- HashMethodContextPtr aggregation_state_cache;
-
- AggregateFunctionsPlainPtrs aggregate_functions;
-
- /** This array serves two purposes.
- *
- * Function arguments are collected side by side, so they do not need to be gathered from different places; the array is also made zero-terminated.
- * The inner loop (for the without_key case) is almost twice as compact; the performance gain is about 30%.
- */
- struct AggregateFunctionInstruction
- {
- const IAggregateFunction * that{};
- size_t state_offset{};
- const IColumn ** arguments{};
- const IAggregateFunction * batch_that{};
- const IColumn ** batch_arguments{};
- const UInt64 * offsets{};
- };
-
- using AggregateFunctionInstructions = std::vector<AggregateFunctionInstruction>;
- using NestedColumnsHolder = std::vector<std::vector<const IColumn *>>;
-
- Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions.
- size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions.
-
- // Info to track the alignment requirement.
- // If the states have alignments v1, ..., vn, align_aggregate_states will be max(v1, ..., vn)
- size_t align_aggregate_states = 1;
-
- bool all_aggregates_has_trivial_destructor = false;
-
- /// How much RAM was used to process the query before processing the first block.
- Int64 memory_usage_before_aggregation = 0;
-
- Poco::Logger * log = &Poco::Logger::get("Aggregator");
-
- /// For external aggregation.
- mutable TemporaryFiles temporary_files;
-
-#if USE_EMBEDDED_COMPILER
- std::shared_ptr<CompiledAggregateFunctionsHolder> compiled_aggregate_functions_holder;
-#endif
-
- std::vector<bool> is_aggregate_function_compiled;
-
- /** Try to compile aggregate functions.
- */
- void compileAggregateFunctionsIfNeeded();
-
- /** Select the aggregation method based on the number and types of keys. */
- AggregatedDataVariants::Type chooseAggregationMethod();
-
- /** Create states of aggregate functions for one key.
- */
- template <bool skip_compiled_aggregate_functions = false>
- void createAggregateStates(AggregateDataPtr & aggregate_data) const;
-
- /** Call `destroy` methods for states of aggregate functions.
- * Used in the exception handler for aggregation, since RAII in this case is not applicable.
- */
- void destroyAllAggregateStates(AggregatedDataVariants & result) const;
-
-
- /// Process one data block, aggregate the data into a hash table.
- template <typename Method>
- void executeImpl(
- Method & method,
- Arena * aggregates_pool,
- size_t rows,
- ColumnRawPtrs & key_columns,
- AggregateFunctionInstruction * aggregate_instructions,
- bool no_more_keys,
- AggregateDataPtr overflow_row) const;
-
- /// Specialization for a particular value no_more_keys.
- template <bool no_more_keys, bool use_compiled_functions, typename Method>
- void executeImplBatch(
- Method & method,
- typename Method::State & state,
- Arena * aggregates_pool,
- size_t rows,
- AggregateFunctionInstruction * aggregate_instructions,
- AggregateDataPtr overflow_row) const;
-
- /// For case when there are no keys (all aggregate into one row).
- template <bool use_compiled_functions>
- void executeWithoutKeyImpl(
- AggregatedDataWithoutKey & res,
- size_t rows,
- AggregateFunctionInstruction * aggregate_instructions,
- Arena * arena) const;
-
- static void executeOnIntervalWithoutKeyImpl(
- AggregatedDataWithoutKey & res,
- size_t row_begin,
- size_t row_end,
- AggregateFunctionInstruction * aggregate_instructions,
- Arena * arena);
-
- template <typename Method>
- void writeToTemporaryFileImpl(
- AggregatedDataVariants & data_variants,
- Method & method,
- IBlockOutputStream & out) const;
-
- /// Merge NULL key data from hash table `src` into `dst`.
- template <typename Method, typename Table>
- void mergeDataNullKey(
- Table & table_dst,
- Table & table_src,
- Arena * arena) const;
-
- /// Merge data from hash table `src` into `dst`.
- template <typename Method, bool use_compiled_functions, typename Table>
- void mergeDataImpl(
- Table & table_dst,
- Table & table_src,
- Arena * arena) const;
-
- /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
- template <typename Method, typename Table>
- void mergeDataNoMoreKeysImpl(
- Table & table_dst,
- AggregatedDataWithoutKey & overflows,
- Table & table_src,
- Arena * arena) const;
-
- /// Same, but ignores the rest of the keys.
- template <typename Method, typename Table>
- void mergeDataOnlyExistingKeysImpl(
- Table & table_dst,
- Table & table_src,
- Arena * arena) const;
-
- void mergeWithoutKeyDataImpl(
- ManyAggregatedDataVariants & non_empty_data) const;
-
- template <typename Method>
- void mergeSingleLevelDataImpl(
- ManyAggregatedDataVariants & non_empty_data) const;
-
- template <typename Method, typename Table>
- void convertToBlockImpl(
- Method & method,
- Table & data,
- MutableColumns & key_columns,
- AggregateColumnsData & aggregate_columns,
- MutableColumns & final_aggregate_columns,
- Arena * arena,
- bool final) const;
-
- template <typename Mapped>
- void insertAggregatesIntoColumns(
- Mapped & mapped,
- MutableColumns & final_aggregate_columns,
- Arena * arena) const;
-
- template <typename Method, bool use_compiled_functions, typename Table>
- void convertToBlockImplFinal(
- Method & method,
- Table & data,
- std::vector<IColumn *> key_columns,
- MutableColumns & final_aggregate_columns,
- Arena * arena) const;
-
- template <typename Method, typename Table>
- void convertToBlockImplNotFinal(
- Method & method,
- Table & data,
- std::vector<IColumn *> key_columns,
- AggregateColumnsData & aggregate_columns) const;
-
- template <typename Filler>
- Block prepareBlockAndFill(
- AggregatedDataVariants & data_variants,
- bool final,
- size_t rows,
- Filler && filler) const;
-
- template <typename Method>
- Block convertOneBucketToBlock(
- AggregatedDataVariants & data_variants,
- Method & method,
- Arena * arena,
- bool final,
- size_t bucket) const;
-
- Block mergeAndConvertOneBucketToBlock(
- ManyAggregatedDataVariants & variants,
- Arena * arena,
- bool final,
- size_t bucket,
- std::atomic<bool> * is_cancelled = nullptr) const;
-
- Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const;
- Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const;
- BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const;
-
- template <typename Method>
- BlocksList prepareBlocksAndFillTwoLevelImpl(
- AggregatedDataVariants & data_variants,
- Method & method,
- bool final,
- ThreadPool * thread_pool) const;
-
- template <bool no_more_keys, typename Method, typename Table>
- void mergeStreamsImplCase(
- Block & block,
- Arena * aggregates_pool,
- Method & method,
- Table & data,
- AggregateDataPtr overflow_row) const;
-
- template <typename Method, typename Table>
- void mergeStreamsImpl(
- Block & block,
- Arena * aggregates_pool,
- Method & method,
- Table & data,
- AggregateDataPtr overflow_row,
- bool no_more_keys) const;
-
- void mergeWithoutKeyStreamsImpl(
- Block & block,
- AggregatedDataVariants & result) const;
-
- template <typename Method>
- void mergeBucketImpl(
- ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> * is_cancelled = nullptr) const;
-
- template <typename Method>
- void convertBlockToTwoLevelImpl(
- Method & method,
- Arena * pool,
- ColumnRawPtrs & key_columns,
- const Block & source,
- std::vector<Block> & destinations) const;
-
- template <typename Method, typename Table>
- void destroyImpl(Table & table) const;
-
- void destroyWithoutKey(
- AggregatedDataVariants & result) const;
-
-
- /** Checks constraints on the maximum number of keys for aggregation.
- * If it is exceeded, then, depending on the group_by_overflow_mode, either
- * - throws an exception;
- * - returns false, which means that execution must be aborted;
- * - sets the variable no_more_keys to true.
- */
- bool checkLimits(size_t result_size, bool & no_more_keys) const;
-
- void prepareAggregateInstructions(
- Columns columns,
- AggregateColumns & aggregate_columns,
- Columns & materialized_columns,
- AggregateFunctionInstructions & instructions,
- NestedColumnsHolder & nested_columns_holder) const;
-
- void addSingleKeyToAggregateColumns(
- const AggregatedDataVariants & data_variants,
- MutableColumns & aggregate_columns) const;
-
- void addArenasToAggregateColumns(
- const AggregatedDataVariants & data_variants,
- MutableColumns & aggregate_columns) const;
-
- void createStatesAndFillKeyColumnsWithSingleKey(
- AggregatedDataVariants & data_variants,
- Columns & key_columns, size_t key_row,
- MutableColumns & final_key_columns) const;
-};
-
-
-/** Get the aggregation variant by its type. */
-template <typename Method> Method & getDataVariant(AggregatedDataVariants & variants);
-
-#define M(NAME, IS_TWO_LEVEL) \
- template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant<decltype(AggregatedDataVariants::NAME)::element_type>(AggregatedDataVariants & variants) { return *variants.NAME; }
-
-APPLY_FOR_AGGREGATED_VARIANTS(M)
-
-#undef M
-
-}
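The `getDataVariant` specializations above are generated with an X-macro: `APPLY_FOR_AGGREGATED_VARIANTS(M)` expands `M(NAME, IS_TWO_LEVEL)` once per hash-table variant, producing one template specialization per member of `AggregatedDataVariants`. Below is a minimal standalone sketch of the same dispatch pattern; the names `MyVariants`, `HashMethodA/B`, `APPLY_FOR_VARIANTS` and `getVariant` are illustrative stand-ins, not code from this repository.

#include <cstdio>
#include <memory>

// Simplified stand-in for AggregatedDataVariants: a few alternative
// hash-table "methods", each stored behind its own unique_ptr member.
struct HashMethodA { const char * name = "A"; };
struct HashMethodB { const char * name = "B"; };

struct MyVariants
{
    std::unique_ptr<HashMethodA> method_a = std::make_unique<HashMethodA>();
    std::unique_ptr<HashMethodB> method_b = std::make_unique<HashMethodB>();
};

// Generic declaration, specialized per variant below.
template <typename Method> Method & getVariant(MyVariants & variants);

// X-macro: one entry per variant generates one specialization.
#define APPLY_FOR_VARIANTS(M) \
    M(method_a) \
    M(method_b)

#define M(NAME) \
    template <> inline decltype(MyVariants::NAME)::element_type & \
    getVariant<decltype(MyVariants::NAME)::element_type>(MyVariants & variants) { return *variants.NAME; }

APPLY_FOR_VARIANTS(M)
#undef M

int main()
{
    MyVariants v;
    // Template code that knows the Method type statically can fetch the
    // matching member without switching over an enum.
    std::printf("%s %s\n", getVariant<HashMethodA>(v).name, getVariant<HashMethodB>(v).name);
}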
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp
deleted file mode 100644
index 23eb179eacb..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.cpp
+++ /dev/null
@@ -1,723 +0,0 @@
-#include <Interpreters/Cluster.h>
-#include <common/SimpleCache.h>
-#include <Common/DNSResolver.h>
-#include <Common/escapeForFileName.h>
-#include <Common/isLocalAddress.h>
-#include <Common/parseAddress.h>
-#include <Common/Config/AbstractConfigurationComparison.h>
-#include <Core/Settings.h>
-#include <IO/WriteHelpers.h>
-#include <IO/ReadHelpers.h>
-#include <Poco/Util/AbstractConfiguration.h>
-#include <Poco/Util/Application.h>
-#include <common/range.h>
-#include <boost/range/algorithm_ext/erase.hpp>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int UNKNOWN_ELEMENT_IN_CONFIG;
- extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
- extern const int LOGICAL_ERROR;
- extern const int SHARD_HAS_NO_CONNECTIONS;
- extern const int SYNTAX_ERROR;
-}
-
-namespace
-{
-
-/// Default shard weight.
-constexpr UInt32 default_weight = 1;
-
-inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::SocketAddress & resolved_address, UInt16 clickhouse_port)
-{
- /// If there is a replica for which:
- /// - its port is the same one that the server is listening on;
- /// - its host resolves to a set of addresses, one of which matches one of the addresses of the server machine's network interfaces*;
- /// then we must go to this shard without any inter-process communication.
- ///
- /// * - this criterion is somewhat approximate.
- ///
- /// Also, a replica is considered non-local if it has a default database set
- /// (the only reason is to avoid query rewriting).
-
- return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port);
-}
-
-void concatInsertPath(std::string & insert_path, const std::string & dir_name)
-{
- if (insert_path.empty())
- insert_path = dir_name;
- else
- insert_path += "," + dir_name;
-}
-
-}
-
-/// Implementation of Cluster::Address class
-
-std::optional<Poco::Net::SocketAddress> Cluster::Address::getResolvedAddress() const
-{
- try
- {
- return DNSResolver::instance().resolveAddress(host_name, port);
- }
- catch (...)
- {
- /// Failure in DNS resolution in cluster initialization is Ok.
- tryLogCurrentException("Cluster");
- return {};
- }
-}
-
-
-bool Cluster::Address::isLocal(UInt16 clickhouse_port) const
-{
- if (auto resolved = getResolvedAddress())
- return isLocalImpl(*this, *resolved, clickhouse_port);
- return false;
-}
-
-
-Cluster::Address::Address(
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- const String & cluster_,
- const String & cluster_secret_,
- UInt32 shard_index_,
- UInt32 replica_index_)
- : cluster(cluster_)
- , cluster_secret(cluster_secret_)
- , shard_index(shard_index_)
- , replica_index(replica_index_)
-{
- host_name = config.getString(config_prefix + ".host");
- port = static_cast<UInt16>(config.getInt(config_prefix + ".port"));
- if (config.has(config_prefix + ".user"))
- user_specified = true;
-
- user = config.getString(config_prefix + ".user", "default");
- password = config.getString(config_prefix + ".password", "");
- default_database = config.getString(config_prefix + ".default_database", "");
- secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
- priority = config.getInt(config_prefix + ".priority", 1);
- const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port";
- is_local = isLocal(config.getInt(port_type, 0));
-
- /// By default, compression is disabled if the address looks like localhost.
- /// NOTE: it's still enabled when interacting with a server on a different port, but we don't want to complicate the logic.
- compression = config.getBool(config_prefix + ".compression", !is_local)
- ? Protocol::Compression::Enable : Protocol::Compression::Disable;
-}
-
-
-Cluster::Address::Address(
- const String & host_port_,
- const String & user_,
- const String & password_,
- UInt16 clickhouse_port,
- bool treat_local_port_as_remote,
- bool secure_,
- Int64 priority_,
- UInt32 shard_index_,
- UInt32 replica_index_)
- : user(user_), password(password_)
-{
- bool can_be_local = true;
- std::pair<std::string, UInt16> parsed_host_port;
- if (!treat_local_port_as_remote)
- {
- parsed_host_port = parseAddress(host_port_, clickhouse_port);
- }
- else
- {
- /// For clickhouse-local (treat_local_port_as_remote), try to read the address without passing a default port.
- /// If it works, we have a full address that includes a port, which means it won't be local,
- /// since clickhouse-local doesn't listen on any port.
- /// If it doesn't include a port, then use the default one and it could be local (if the address is).
- try
- {
- parsed_host_port = parseAddress(host_port_, 0);
- can_be_local = false;
- }
- catch (...)
- {
- parsed_host_port = parseAddress(host_port_, clickhouse_port);
- }
- }
- host_name = parsed_host_port.first;
- port = parsed_host_port.second;
- secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
- priority = priority_;
- is_local = can_be_local && isLocal(clickhouse_port);
- shard_index = shard_index_;
- replica_index = replica_index_;
-}
-
-
-String Cluster::Address::toString() const
-{
- return toString(host_name, port);
-}
-
-String Cluster::Address::toString(const String & host_name, UInt16 port)
-{
- return escapeForFileName(host_name) + ':' + DB::toString(port);
-}
-
-String Cluster::Address::readableString() const
-{
- String res;
-
- /// If it looks like an IPv6 address, add brackets to avoid ambiguity in the ipv6_host:port notation
- if (host_name.find_first_of(':') != std::string::npos && !host_name.empty() && host_name.back() != ']')
- res += '[' + host_name + ']';
- else
- res += host_name;
-
- res += ':' + DB::toString(port);
- return res;
-}
-
-std::pair<String, UInt16> Cluster::Address::fromString(const String & host_port_string)
-{
- auto pos = host_port_string.find_last_of(':');
- if (pos == std::string::npos)
- throw Exception("Incorrect <host>:<port> format " + host_port_string, ErrorCodes::SYNTAX_ERROR);
-
- return {unescapeForFileName(host_port_string.substr(0, pos)), parse<UInt16>(host_port_string.substr(pos + 1))};
-}
-
-
-String Cluster::Address::toFullString(bool use_compact_format) const
-{
- if (use_compact_format)
- {
- if (shard_index == 0 || replica_index == 0)
- // shard_num/replica_num like in system.clusters table
- throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR);
-
- return fmt::format("shard{}_replica{}", shard_index, replica_index);
- }
- else
- {
- return
- escapeForFileName(user)
- + (password.empty() ? "" : (':' + escapeForFileName(password))) + '@'
- + escapeForFileName(host_name) + ':' + std::to_string(port)
- + (default_database.empty() ? "" : ('#' + escapeForFileName(default_database)))
- + ((secure == Protocol::Secure::Enable) ? "+secure" : "");
- }
-}
-
-Cluster::Address Cluster::Address::fromFullString(const String & full_string)
-{
- const char * address_begin = full_string.data();
- const char * address_end = address_begin + full_string.size();
-
- const char * user_pw_end = strchr(full_string.data(), '@');
-
- /// parsing with the new shard{shard_index}[_replica{replica_index}] format
- if (!user_pw_end && full_string.starts_with("shard"))
- {
- const char * underscore = strchr(full_string.data(), '_');
-
- Address address;
- address.shard_index = parse<UInt32>(address_begin + strlen("shard"));
- address.replica_index = underscore ? parse<UInt32>(underscore + strlen("_replica")) : 0;
-
- return address;
- }
- else
- {
- /// parsing with the old user[:password]@host:port#default_database format
- /// This format turned out to be inconvenient for the following reasons:
- /// - credentials are exposed in the file name;
- /// - the file name can be too long.
-
- Protocol::Secure secure = Protocol::Secure::Disable;
- const char * secure_tag = "+secure";
- if (full_string.ends_with(secure_tag))
- {
- address_end -= strlen(secure_tag);
- secure = Protocol::Secure::Enable;
- }
-
- const char * colon = strchr(full_string.data(), ':');
- if (!user_pw_end || !colon)
- throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR);
-
- const bool has_pw = colon < user_pw_end;
- const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon;
- if (!host_end)
- throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR);
-
- const char * has_db = strchr(full_string.data(), '#');
- const char * port_end = has_db ? has_db : address_end;
-
- Address address;
- address.secure = secure;
- address.port = parse<UInt16>(host_end + 1, port_end - (host_end + 1));
- address.host_name = unescapeForFileName(std::string(user_pw_end + 1, host_end));
- address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end));
- address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string();
- address.default_database = has_db ? unescapeForFileName(std::string(has_db + 1, address_end)) : std::string();
- // address.priority ignored
- return address;
- }
-}
-
-
-/// Implementation of Clusters class
-
-Clusters::Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix)
-{
- updateClusters(config, settings, config_prefix);
-}
-
-
-ClusterPtr Clusters::getCluster(const std::string & cluster_name) const
-{
- std::lock_guard lock(mutex);
-
- auto it = impl.find(cluster_name);
- return (it != impl.end()) ? it->second : nullptr;
-}
-
-
-void Clusters::setCluster(const String & cluster_name, const std::shared_ptr<Cluster> & cluster)
-{
- std::lock_guard lock(mutex);
- impl[cluster_name] = cluster;
-}
-
-
-void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config)
-{
- Poco::Util::AbstractConfiguration::Keys new_config_keys;
- new_config.keys(config_prefix, new_config_keys);
-
- /// If old config is set, we will update only clusters with updated config.
- /// In this case, we first need to find clusters that were deleted from config.
- Poco::Util::AbstractConfiguration::Keys deleted_keys;
- if (old_config)
- {
- std::sort(new_config_keys.begin(), new_config_keys.end());
-
- Poco::Util::AbstractConfiguration::Keys old_config_keys;
- old_config->keys(config_prefix, old_config_keys);
- std::sort(old_config_keys.begin(), old_config_keys.end());
-
- std::set_difference(
- old_config_keys.begin(), old_config_keys.end(), new_config_keys.begin(), new_config_keys.end(), std::back_inserter(deleted_keys));
- }
-
- std::lock_guard lock(mutex);
-
- /// If old config is set, remove deleted clusters from impl, otherwise just clear it.
- if (old_config)
- {
- for (const auto & key : deleted_keys)
- impl.erase(key);
- }
- else
- impl.clear();
-
- for (const auto & key : new_config_keys)
- {
- if (key.find('.') != String::npos)
- throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR);
-
- /// If old config is set and cluster config wasn't changed, don't update this cluster.
- if (!old_config || !isSameConfiguration(new_config, *old_config, config_prefix + "." + key))
- impl[key] = std::make_shared<Cluster>(new_config, settings, config_prefix, key);
- }
-}
-
-Clusters::Impl Clusters::getContainer() const
-{
- std::lock_guard lock(mutex);
- /// The following line copies container of shared_ptrs to return value under lock
- return impl;
-}
-
-
-/// Implementation of `Cluster` class
-
-Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
- const Settings & settings,
- const String & config_prefix_,
- const String & cluster_name) : name(cluster_name)
-{
- auto config_prefix = config_prefix_ + "." + cluster_name;
-
- Poco::Util::AbstractConfiguration::Keys config_keys;
- config.keys(config_prefix, config_keys);
-
- config_prefix += ".";
-
- secret = config.getString(config_prefix + "secret", "");
- boost::range::remove_erase(config_keys, "secret");
-
- if (config_keys.empty())
- throw Exception("No cluster elements (shard, node) specified in config at path " + config_prefix, ErrorCodes::SHARD_HAS_NO_CONNECTIONS);
-
- UInt32 current_shard_num = 1;
- for (const auto & key : config_keys)
- {
- if (key.starts_with("node"))
- {
- /// Shard without replicas.
-
- Addresses addresses;
-
- const auto & prefix = config_prefix + key;
- const auto weight = config.getInt(prefix + ".weight", default_weight);
-
- addresses.emplace_back(config, prefix, cluster_name, secret, current_shard_num, 1);
- const auto & address = addresses.back();
-
- ShardInfo info;
- info.shard_num = current_shard_num;
- info.weight = weight;
-
- if (address.is_local)
- info.local_addresses.push_back(address);
-
- auto pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- address.host_name, address.port,
- address.default_database, address.user, address.password,
- address.cluster, address.cluster_secret,
- "server", address.compression,
- address.secure, address.priority);
-
- info.pool = std::make_shared<ConnectionPoolWithFailover>(
- ConnectionPoolPtrs{pool}, settings.load_balancing);
- info.per_replica_pools = {std::move(pool)};
-
- if (weight)
- slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
-
- shards_info.emplace_back(std::move(info));
- addresses_with_failover.emplace_back(std::move(addresses));
- }
- else if (key.starts_with("shard"))
- {
- /// Shard with replicas.
-
- Poco::Util::AbstractConfiguration::Keys replica_keys;
- config.keys(config_prefix + key, replica_keys);
-
- addresses_with_failover.emplace_back();
- Addresses & replica_addresses = addresses_with_failover.back();
- UInt32 current_replica_num = 1;
-
- const auto & partial_prefix = config_prefix + key + ".";
- const auto weight = config.getUInt(partial_prefix + ".weight", default_weight);
-
- bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
-
- ShardInfoInsertPathForInternalReplication insert_paths;
- /// "_all_replicas" is a marker that will be replaced with all replicas
- /// (for creating connections in the Distributed engine)
- insert_paths.compact = fmt::format("shard{}_all_replicas", current_shard_num);
-
- for (const auto & replica_key : replica_keys)
- {
- if (replica_key.starts_with("weight") || replica_key.starts_with("internal_replication"))
- continue;
-
- if (replica_key.starts_with("replica"))
- {
- replica_addresses.emplace_back(config,
- partial_prefix + replica_key,
- cluster_name,
- secret,
- current_shard_num,
- current_replica_num);
- ++current_replica_num;
-
- if (internal_replication)
- {
- auto dir_name = replica_addresses.back().toFullString(/* use_compact_format= */ false);
- if (!replica_addresses.back().is_local)
- concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
- concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
- }
- }
- else
- throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
- }
-
- Addresses shard_local_addresses;
-
- ConnectionPoolPtrs all_replicas_pools;
- all_replicas_pools.reserve(replica_addresses.size());
-
- for (const auto & replica : replica_addresses)
- {
- auto replica_pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- replica.host_name, replica.port,
- replica.default_database, replica.user, replica.password,
- replica.cluster, replica.cluster_secret,
- "server", replica.compression,
- replica.secure, replica.priority);
-
- all_replicas_pools.emplace_back(replica_pool);
- if (replica.is_local)
- shard_local_addresses.push_back(replica);
- }
-
- ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
- all_replicas_pools, settings.load_balancing,
- settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
-
- if (weight)
- slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
-
- shards_info.push_back({
- std::move(insert_paths),
- current_shard_num,
- weight,
- std::move(shard_local_addresses),
- std::move(shard_pool),
- std::move(all_replicas_pools),
- internal_replication
- });
- }
- else
- throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
-
- ++current_shard_num;
- }
-
- if (addresses_with_failover.empty())
- throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
-
- initMisc();
-}
-
-
-Cluster::Cluster(
- const Settings & settings,
- const std::vector<std::vector<String>> & names,
- const String & username,
- const String & password,
- UInt16 clickhouse_port,
- bool treat_local_as_remote,
- bool treat_local_port_as_remote,
- bool secure,
- Int64 priority)
-{
- UInt32 current_shard_num = 1;
-
- for (const auto & shard : names)
- {
- Addresses current;
- for (const auto & replica : shard)
- current.emplace_back(
- replica,
- username,
- password,
- clickhouse_port,
- treat_local_port_as_remote,
- secure,
- priority,
- current_shard_num,
- current.size() + 1);
-
- addresses_with_failover.emplace_back(current);
-
- Addresses shard_local_addresses;
- ConnectionPoolPtrs all_replicas;
- all_replicas.reserve(current.size());
-
- for (const auto & replica : current)
- {
- auto replica_pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- replica.host_name, replica.port,
- replica.default_database, replica.user, replica.password,
- replica.cluster, replica.cluster_secret,
- "server", replica.compression, replica.secure, replica.priority);
- all_replicas.emplace_back(replica_pool);
- if (replica.is_local && !treat_local_as_remote)
- shard_local_addresses.push_back(replica);
- }
-
- ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
- all_replicas, settings.load_balancing,
- settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
-
- slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size());
- shards_info.push_back({
- {}, // insert_path_for_internal_replication
- current_shard_num,
- default_weight,
- std::move(shard_local_addresses),
- std::move(shard_pool),
- std::move(all_replicas),
- false // has_internal_replication
- });
- ++current_shard_num;
- }
-
- initMisc();
-}
-
-
-Poco::Timespan Cluster::saturate(Poco::Timespan v, Poco::Timespan limit)
-{
- if (limit.totalMicroseconds() == 0)
- return v;
- else
- return (v > limit) ? limit : v;
-}
-
-
-void Cluster::initMisc()
-{
- for (const auto & shard_info : shards_info)
- {
- if (!shard_info.isLocal() && !shard_info.hasRemoteConnections())
- throw Exception("Found shard without any specified connection",
- ErrorCodes::SHARD_HAS_NO_CONNECTIONS);
- }
-
- for (const auto & shard_info : shards_info)
- {
- if (shard_info.isLocal())
- ++local_shard_count;
- else
- ++remote_shard_count;
- }
-
- for (auto & shard_info : shards_info)
- {
- if (!shard_info.isLocal())
- {
- any_remote_shard_info = &shard_info;
- break;
- }
- }
-}
-
-std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const
-{
- return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)};
-}
-
-std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
-{
- return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, {index}) };
-}
-
-std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector<size_t> & indices) const
-{
- return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) };
-}
-
-Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings)
-{
- if (from.addresses_with_failover.empty())
- throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
-
- UInt32 shard_num = 0;
- std::set<std::pair<String, int>> unique_hosts;
- for (size_t shard_index : collections::range(0, from.shards_info.size()))
- {
- const auto & replicas = from.addresses_with_failover[shard_index];
- for (const auto & address : replicas)
- {
- if (!unique_hosts.emplace(address.host_name, address.port).second)
- continue; /// Duplicate host, skip.
-
- ShardInfo info;
- info.shard_num = ++shard_num;
-
- if (address.is_local)
- info.local_addresses.push_back(address);
-
- auto pool = ConnectionPoolFactory::instance().get(
- settings.distributed_connections_pool_size,
- address.host_name,
- address.port,
- address.default_database,
- address.user,
- address.password,
- address.cluster,
- address.cluster_secret,
- "server",
- address.compression,
- address.secure,
- address.priority);
-
- info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing);
- info.per_replica_pools = {std::move(pool)};
-
- addresses_with_failover.emplace_back(Addresses{address});
- shards_info.emplace_back(std::move(info));
- }
- }
-
- initMisc();
-}
-
-
-Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices)
-{
- for (size_t index : indices)
- {
- shards_info.emplace_back(from.shards_info.at(index));
-
- if (!from.addresses_with_failover.empty())
- addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
- }
-
- initMisc();
-}
-
-const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const
-{
- if (!has_internal_replication)
- throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR);
-
- const auto & paths = insert_path_for_internal_replication;
- if (!use_compact_format)
- {
- const auto & path = prefer_localhost_replica ? paths.prefer_localhost_replica : paths.no_prefer_localhost_replica;
- if (path.size() > NAME_MAX)
- {
- throw Exception(ErrorCodes::LOGICAL_ERROR,
- "Path '{}' for async distributed INSERT is too long (exceed {} limit)", path, NAME_MAX);
- }
- return path;
- }
- else
- {
- return paths.compact;
- }
-}
-
-bool Cluster::maybeCrossReplication() const
-{
- /// Cluster can be used for cross-replication if some replicas have different default database names,
- /// so one clickhouse-server instance can contain multiple replicas.
-
- if (addresses_with_failover.empty())
- return false;
-
- const String & database_name = addresses_with_failover.front().front().default_database;
- for (const auto & shard : addresses_with_failover)
- for (const auto & replica : shard)
- if (replica.default_database != database_name)
- return true;
-
- return false;
-}
-
-}
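Both `Cluster` constructors above populate `slot_to_shard` by appending `weight` copies of the shard index, so a sharding key taken modulo the number of slots selects shards in proportion to their weights. A minimal standalone sketch of that mapping follows; the weights and keys are made up for illustration and this is not the library code.

#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<uint32_t> weights = {1, 3, 2};   // hypothetical shard weights
    std::vector<size_t> slot_to_shard;

    // Each shard occupies `weight` consecutive slots, mirroring
    // slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()).
    for (size_t shard = 0; shard < weights.size(); ++shard)
        slot_to_shard.insert(slot_to_shard.end(), weights[shard], shard);

    // slot_to_shard == {0, 1, 1, 1, 2, 2}: shard 1 receives 3/6 of the keys.
    for (uint64_t key : {10ull, 11ull, 12ull})
        std::printf("key %llu -> shard %zu\n",
                    (unsigned long long) key,
                    slot_to_shard[key % slot_to_shard.size()]);
}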
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h
deleted file mode 100644
index ca28e63fb19..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/Cluster.h
+++ /dev/null
@@ -1,306 +0,0 @@
-#pragma once
-
-#include <Client/ConnectionPool.h>
-#include <Client/ConnectionPoolWithFailover.h>
-
-#include <Poco/Net/SocketAddress.h>
-
-#include <map>
-
-namespace Poco
-{
- namespace Util
- {
- class AbstractConfiguration;
- }
-}
-
-namespace NDB
-{
-
-struct Settings;
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-/// Cluster contains connection pools to each node.
-/// For local nodes, the connection is not established; the request is executed directly.
-/// Therefore we store only the number of local nodes.
-/// In the config, the cluster includes <node> or <shard> elements.
-class Cluster
-{
-public:
- Cluster(const Poco::Util::AbstractConfiguration & config,
- const Settings & settings,
- const String & config_prefix_,
- const String & cluster_name);
-
- /// Construct a cluster by the names of shards and replicas.
- /// Local addresses are treated the same as remote ones if treat_local_as_remote is true.
- /// Local addresses are also treated as remote if treat_local_port_as_remote is set and the local address includes a port.
- /// 'clickhouse_port' - the port this server instance listens on for queries.
- /// This parameter is needed only to check whether some address is local (points to ourselves).
- ///
- /// Used for remote() function.
- Cluster(
- const Settings & settings,
- const std::vector<std::vector<String>> & names,
- const String & username,
- const String & password,
- UInt16 clickhouse_port,
- bool treat_local_as_remote,
- bool treat_local_port_as_remote,
- bool secure = false,
- Int64 priority = 1);
-
- Cluster(const Cluster &)= delete;
- Cluster & operator=(const Cluster &) = delete;
-
- /// Used to impose an upper limit on a timeout value.
- static Poco::Timespan saturate(Poco::Timespan v, Poco::Timespan limit);
-
-public:
- using SlotToShard = std::vector<UInt64>;
-
- struct Address
- {
- /** In configuration file,
- * addresses are located either in <node> elements:
- * <node>
- * <host>example01-01-1</host>
- * <port>9000</port>
- * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
- * </node>
- * ...
- * or in <shard> and inside in <replica> elements:
- * <shard>
- * <replica>
- * <host>example01-01-1</host>
- * <port>9000</port>
- * <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
- * </replica>
- * </shard>
- */
-
- String host_name;
- UInt16 port;
- String user;
- String password;
-
- /// For inter-server authorization
- String cluster;
- String cluster_secret;
-
- UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1.
- UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas.
-
- /// This database is selected when no database is specified for Distributed table
- String default_database;
- /// The locality is determined at initialization and is not changed even if DNS changes
- bool is_local = false;
- bool user_specified = false;
-
- Protocol::Compression compression = Protocol::Compression::Enable;
- Protocol::Secure secure = Protocol::Secure::Disable;
-
- Int64 priority = 1;
-
- Address() = default;
-
- Address(
- const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix,
- const String & cluster_,
- const String & cluster_secret_,
- UInt32 shard_index_ = 0,
- UInt32 replica_index_ = 0);
-
- Address(
- const String & host_port_,
- const String & user_,
- const String & password_,
- UInt16 clickhouse_port,
- bool treat_local_port_as_remote,
- bool secure_ = false,
- Int64 priority_ = 1,
- UInt32 shard_index_ = 0,
- UInt32 replica_index_ = 0);
-
- /// Returns 'escaped_host_name:port'
- String toString() const;
-
- /// Returns 'host_name:port'
- String readableString() const;
-
- static String toString(const String & host_name, UInt16 port);
-
- static std::pair<String, UInt16> fromString(const String & host_port_string);
-
- /// Returns escaped shard{shard_index}_replica{replica_index} or escaped
- /// user:password@resolved_host_address:resolved_host_port#default_database
- /// depending on use_compact_format flag
- String toFullString(bool use_compact_format) const;
-
- /// Returns address with only shard index and replica index or full address without shard index and replica index
- static Address fromFullString(const String & address_full_string);
-
- /// Returns resolved address if it does resolve.
- std::optional<Poco::Net::SocketAddress> getResolvedAddress() const;
-
- auto tuple() const { return std::tie(host_name, port, secure, user, password, default_database); }
- bool operator==(const Address & other) const { return tuple() == other.tuple(); }
-
- private:
- bool isLocal(UInt16 clickhouse_port) const;
- };
-
- using Addresses = std::vector<Address>;
- using AddressesWithFailover = std::vector<Addresses>;
-
- /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
- ///
- /// Contains a different path for each permutation of:
- /// - prefer_localhost_replica
- /// Note: the path for prefer_localhost_replica==0 will also contain local nodes.
- /// - use_compact_format_in_distributed_parts_names
- /// See toFullString()
- ///
- /// This is cached to avoid looping over replicas in insertPathForInternalReplication().
- struct ShardInfoInsertPathForInternalReplication
- {
- /// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=0
- std::string prefer_localhost_replica;
- /// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0
- std::string no_prefer_localhost_replica;
- /// use_compact_format_in_distributed_parts_names=1
- std::string compact;
- };
-
- struct ShardInfo
- {
- public:
- bool isLocal() const { return !local_addresses.empty(); }
- bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); }
- size_t getLocalNodeCount() const { return local_addresses.size(); }
- bool hasInternalReplication() const { return has_internal_replication; }
- /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
- const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const;
-
- public:
- ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication;
- /// Number of the shard, the indexation begins with 1
- UInt32 shard_num = 0;
- UInt32 weight = 1;
- Addresses local_addresses;
- /// nullptr if there are no remote addresses
- ConnectionPoolWithFailoverPtr pool;
- /// Connection pool for each replica, contains nullptr for local replicas
- ConnectionPoolPtrs per_replica_pools;
- bool has_internal_replication = false;
- };
-
- using ShardsInfo = std::vector<ShardInfo>;
-
- String getHashOfAddresses() const { return hash_of_addresses; }
- const ShardsInfo & getShardsInfo() const { return shards_info; }
- const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; }
-
- const ShardInfo & getAnyShardInfo() const
- {
- if (shards_info.empty())
- throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
- return shards_info.front();
- }
-
- /// The number of remote shards.
- size_t getRemoteShardCount() const { return remote_shard_count; }
-
- /// The number of ClickHouse nodes located locally;
- /// we access the local nodes directly.
- size_t getLocalShardCount() const { return local_shard_count; }
-
- /// The number of all shards.
- size_t getShardCount() const { return shards_info.size(); }
-
- const String & getSecret() const { return secret; }
-
- /// Get a subcluster consisting of one shard - index by count (from 0) of the shard of this cluster.
- std::unique_ptr<Cluster> getClusterWithSingleShard(size_t index) const;
-
- /// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster.
- std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
-
- /// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
- std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const;
-
- /// Returns false if the cluster configuration doesn't allow it to be used for cross-replication.
- /// NOTE: true does not mean that it's actually a cross-replication cluster.
- bool maybeCrossReplication() const;
-
-private:
- SlotToShard slot_to_shard;
-
-public:
- const SlotToShard & getSlotToShard() const { return slot_to_shard; }
-
-private:
- void initMisc();
-
- /// For getClusterWithMultipleShards implementation.
- struct SubclusterTag {};
- Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices);
-
- /// For getClusterWithReplicasAsShards implementation
- struct ReplicasAsShardsTag {};
- Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings);
-
- /// Inter-server secret
- String secret;
-
- String hash_of_addresses;
- /// Description of the cluster shards.
- ShardsInfo shards_info;
- /// Any remote shard.
- ShardInfo * any_remote_shard_info = nullptr;
-
- /// Either addresses or addresses_with_failover is non-empty.
- /// The size and order of the elements in the corresponding array correspond to shards_info.
-
- /// An array of shards. For each shard, an array of replica addresses (servers that are considered identical).
- AddressesWithFailover addresses_with_failover;
-
- size_t remote_shard_count = 0;
- size_t local_shard_count = 0;
-
- String name;
-};
-
-using ClusterPtr = std::shared_ptr<Cluster>;
-
-
-class Clusters
-{
-public:
- Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix = "remote_servers");
-
- Clusters(const Clusters &) = delete;
- Clusters & operator=(const Clusters &) = delete;
-
- ClusterPtr getCluster(const std::string & cluster_name) const;
- void setCluster(const String & cluster_name, const ClusterPtr & cluster);
-
- void updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config = nullptr);
-
-public:
- using Impl = std::map<String, ClusterPtr>;
-
- Impl getContainer() const;
-
-protected:
- Impl impl;
- mutable std::mutex mutex;
-};
-
-}
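`toFullString`/`fromFullString` declared above deal with two on-disk directory-name formats: the compact `shard{N}_replica{M}` form and the older `user[:password]@host:port#default_database[+secure]` form. The following standalone sketch recovers the indices from the compact form; it is illustrative only (the directory name is made up) and is not the parser implemented in Cluster.cpp.

#include <cstdio>
#include <string>

int main()
{
    std::string dir = "shard2_replica1";   // hypothetical directory name

    // Compact format: "shard{shard_index}_replica{replica_index}".
    unsigned shard = 0, replica = 0;
    if (std::sscanf(dir.c_str(), "shard%u_replica%u", &shard, &replica) == 2)
        std::printf("shard_index=%u replica_index=%u\n", shard, replica);
    else
        std::printf("not a compact-format name: %s\n", dir.c_str());
}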
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h
deleted file mode 100644
index 5d9b9b6157f..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ExpressionAnalyzer.h
+++ /dev/null
@@ -1,379 +0,0 @@
-#pragma once
-
-#include <Columns/FilterDescription.h>
-#include <DataStreams/IBlockStream_fwd.h>
-#include <Interpreters/AggregateDescription.h>
-#include <Interpreters/DatabaseCatalog.h>
-#include <Interpreters/SubqueryForSet.h>
-#include <Interpreters/TreeRewriter.h>
-#include <Interpreters/WindowDescription.h>
-#include <Interpreters/join_common.h>
-#include <Parsers/IAST_fwd.h>
-#include <Storages/IStorage_fwd.h>
-#include <Storages/SelectQueryInfo.h>
-
-namespace NDB
-{
-
-class Block;
-struct Settings;
-
-struct ExpressionActionsChain;
-class ExpressionActions;
-using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
-using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
-
-struct ASTTableJoin;
-class IJoin;
-using JoinPtr = std::shared_ptr<IJoin>;
-
-class ASTFunction;
-class ASTExpressionList;
-class ASTSelectQuery;
-struct ASTTablesInSelectQueryElement;
-
-struct StorageInMemoryMetadata;
-using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
-
-class ArrayJoinAction;
-using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
-
-class ActionsDAG;
-using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
-
-/// Create columns in block or return false if not possible
-bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false);
-
-/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately.
-struct ExpressionAnalyzerData
-{
- ~ExpressionAnalyzerData();
-
- SubqueriesForSets subqueries_for_sets;
- PreparedSets prepared_sets;
-
- std::unique_ptr<QueryPlan> joined_plan;
-
- /// Columns after ARRAY JOIN. If there is no ARRAY JOIN, it's source_columns.
- NamesAndTypesList columns_after_array_join;
- /// Columns after ARRAY JOIN and JOIN. If there is no JOIN, it's columns_after_array_join.
- NamesAndTypesList columns_after_join;
- /// Columns after ARRAY JOIN, JOIN, and/or aggregation.
- NamesAndTypesList aggregated_columns;
- /// Columns after window functions.
- NamesAndTypesList columns_after_window;
-
- bool has_aggregation = false;
- NamesAndTypesList aggregation_keys;
- bool has_const_aggregation_keys = false;
- AggregateDescriptions aggregate_descriptions;
-
- WindowDescriptions window_descriptions;
- NamesAndTypesList window_columns;
-
- bool has_global_subqueries = false;
-
- /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
- TemporaryTablesMapping external_tables;
-};
-
-
-/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
- *
- * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
- */
-class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable, protected WithContext
-{
-private:
- /// Extracts only the settings that are actually used, to make them explicit (and to avoid copying the others).
- struct ExtractedSettings
- {
- const bool use_index_for_in_with_subqueries;
- const SizeLimits size_limits_for_set;
- const UInt64 distributed_group_by_no_merge;
-
- ExtractedSettings(const Settings & settings_);
- };
-
-public:
- /// Ctor for non-select queries. Generally its usage is:
- /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions();
- ExpressionAnalyzer(const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_)
- : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false, {}, {})
- {
- }
-
- ~ExpressionAnalyzer();
-
- void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);
-
- /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
- /// If add_aliases, keep only the calculated values, in the desired order, and add aliases.
- /// If project_result is also set, then only the aliases remain in the output block.
- /// Otherwise, only temporary columns will be deleted from the block.
- ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true);
- ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no);
-
- /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
- /// Does not execute subqueries.
- ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {});
-
- /** Sets that require a subquery to be created.
- * Only the sets needed to perform the actions returned from already executed `append*` or `getActions` calls.
- * That is, you need to call getSubqueriesForSets after all calls of `append*` or `getActions`,
- * and create all the returned sets before performing the actions.
- */
- SubqueriesForSets & getSubqueriesForSets() { return subqueries_for_sets; }
-
- PreparedSets & getPreparedSets() { return prepared_sets; }
-
- /// Get intermediates for tests
- const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
-
- /// A list of windows for window functions.
- const WindowDescriptions & windowDescriptions() const { return window_descriptions; }
-
- void makeWindowDescriptions(ActionsDAGPtr actions);
-
- /**
- * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
- * The set will not be created if its size hits the limit.
- */
- void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options = {});
-
- /**
- * Checks that the subquery is not a plain StorageSet,
- * because while making the set we would read data from the StorageSet, which is not allowed.
- * Returns a valid SetPtr from the StorageSet if the latter is used after IN, or nullptr otherwise.
- */
- SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name);
-
-protected:
- ExpressionAnalyzer(
- const ASTPtr & query_,
- const TreeRewriterResultPtr & syntax_analyzer_result_,
- ContextPtr context_,
- size_t subquery_depth_,
- bool do_global_,
- SubqueriesForSets subqueries_for_sets_,
- PreparedSets prepared_sets_);
-
- ASTPtr query;
- const ExtractedSettings settings;
- size_t subquery_depth;
-
- TreeRewriterResultPtr syntax;
-
- const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in the FROM clause, if it exists.
- const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
- const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
- const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
- /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
- void initGlobalSubqueriesAndExternalTables(bool do_global);
-
- ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;
-
- void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
-
- /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
- * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
- * prepared sets would not be applicable for MergeTree index optimization.
- */
- void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
-
- void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
-
- /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
- * Create a set of columns aggregated_columns resulting after the aggregation, if any,
- * or after all the actions that are normally performed before aggregation.
- * Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
- */
- void analyzeAggregation();
- bool makeAggregateDescriptions(ActionsDAGPtr & actions);
-
- const ASTSelectQuery * getSelectQuery() const;
-
- bool isRemoteStorage() const { return syntax->is_remote_storage; }
-};
-
-class SelectQueryExpressionAnalyzer;
-
-/// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery
-struct ExpressionAnalysisResult
-{
- std::string dump() const;
-
- /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
- bool first_stage = false;
- /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
- bool second_stage = false;
-
- bool need_aggregate = false;
- bool has_order_by = false;
- bool has_window = false;
-
- String where_column_name;
- bool remove_where_filter = false;
- bool optimize_read_in_order = false;
- bool optimize_aggregation_in_order = false;
- bool join_has_delayed_stream = false;
-
- ActionsDAGPtr before_array_join;
- ArrayJoinActionPtr array_join;
- ActionsDAGPtr before_join;
- ActionsDAGPtr converting_join_columns;
- JoinPtr join;
- ActionsDAGPtr before_where;
- ActionsDAGPtr before_aggregation;
- ActionsDAGPtr before_having;
- ActionsDAGPtr before_window;
- ActionsDAGPtr before_order_by;
- ActionsDAGPtr before_limit_by;
- ActionsDAGPtr final_projection;
-
- /// Columns from the SELECT list, before renaming them to aliases. Used to
- /// perform SELECT DISTINCT.
- Names selected_columns;
-
- /// Columns to read from storage if any.
- Names required_columns;
-
- /// Columns will be removed after prewhere actions execution.
- NameSet columns_to_remove_after_prewhere;
-
- PrewhereInfoPtr prewhere_info;
- FilterDAGInfoPtr filter_info;
- ConstantFilterDescription prewhere_constant_filter_description;
- ConstantFilterDescription where_constant_filter_description;
- /// Actions by every element of ORDER BY
- ManyExpressionActions order_by_elements_actions;
- ManyExpressionActions group_by_elements_actions;
-
- ExpressionAnalysisResult() = default;
-
- ExpressionAnalysisResult(
- SelectQueryExpressionAnalyzer & query_analyzer,
- const StorageMetadataPtr & metadata_snapshot,
- bool first_stage,
- bool second_stage,
- bool only_types,
- const FilterDAGInfoPtr & filter_info,
- const Block & source_header);
-
- /// Filter for row-level security.
- bool hasFilter() const { return filter_info.get(); }
-
- bool hasJoin() const { return join.get(); }
- bool hasPrewhere() const { return prewhere_info.get(); }
- bool hasWhere() const { return before_where.get(); }
- bool hasHaving() const { return before_having.get(); }
- bool hasLimitBy() const { return before_limit_by.get(); }
-
- void removeExtraColumns() const;
- void checkActions() const;
- void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query);
-};
-
-/// SelectQuery specific ExpressionAnalyzer part.
-class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
-{
-public:
- friend struct ExpressionAnalysisResult;
-
- SelectQueryExpressionAnalyzer(
- const ASTPtr & query_,
- const TreeRewriterResultPtr & syntax_analyzer_result_,
- ContextPtr context_,
- const StorageMetadataPtr & metadata_snapshot_,
- const NameSet & required_result_columns_ = {},
- bool do_global_ = false,
- const SelectQueryOptions & options_ = {},
- SubqueriesForSets subqueries_for_sets_ = {},
- PreparedSets prepared_sets_ = {})
- : ExpressionAnalyzer(
- query_,
- syntax_analyzer_result_,
- context_,
- options_.subquery_depth,
- do_global_,
- std::move(subqueries_for_sets_),
- std::move(prepared_sets_))
- , metadata_snapshot(metadata_snapshot_)
- , required_result_columns(required_result_columns_)
- , query_options(options_)
- {
- }
-
- /// Does the expression have aggregate functions or a GROUP BY or HAVING section.
- bool hasAggregation() const { return has_aggregation; }
- bool hasWindow() const { return !syntax->window_function_asts.empty(); }
- bool hasGlobalSubqueries() { return has_global_subqueries; }
- bool hasTableJoin() const { return syntax->ast_join; }
-
- const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
- bool hasConstAggregationKeys() const { return has_const_aggregation_keys; }
- const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }
-
- const PreparedSets & getPreparedSets() const { return prepared_sets; }
- std::unique_ptr<QueryPlan> getJoinedPlan();
-
- /// Tables that will need to be sent to remote servers for distributed query processing.
- const TemporaryTablesMapping & getExternalTables() const { return external_tables; }
-
- ActionsDAGPtr simpleSelectActions();
-
- /// These appends are public only for tests
- void appendSelect(ExpressionActionsChain & chain, bool only_types);
- /// Deletes all columns except those mentioned in SELECT, arranges the remaining columns, and renames them to their aliases.
- ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const;
-
- /// Create the Sets built from the IN section so that an index can be used on them.
- void makeSetsForIndex(const ASTPtr & node);
-
-private:
- StorageMetadataPtr metadata_snapshot;
- /// If non-empty, ignore all expressions not from this list.
- NameSet required_result_columns;
- SelectQueryOptions query_options;
-
- JoinPtr makeTableJoin(
- const ASTTablesInSelectQueryElement & join_element,
- const ColumnsWithTypeAndName & left_sample_columns);
-
- const ASTSelectQuery * getAggregatingQuery() const;
-
- /** These methods allow you to build a chain of transformations over a block that computes the values needed by the corresponding sections of the query.
- *
- * Example usage:
- * ExpressionActionsChain chain;
- * analyzer.appendWhere(chain);
- * chain.addStep();
- * analyzer.appendSelect(chain);
- * analyzer.appendOrderBy(chain);
- * chain.finalize();
- *
- * If only_types = true is set, subqueries in the relevant parts of the query are not executed. The actions obtained this way
- * should not be executed; they are only needed to get the list of columns with their types.
- */
-
- /// Before aggregation:
- ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types);
- bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types);
- JoinPtr appendJoin(ExpressionActionsChain & chain);
- /// remove_filter is set in ExpressionActionsChain::finalize();
- /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
- ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
- bool appendWhere(ExpressionActionsChain & chain, bool only_types);
- bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
- void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
- void appendWindowFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
-
- /// After aggregation:
- bool appendHaving(ExpressionActionsChain & chain, bool only_types);
- /// appendSelect
- ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &);
- bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
- /// appendProjectResult
-};
-
-}
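The usage comment in the deleted header above describes the intended append* protocol. Below is a minimal sketch of that flow, restricted to the methods declared public in this header (appendSelect, appendProjectResult); the chain is assumed to be constructed from the current context, and analyzer/context are placeholders, so this is an illustration rather than code from the tree:

    ExpressionActionsChain chain(context);
    analyzer.appendSelect(chain, /* only_types = */ true);           // build SELECT expressions for type inference only
    ActionsDAGPtr projection = analyzer.appendProjectResult(chain);  // keep only SELECT columns and rename them to aliases
    chain.finalize();                                                // prune columns that no later step needs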
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h
deleted file mode 100644
index 97961753c85..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/IJoin.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include <Core/Names.h>
-#include <Columns/IColumn.h>
-
-namespace NDB
-{
-
-class Block;
-struct ExtraBlock;
-using ExtraBlockPtr = std::shared_ptr<ExtraBlock>;
-
-class TableJoin;
-class NotJoinedBlocks;
-
-class IJoin
-{
-public:
- virtual ~IJoin() = default;
-
- virtual const TableJoin & getTableJoin() const = 0;
-
- /// Add block of data from right hand of JOIN.
- /// @returns false if some limit was exceeded and no more data should be inserted.
- virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0;
-
- /// Join the block of data from the left-hand side of the JOIN with the right-hand data (previously built by calls to addJoinedBlock).
- /// Could be called from different threads in parallel.
- virtual void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) = 0;
-
- /// Set/Get totals for right table
- virtual void setTotals(const Block & block) = 0;
- virtual const Block & getTotals() const = 0;
-
- virtual size_t getTotalRowCount() const = 0;
- virtual size_t getTotalByteCount() const = 0;
- virtual bool alwaysReturnsEmptySet() const = 0;
-
- /// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock.
- /// Different query plan is used for such joins.
- virtual bool isFilled() const { return false; }
-
- virtual std::shared_ptr<NotJoinedBlocks> getNonJoinedBlocks(const Block &, UInt64) const = 0;
-};
-
-using JoinPtr = std::shared_ptr<IJoin>;
-
-}
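IJoin defines a two-phase protocol: the right-hand side is accumulated with addJoinedBlock, after which left-hand blocks are probed with joinBlock. The following is a hedged driver sketch; BlockSource and BlockSink are hypothetical helpers that are not part of this codebase:

    void runJoin(IJoin & join, BlockSource & rhs, BlockSource & lhs, BlockSink & out)
    {
        /// Build phase: accumulate the right-hand side.
        Block right;
        while (rhs.next(right))
            if (!join.addJoinedBlock(right))   /// false: a size limit was exceeded, stop inserting
                break;

        /// Probe phase: join each left-hand block.
        Block left;
        while (lhs.next(left))
        {
            ExtraBlockPtr not_processed;
            join.joinBlock(left, not_processed);   /// may be called from several threads in parallel
            out.write(left);
            /// A non-null not_processed means the remaining rows must be joined in a
            /// follow-up call before moving to the next input block (omitted in this sketch).
        }
    }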
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h
deleted file mode 100644
index 70b8612e7fe..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/PreparedSets.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#pragma once
-
-#include <Parsers/IAST.h>
-#include <DataTypes/IDataType.h>
-#include <memory>
-#include <unordered_map>
-#include <DataTypes/DataTypeLowCardinality.h>
-
-
-namespace NDB
-{
-
-struct PreparedSetKey
-{
- /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired
- /// data types of set elements (two different Sets can be required for two tuples with the same contents
- /// if left hand sides of the IN operators have different types).
- static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_)
- {
- /// Remove LowCardinality types from the type list because Set doesn't support LowCardinality keys yet;
- /// it just converts LowCardinality to ordinary types.
- for (auto & type : types_)
- type = recursiveRemoveLowCardinality(type);
-
- PreparedSetKey key;
- key.ast_hash = ast.getTreeHash();
- key.types = std::move(types_);
- return key;
- }
-
- /// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting
- /// set is fully determined by the subquery.
- static PreparedSetKey forSubquery(const IAST & ast)
- {
- PreparedSetKey key;
- key.ast_hash = ast.getTreeHash();
- return key;
- }
-
- IAST::Hash ast_hash;
- DataTypes types; /// Empty for subqueries.
-
- bool operator==(const PreparedSetKey & other) const
- {
- if (ast_hash != other.ast_hash)
- return false;
-
- if (types.size() != other.types.size())
- return false;
-
- for (size_t i = 0; i < types.size(); ++i)
- {
- if (!types[i]->equals(*other.types[i]))
- return false;
- }
-
- return true;
- }
-
- struct Hash
- {
- UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; }
- };
-};
-
-class Set;
-using SetPtr = std::shared_ptr<Set>;
-
-using PreparedSets = std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash>;
-
-}
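PreparedSetKey gives literal-based sets a key of (AST tree hash, element types) and subquery-based sets a key of the tree hash alone, and PreparedSets is simply an unordered_map over those keys. A hedged sketch of registering and looking up a set; ast, element_types, and set are placeholders:

    PreparedSets prepared_sets;

    /// A set built for a tuple literal: keyed by the tree hash plus the desired element types.
    prepared_sets[PreparedSetKey::forLiteral(*ast, element_types)] = set;

    /// A set built from a subquery would instead be registered under PreparedSetKey::forSubquery(*ast).
    auto it = prepared_sets.find(PreparedSetKey::forLiteral(*ast, element_types));
    if (it != prepared_sets.end())
        SetPtr found = it->second;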
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.cpp
deleted file mode 100644
index 7ff6950fe46..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "QueryViewsLog.h"
-
-#include <Columns/IColumn.h>
-#include <Core/Block.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeDateTime.h>
-#include <DataTypes/DataTypeDateTime64.h>
-#include <DataTypes/DataTypeEnum.h>
-#include <DataTypes/DataTypeMap.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeUUID.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <Interpreters/ProfileEventsExt.h>
-#include <common/DateLUT.h>
-#include <common/types.h>
-
-namespace NDB
-{
-NamesAndTypesList QueryViewsLogElement::getNamesAndTypes()
-{
- auto view_status_datatype = std::make_shared<DataTypeEnum8>(DataTypeEnum8::Values{
- {"QueryStart", static_cast<Int8>(QUERY_START)},
- {"QueryFinish", static_cast<Int8>(QUERY_FINISH)},
- {"ExceptionBeforeStart", static_cast<Int8>(EXCEPTION_BEFORE_START)},
- {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)}});
-
- auto view_type_datatype = std::make_shared<DataTypeEnum8>(DataTypeEnum8::Values{
- {"Default", static_cast<Int8>(ViewType::DEFAULT)},
- {"Materialized", static_cast<Int8>(ViewType::MATERIALIZED)},
- {"Live", static_cast<Int8>(ViewType::LIVE)}});
-
- return {
- {"event_date", std::make_shared<DataTypeDate>()},
- {"event_time", std::make_shared<DataTypeDateTime>()},
- {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
- {"view_duration_ms", std::make_shared<DataTypeUInt64>()},
-
- {"initial_query_id", std::make_shared<DataTypeString>()},
- {"view_name", std::make_shared<DataTypeString>()},
- {"view_uuid", std::make_shared<DataTypeUUID>()},
- {"view_type", std::move(view_type_datatype)},
- {"view_query", std::make_shared<DataTypeString>()},
- {"view_target", std::make_shared<DataTypeString>()},
-
- {"read_rows", std::make_shared<DataTypeUInt64>()},
- {"read_bytes", std::make_shared<DataTypeUInt64>()},
- {"written_rows", std::make_shared<DataTypeUInt64>()},
- {"written_bytes", std::make_shared<DataTypeUInt64>()},
- {"peak_memory_usage", std::make_shared<DataTypeInt64>()},
- {"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
-
- {"status", std::move(view_status_datatype)},
- {"exception_code", std::make_shared<DataTypeInt32>()},
- {"exception", std::make_shared<DataTypeString>()},
- {"stack_trace", std::make_shared<DataTypeString>()}};
-}
-
-NamesAndAliases QueryViewsLogElement::getNamesAndAliases()
-{
- return {
- {"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(ProfileEvents)"},
- {"ProfileEvents.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, "mapValues(ProfileEvents)"}};
-}
-
-void QueryViewsLogElement::appendToBlock(MutableColumns & columns) const
-{
- size_t i = 0;
-
- columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); // event_date
- columns[i++]->insert(event_time);
- columns[i++]->insert(event_time_microseconds);
- columns[i++]->insert(view_duration_ms);
-
- columns[i++]->insertData(initial_query_id.data(), initial_query_id.size());
- columns[i++]->insertData(view_name.data(), view_name.size());
- columns[i++]->insert(view_uuid);
- columns[i++]->insert(view_type);
- columns[i++]->insertData(view_query.data(), view_query.size());
- columns[i++]->insertData(view_target.data(), view_target.size());
-
- columns[i++]->insert(read_rows);
- columns[i++]->insert(read_bytes);
- columns[i++]->insert(written_rows);
- columns[i++]->insert(written_bytes);
- columns[i++]->insert(peak_memory_usage);
-
- if (profile_counters)
- {
- auto * column = columns[i++].get();
- ProfileEvents::dumpToMapColumn(*profile_counters, column, true);
- }
- else
- {
- columns[i++]->insertDefault();
- }
-
- columns[i++]->insert(status);
- columns[i++]->insert(exception_code);
- columns[i++]->insertData(exception.data(), exception.size());
- columns[i++]->insertData(stack_trace.data(), stack_trace.size());
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.h
deleted file mode 100644
index 8157b0fb4e7..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/QueryViewsLog.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#pragma once
-
-#include <chrono>
-#include <memory>
-#include <sys/types.h>
-
-#include <Columns/IColumn.h>
-#include <Core/Block.h>
-#include <Core/SettingsEnums.h>
-#include <Core/Types.h>
-#include <Core/UUID.h>
-#include <Interpreters/SystemLog.h>
-#include <common/types.h>
-
-namespace ProfileEvents
-{
-class Counters;
-}
-
-namespace NDB
-{
-class ThreadStatus;
-
-struct QueryViewsLogElement
-{
- using ViewStatus = QueryLogElementType;
-
- enum class ViewType : int8_t
- {
- DEFAULT = 1,
- MATERIALIZED = 2,
- LIVE = 3
- };
-
- struct ViewRuntimeStats
- {
- String target_name;
- ViewType type = ViewType::DEFAULT;
- std::unique_ptr<ThreadStatus> thread_status = nullptr;
- UInt64 elapsed_ms = 0;
- std::chrono::time_point<std::chrono::system_clock> event_time;
- ViewStatus event_status = ViewStatus::QUERY_START;
-
- void setStatus(ViewStatus s)
- {
- event_status = s;
- event_time = std::chrono::system_clock::now();
- }
- };
-
- time_t event_time{};
- Decimal64 event_time_microseconds{};
- UInt64 view_duration_ms{};
-
- String initial_query_id;
- String view_name;
- UUID view_uuid{UUIDHelpers::Nil};
- ViewType view_type{ViewType::DEFAULT};
- String view_query;
- String view_target;
-
- UInt64 read_rows{};
- UInt64 read_bytes{};
- UInt64 written_rows{};
- UInt64 written_bytes{};
- Int64 peak_memory_usage{};
- std::shared_ptr<ProfileEvents::Counters> profile_counters;
-
- ViewStatus status = ViewStatus::QUERY_START;
- Int32 exception_code{};
- String exception;
- String stack_trace;
-
- static std::string name() { return "QueryLog"; }
-
- static NamesAndTypesList getNamesAndTypes();
- static NamesAndAliases getNamesAndAliases();
- void appendToBlock(MutableColumns & columns) const;
-};
-
-
-class QueryViewsLog : public SystemLog<QueryViewsLogElement>
-{
- using SystemLog<QueryViewsLogElement>::SystemLog;
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h
deleted file mode 100644
index fc9d7c7ac11..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SelectQueryOptions.h
+++ /dev/null
@@ -1,143 +0,0 @@
-#pragma once
-
-#include <Core/QueryProcessingStage.h>
-#include <optional>
-
-namespace NDB
-{
-
-/**
- * to_stage
- * - the stage up to which the query is to be executed. By default - up to the end.
- *   Execution may stop at the intermediate aggregation state, which is then combined from different servers during distributed query processing.
- *
- * subquery_depth
- * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
- * for INSERT SELECT, a value 1 is passed instead of 0.
- *
- * only_analyze
- * - the object was created only for query analysis.
- *
- * is_subquery
- * - subqueries may be handled specially. For example, there is no need to return duplicated columns in the result, since the result is consumed indirectly.
- *
- * is_internal
- * - the object was created only for internal queries.
- */
-struct SelectQueryOptions
-{
- QueryProcessingStage::Enum to_stage;
- size_t subquery_depth;
- bool only_analyze = false;
- bool modify_inplace = false;
- bool remove_duplicates = false;
- bool ignore_quota = false;
- bool ignore_limits = false;
- /// This flag is needed to analyze the query while ignoring table projections.
- /// It is needed because we build another InterpreterSelectQuery while analyzing projections.
- /// It helps to avoid infinite recursion.
- bool ignore_projections = false;
- /// This flag is also used for projection analysis.
- /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform.
- /// It is also used to avoid adding aggregating step when aggregate projection is chosen.
- bool is_projection_query = false;
- bool ignore_alias = false;
- bool is_internal = false;
- bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select
- bool with_all_cols = false; /// asterisk includes materialized and aliased columns
-
- /// These two fields are used to evaluate shardNum() and shardCount() function when
- /// prefer_localhost_replica == 1 and local instance is selected. They are needed because local
- /// instance might have multiple shards and scalars can only hold one value.
- std::optional<UInt32> shard_num;
- std::optional<UInt32> shard_count;
-
- SelectQueryOptions(
- QueryProcessingStage::Enum stage = QueryProcessingStage::Complete,
- size_t depth = 0,
- bool is_subquery_ = false)
- : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_)
- {}
-
- SelectQueryOptions copy() const { return *this; }
-
- SelectQueryOptions subquery() const
- {
- SelectQueryOptions out = *this;
- out.to_stage = QueryProcessingStage::Complete;
- ++out.subquery_depth;
- out.is_subquery = true;
- return out;
- }
-
- SelectQueryOptions & analyze(bool dry_run = true)
- {
- only_analyze = dry_run;
- return *this;
- }
-
- SelectQueryOptions & modify(bool value = true)
- {
- modify_inplace = value;
- return *this;
- }
-
- SelectQueryOptions & noModify() { return modify(false); }
-
- SelectQueryOptions & removeDuplicates(bool value = true)
- {
- remove_duplicates = value;
- return *this;
- }
-
- SelectQueryOptions & noSubquery()
- {
- subquery_depth = 0;
- return *this;
- }
-
- SelectQueryOptions & ignoreLimits(bool value = true)
- {
- ignore_limits = value;
- return *this;
- }
-
- SelectQueryOptions & ignoreProjections(bool value = true)
- {
- ignore_projections = value;
- return *this;
- }
-
- SelectQueryOptions & projectionQuery(bool value = true)
- {
- is_projection_query = value;
- return *this;
- }
-
- SelectQueryOptions & ignoreAlias(bool value = true)
- {
- ignore_alias = value;
- return *this;
- }
-
- SelectQueryOptions & setInternal(bool value = false)
- {
- is_internal = value;
- return *this;
- }
-
- SelectQueryOptions & setWithAllColumns(bool value = true)
- {
- with_all_cols = value;
- return *this;
- }
-
- SelectQueryOptions & setShardInfo(UInt32 shard_num_, UInt32 shard_count_)
- {
- shard_num = shard_num_;
- shard_count = shard_count_;
- return *this;
- }
-};
-
-}
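Each setter returns *this, so SelectQueryOptions is meant to be configured fluently, and subquery() derives options one nesting level deeper. A hedged usage sketch:

    /// Options for a dry-run analysis pass: infer types only, ignore limits and aliases.
    SelectQueryOptions options = SelectQueryOptions(QueryProcessingStage::Complete)
        .analyze()
        .ignoreLimits()
        .ignoreAlias();

    /// Options for a nested subquery: stage reset to Complete, subquery_depth incremented.
    SelectQueryOptions sub = options.subquery();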
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp
deleted file mode 100644
index 7f0ed60c88c..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/StorageID.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include <Interpreters/StorageID.h>
-#include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Common/quoteString.h>
-#include <IO/WriteHelpers.h>
-#include <IO/ReadHelpers.h>
-#include <Interpreters/DatabaseAndTableWithAlias.h>
-#include <Poco/Util/AbstractConfiguration.h>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int UNKNOWN_DATABASE;
-}
-
-StorageID::StorageID(const ASTQueryWithTableAndOutput & query)
-{
- database_name = query.database;
- table_name = query.table;
- uuid = query.uuid;
- assertNotEmpty();
-}
-
-StorageID::StorageID(const ASTTableIdentifier & table_identifier_node)
-{
- DatabaseAndTableWithAlias database_table(table_identifier_node);
- database_name = database_table.database;
- table_name = database_table.table;
- uuid = database_table.uuid;
- assertNotEmpty();
-}
-
-StorageID::StorageID(const ASTPtr & node)
-{
- if (const auto * identifier = node->as<ASTTableIdentifier>())
- *this = StorageID(*identifier);
- else if (const auto * simple_query = dynamic_cast<const ASTQueryWithTableAndOutput *>(node.get()))
- *this = StorageID(*simple_query);
- else
- throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR);
-}
-
-String StorageID::getTableName() const
-{
- assertNotEmpty();
- return table_name;
-}
-
-String StorageID::getDatabaseName() const
-{
- assertNotEmpty();
- if (database_name.empty())
- throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE);
- return database_name;
-}
-
-String StorageID::getNameForLogs() const
-{
- assertNotEmpty();
- return (database_name.empty() ? "" : backQuoteIfNeed(database_name) + ".") + backQuoteIfNeed(table_name)
- + (hasUUID() ? " (" + toString(uuid) + ")" : "");
-}
-
-bool StorageID::operator<(const StorageID & rhs) const
-{
- assertNotEmpty();
- /// It's needed for ViewDependencies
- if (!hasUUID() && !rhs.hasUUID())
- /// If both IDs don't have UUID, compare them like pair of strings
- return std::tie(database_name, table_name) < std::tie(rhs.database_name, rhs.table_name);
- else if (hasUUID() && rhs.hasUUID())
- /// If both IDs have UUID, compare UUIDs and ignore database and table name
- return uuid < rhs.uuid;
- else
- /// All IDs without UUID are less than all IDs with UUID
- return !hasUUID();
-}
-
-bool StorageID::operator==(const StorageID & rhs) const
-{
- assertNotEmpty();
- if (hasUUID() && rhs.hasUUID())
- return uuid == rhs.uuid;
- else
- return std::tie(database_name, table_name) == std::tie(rhs.database_name, rhs.table_name);
-}
-
-String StorageID::getFullTableName() const
-{
- return backQuoteIfNeed(getDatabaseName()) + "." + backQuoteIfNeed(table_name);
-}
-
-String StorageID::getFullNameNotQuoted() const
-{
- return getDatabaseName() + "." + table_name;
-}
-
-StorageID StorageID::fromDictionaryConfig(const Poco::Util::AbstractConfiguration & config,
- const String & config_prefix)
-{
- StorageID res = StorageID::createEmpty();
- res.database_name = config.getString(config_prefix + ".database", "");
- res.table_name = config.getString(config_prefix + ".name");
- const String uuid_str = config.getString(config_prefix + ".uuid", "");
- if (!uuid_str.empty())
- res.uuid = parseFromString<UUID>(uuid_str);
- return res;
-}
-
-String StorageID::getInternalDictionaryName() const
-{
- assertNotEmpty();
- if (hasUUID())
- return toString(uuid);
- if (database_name.empty())
- return table_name;
- return database_name + "." + table_name;
-}
-
-}
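The ordering and equality rules implemented above can be summarized with a small sketch; the three-argument constructor is assumed from StorageID.h, which is not part of this diff:

    StorageID a("db", "t1");                              /// no UUID
    StorageID b("db", "t2");                              /// no UUID
    StorageID c("db", "t1", UUIDHelpers::generateV4());   /// has UUID

    bool r1 = a < b;     /// true: neither has a UUID, compared as (database, table) pairs
    bool r2 = a < c;     /// true: IDs without UUID order before IDs with UUID
    bool r3 = (a == c);  /// true: equality falls back to names when either side lacks a UUID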
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h
deleted file mode 100644
index 1d25df9dc55..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/SubqueryForSet.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include <Core/Block.h>
-#include <Storages/IStorage_fwd.h>
-
-
-namespace NDB
-{
-
-class QueryPlan;
-
-class Set;
-using SetPtr = std::shared_ptr<Set>;
-
-/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section.
-struct SubqueryForSet
-{
- SubqueryForSet();
- ~SubqueryForSet();
- SubqueryForSet(SubqueryForSet &&);
- SubqueryForSet & operator= (SubqueryForSet &&);
-
- /// The source is obtained by running InterpreterSelectQuery on the subquery.
- std::unique_ptr<QueryPlan> source;
-
- /// If set, build it from result.
- SetPtr set;
-
- /// If set, put the result into the table.
- /// This is a temporary table for transferring to remote servers for distributed query processing.
- StoragePtr table;
-};
-
-/// ID of subquery -> what to do with it.
-using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>;
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp
deleted file mode 100644
index cf58199e57b..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/ThreadStatusExt.cpp
+++ /dev/null
@@ -1,625 +0,0 @@
-#include <Common/ThreadStatus.h>
-
-#include <DataStreams/PushingToViewsBlockOutputStream.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/OpenTelemetrySpanLog.h>
-#include <Interpreters/ProcessList.h>
-#include <Interpreters/QueryThreadLog.h>
-#include <Interpreters/QueryViewsLog.h>
-#include <Parsers/formatAST.h>
-#include <Common/CurrentThread.h>
-#include <Common/Exception.h>
-#include <Common/ProfileEvents.h>
-#include <Common/QueryProfiler.h>
-#include <Common/SensitiveDataMasker.h>
-#include <Common/ThreadProfileEvents.h>
-#include <Common/TraceCollector.h>
-#include <common/errnoToString.h>
-
-#if defined(OS_LINUX)
-# include <Common/hasLinuxCapability.h>
-
-# include <sys/time.h>
-# include <sys/resource.h>
-#endif
-
-namespace ProfileEvents
-{
-extern const Event SelectedRows;
-extern const Event SelectedBytes;
-extern const Event InsertedRows;
-extern const Event InsertedBytes;
-}
-
-
-/// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io
-/// TODO It doesn't make sense.
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int CANNOT_SET_THREAD_PRIORITY;
-}
-
-void ThreadStatus::applyQuerySettings()
-{
- auto query_context_ptr = query_context.lock();
- assert(query_context_ptr);
- const Settings & settings = query_context_ptr->getSettingsRef();
-
- query_id = query_context_ptr->getCurrentQueryId();
- initQueryProfiler();
-
- untracked_memory_limit = settings.max_untracked_memory;
- if (settings.memory_profiler_step && settings.memory_profiler_step < UInt64(untracked_memory_limit))
- untracked_memory_limit = settings.memory_profiler_step;
-
-#if defined(OS_LINUX)
- /// Set "nice" value if required.
- Int32 new_os_thread_priority = settings.os_thread_priority;
- if (new_os_thread_priority && hasLinuxCapability(CAP_SYS_NICE))
- {
- LOG_TRACE(log, "Setting nice to {}", new_os_thread_priority);
-
- if (0 != setpriority(PRIO_PROCESS, thread_id, new_os_thread_priority))
- throwFromErrno("Cannot 'setpriority'", ErrorCodes::CANNOT_SET_THREAD_PRIORITY);
-
- os_thread_priority = new_os_thread_priority;
- }
-#endif
-}
-
-
-void ThreadStatus::attachQueryContext(ContextPtr query_context_)
-{
- query_context = query_context_;
-
- if (global_context.expired())
- global_context = query_context_->getGlobalContext();
-
- if (thread_group)
- {
- std::lock_guard lock(thread_group->mutex);
-
- thread_group->query_context = query_context;
- if (thread_group->global_context.expired())
- thread_group->global_context = global_context;
- }
-
- // Generate new span for thread manually here, because we can't depend
- // on OpenTelemetrySpanHolder due to link order issues.
- // FIXME why and how is this different from setupState()?
- thread_trace_context = query_context_->query_trace_context;
- if (thread_trace_context.trace_id != UUID())
- {
- thread_trace_context.span_id = thread_local_rng();
- }
-
- applyQuerySettings();
-}
-
-void CurrentThread::defaultThreadDeleter()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->detachQuery(true, true);
-}
-
-void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
-{
- assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__);
-
- /// Attach or init current thread to thread group and copy useful information from it
- thread_group = thread_group_;
-
- performance_counters.setParent(&thread_group->performance_counters);
- memory_tracker.setParent(&thread_group->memory_tracker);
-
- {
- std::lock_guard lock(thread_group->mutex);
-
- /// NOTE: thread may be attached multiple times if it is reused from a thread pool.
- thread_group->thread_ids.emplace_back(thread_id);
-
- logs_queue_ptr = thread_group->logs_queue_ptr;
- fatal_error_callback = thread_group->fatal_error_callback;
- query_context = thread_group->query_context;
-
- if (global_context.expired())
- global_context = thread_group->global_context;
- }
-
- if (auto query_context_ptr = query_context.lock())
- {
- applyQuerySettings();
-
- // Generate new span for thread manually here, because we can't depend
- // on OpenTelemetrySpanHolder due to link order issues.
- thread_trace_context = query_context_ptr->query_trace_context;
- if (thread_trace_context.trace_id != UUID())
- {
- thread_trace_context.span_id = thread_local_rng();
- }
- }
- else
- {
- thread_trace_context.trace_id = 0;
- }
-
- initPerformanceCounters();
-
- thread_state = ThreadState::AttachedToQuery;
-}
-
-void ThreadStatus::initializeQuery()
-{
- setupState(std::make_shared<ThreadGroupStatus>());
-
- /// No need to lock on mutex here
- thread_group->memory_tracker.setDescription("(for query)");
- thread_group->master_thread_id = thread_id;
-}
-
-void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool check_detached)
-{
- if (thread_state == ThreadState::AttachedToQuery)
- {
- if (check_detached)
- throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR);
- return;
- }
-
- if (!thread_group_)
- throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR);
-
- setupState(thread_group_);
-}
-
-inline UInt64 time_in_nanoseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
-{
- return std::chrono::duration_cast<std::chrono::nanoseconds>(timepoint.time_since_epoch()).count();
-}
-
-inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
-{
- return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
-}
-
-
-inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
-{
- return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count();
-}
-
-void ThreadStatus::initPerformanceCounters()
-{
- performance_counters_finalized = false;
-
- /// Clear stats from previous query if a new query is started
- /// TODO: make separate query_thread_performance_counters and thread_performance_counters
- performance_counters.resetCounters();
- memory_tracker.resetCounters();
- memory_tracker.setDescription("(for thread)");
-
- // query_start_time_{microseconds, nanoseconds} are all constructed from the same time point
- // to ensure that they are all equal up to the precision of a second.
- const auto now = std::chrono::system_clock::now();
-
- query_start_time_nanoseconds = time_in_nanoseconds(now);
- query_start_time = time_in_seconds(now);
- query_start_time_microseconds = time_in_microseconds(now);
- ++queries_started;
-
- // query_start_time_nanoseconds cannot be used here since RUsageCounters expects CLOCK_MONOTONIC
- *last_rusage = RUsageCounters::current();
-
- if (auto query_context_ptr = query_context.lock())
- {
- const Settings & settings = query_context_ptr->getSettingsRef();
- if (settings.metrics_perf_events_enabled)
- {
- try
- {
- current_thread_counters.initializeProfileEvents(
- settings.metrics_perf_events_list);
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
- }
- }
-
- if (!taskstats)
- {
- try
- {
- taskstats = TasksStatsCounters::create(thread_id);
- }
- catch (...)
- {
- tryLogCurrentException(log);
- }
- }
- if (taskstats)
- taskstats->reset();
-}
-
-void ThreadStatus::finalizePerformanceCounters()
-{
- if (performance_counters_finalized)
- return;
-
- performance_counters_finalized = true;
- updatePerformanceCounters();
-
- // We want to close perf file descriptors if the perf events were enabled for
- // one query. What this code does in practice is less clear -- e.g., if I run
- // 'select 1 settings metrics_perf_events_enabled = 1', I still get
- // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*.
- bool close_perf_descriptors = true;
- if (auto query_context_ptr = query_context.lock())
- close_perf_descriptors = !query_context_ptr->getSettingsRef().metrics_perf_events_enabled;
-
- try
- {
- current_thread_counters.finalizeProfileEvents(performance_counters);
- if (close_perf_descriptors)
- current_thread_counters.closeEventDescriptors();
- }
- catch (...)
- {
- tryLogCurrentException(log);
- }
-
- try
- {
- auto global_context_ptr = global_context.lock();
- auto query_context_ptr = query_context.lock();
- if (global_context_ptr && query_context_ptr)
- {
- const auto & settings = query_context_ptr->getSettingsRef();
- if (settings.log_queries && settings.log_query_threads)
- {
- const auto now = std::chrono::system_clock::now();
- Int64 query_duration_ms = (time_in_microseconds(now) - query_start_time_microseconds) / 1000;
- if (query_duration_ms >= settings.log_queries_min_query_duration_ms.totalMilliseconds())
- {
- if (auto thread_log = global_context_ptr->getQueryThreadLog())
- logToQueryThreadLog(*thread_log, query_context_ptr->getCurrentDatabase(), now);
- }
- }
- }
- }
- catch (...)
- {
- tryLogCurrentException(log);
- }
-}
-
-void ThreadStatus::resetPerformanceCountersLastUsage()
-{
- *last_rusage = RUsageCounters::current();
- if (taskstats)
- taskstats->reset();
-}
-
-void ThreadStatus::initQueryProfiler()
-{
- if (!query_profiled_enabled)
- return;
-
- /// query profilers are useless without trace collector
- auto global_context_ptr = global_context.lock();
- if (!global_context_ptr || !global_context_ptr->hasTraceCollector())
- return;
-
- auto query_context_ptr = query_context.lock();
- assert(query_context_ptr);
- const auto & settings = query_context_ptr->getSettingsRef();
-
- try
- {
- if (settings.query_profiler_real_time_period_ns > 0)
- query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id,
- /* period */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns));
-
- if (settings.query_profiler_cpu_time_period_ns > 0)
- query_profiler_cpu = std::make_unique<QueryProfilerCPU>(thread_id,
- /* period */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns));
- }
- catch (...)
- {
- /// QueryProfiler is optional.
- tryLogCurrentException("ThreadStatus", "Cannot initialize QueryProfiler");
- }
-}
-
-void ThreadStatus::finalizeQueryProfiler()
-{
- query_profiler_real.reset();
- query_profiler_cpu.reset();
-}
-
-void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
-{
- MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
-
- if (exit_if_already_detached && thread_state == ThreadState::DetachedFromQuery)
- {
- thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery;
- return;
- }
-
- assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__);
-
- std::shared_ptr<OpenTelemetrySpanLog> opentelemetry_span_log;
- auto query_context_ptr = query_context.lock();
- if (thread_trace_context.trace_id != UUID() && query_context_ptr)
- {
- opentelemetry_span_log = query_context_ptr->getOpenTelemetrySpanLog();
- }
-
- if (opentelemetry_span_log)
- {
- // Log the current thread span.
- // We do this manually, because we can't use OpenTelemetrySpanHolder as a
- // ThreadStatus member, because of linking issues. This file is linked
- // separately, so we can reference OpenTelemetrySpanLog here, but if we had
- // the span holder as a field, we would have to reference it in the
- // destructor, which is in another library.
- OpenTelemetrySpanLogElement span;
-
- span.trace_id = thread_trace_context.trace_id;
- // All child span holders should be finished by the time we detach this
- // thread, so the current span id should be the thread span id. If not,
- // an assertion for a proper parent span in ~OpenTelemetrySpanHolder()
- // is going to fail, because we're going to reset it to zero later in
- // this function.
- span.span_id = thread_trace_context.span_id;
- assert(query_context_ptr);
- span.parent_span_id = query_context_ptr->query_trace_context.span_id;
- span.operation_name = getThreadName();
- span.start_time_us = query_start_time_microseconds;
- span.finish_time_us =
- std::chrono::duration_cast<std::chrono::microseconds>(
- std::chrono::system_clock::now().time_since_epoch()).count();
- span.attribute_names.push_back("clickhouse.thread_id");
- span.attribute_values.push_back(thread_id);
-
- opentelemetry_span_log->add(span);
- }
-
- finalizeQueryProfiler();
- finalizePerformanceCounters();
-
- /// Detach from thread group
- performance_counters.setParent(&ProfileEvents::global_counters);
- memory_tracker.reset();
-
- /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below (will reset to its parent).
- memory_tracker.setParent(thread_group->memory_tracker.getParent());
-
- query_id.clear();
- query_context.reset();
- thread_trace_context.trace_id = 0;
- thread_trace_context.span_id = 0;
- thread_group.reset();
-
- thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery;
-
-#if defined(__linux__)
- if (os_thread_priority)
- {
- LOG_TRACE(log, "Resetting nice");
-
- if (0 != setpriority(PRIO_PROCESS, thread_id, 0))
- LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString(ErrorCodes::CANNOT_SET_THREAD_PRIORITY, errno));
-
- os_thread_priority = 0;
- }
-#endif
-}
-
-void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point<std::chrono::system_clock> now)
-{
- QueryThreadLogElement elem;
-
- // construct current_time and current_time_microseconds using the same time point
- // so that the two times will always be equal up to a precision of a second.
- auto current_time = time_in_seconds(now);
- auto current_time_microseconds = time_in_microseconds(now);
-
- elem.event_time = current_time;
- elem.event_time_microseconds = current_time_microseconds;
- elem.query_start_time = query_start_time;
- elem.query_start_time_microseconds = query_start_time_microseconds;
- elem.query_duration_ms = (time_in_nanoseconds(now) - query_start_time_nanoseconds) / 1000000U;
-
- elem.read_rows = progress_in.read_rows.load(std::memory_order_relaxed);
- elem.read_bytes = progress_in.read_bytes.load(std::memory_order_relaxed);
-
- /// TODO: Use written_rows and written_bytes when run time progress is implemented
- elem.written_rows = progress_out.read_rows.load(std::memory_order_relaxed);
- elem.written_bytes = progress_out.read_bytes.load(std::memory_order_relaxed);
- elem.memory_usage = memory_tracker.get();
- elem.peak_memory_usage = memory_tracker.getPeak();
-
- elem.thread_name = getThreadName();
- elem.thread_id = thread_id;
-
- elem.current_database = current_database;
- if (thread_group)
- {
- {
- std::lock_guard lock(thread_group->mutex);
-
- elem.master_thread_id = thread_group->master_thread_id;
- elem.query = thread_group->query;
- elem.normalized_query_hash = thread_group->normalized_query_hash;
- }
- }
-
- auto query_context_ptr = query_context.lock();
- if (query_context_ptr)
- {
- elem.client_info = query_context_ptr->getClientInfo();
-
- if (query_context_ptr->getSettingsRef().log_profile_events != 0)
- {
- /// NOTE: Here we are in the same thread, so we can make memcpy()
- elem.profile_counters = std::make_shared<ProfileEvents::Counters>(performance_counters.getPartiallyAtomicSnapshot());
- }
- }
-
- thread_log.add(elem);
-}
-
-static String getCleanQueryAst(const ASTPtr q, ContextPtr context)
-{
- String res = serializeAST(*q, true);
- if (auto * masker = SensitiveDataMasker::getInstance())
- masker->wipeSensitiveData(res);
-
- res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
-
- return res;
-}
-
-void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo)
-{
- auto query_context_ptr = query_context.lock();
- if (!query_context_ptr)
- return;
- auto views_log = query_context_ptr->getQueryViewsLog();
- if (!views_log)
- return;
-
- QueryViewsLogElement element;
-
- element.event_time = time_in_seconds(vinfo.runtime_stats.event_time);
- element.event_time_microseconds = time_in_microseconds(vinfo.runtime_stats.event_time);
- element.view_duration_ms = vinfo.runtime_stats.elapsed_ms;
-
- element.initial_query_id = query_id;
- element.view_name = vinfo.table_id.getFullTableName();
- element.view_uuid = vinfo.table_id.uuid;
- element.view_type = vinfo.runtime_stats.type;
- if (vinfo.query)
- element.view_query = getCleanQueryAst(vinfo.query, query_context_ptr);
- element.view_target = vinfo.runtime_stats.target_name;
-
- auto events = std::make_shared<ProfileEvents::Counters>(performance_counters.getPartiallyAtomicSnapshot());
- element.read_rows = progress_in.read_rows.load(std::memory_order_relaxed);
- element.read_bytes = progress_in.read_bytes.load(std::memory_order_relaxed);
- element.written_rows = (*events)[ProfileEvents::InsertedRows];
- element.written_bytes = (*events)[ProfileEvents::InsertedBytes];
- element.peak_memory_usage = memory_tracker.getPeak() > 0 ? memory_tracker.getPeak() : 0;
- if (query_context_ptr->getSettingsRef().log_profile_events != 0)
- {
- element.profile_counters = events;
- }
-
- element.status = vinfo.runtime_stats.event_status;
- element.exception_code = 0;
- if (vinfo.exception)
- {
- element.exception_code = getExceptionErrorCode(vinfo.exception);
- element.exception = getExceptionMessage(vinfo.exception, false);
- if (query_context_ptr->getSettingsRef().calculate_text_stack_trace)
- element.stack_trace = getExceptionStackTraceString(vinfo.exception);
- }
-
- views_log->add(element);
-}
-
-void CurrentThread::initializeQuery()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->initializeQuery();
- current_thread->deleter = CurrentThread::defaultThreadDeleter;
-}
-
-void CurrentThread::attachTo(const ThreadGroupStatusPtr & thread_group)
-{
- if (unlikely(!current_thread))
- return;
- current_thread->attachQuery(thread_group, true);
- current_thread->deleter = CurrentThread::defaultThreadDeleter;
-}
-
-void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group)
-{
- if (unlikely(!current_thread))
- return;
- current_thread->attachQuery(thread_group, false);
- current_thread->deleter = CurrentThread::defaultThreadDeleter;
-}
-
-void CurrentThread::attachQueryContext(ContextPtr query_context)
-{
- if (unlikely(!current_thread))
- return;
- current_thread->attachQueryContext(query_context);
-}
-
-void CurrentThread::finalizePerformanceCounters()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->finalizePerformanceCounters();
-}
-
-void CurrentThread::detachQuery()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->detachQuery(false);
-}
-
-void CurrentThread::detachQueryIfNotDetached()
-{
- if (unlikely(!current_thread))
- return;
- current_thread->detachQuery(true);
-}
-
-
-CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context)
-{
- CurrentThread::initializeQuery();
- CurrentThread::attachQueryContext(query_context);
- if (!query_context->hasQueryContext())
- query_context->makeQueryContext();
-}
-
-void CurrentThread::QueryScope::logPeakMemoryUsage()
-{
- auto group = CurrentThread::getGroup();
- if (!group)
- return;
-
- log_peak_memory_usage_in_destructor = false;
- group->memory_tracker.logPeakMemoryUsage();
-}
-
-CurrentThread::QueryScope::~QueryScope()
-{
- try
- {
- if (log_peak_memory_usage_in_destructor)
- logPeakMemoryUsage();
-
- CurrentThread::detachQueryIfNotDetached();
- }
- catch (...)
- {
- tryLogCurrentException("CurrentThread", __PRETTY_FUNCTION__);
- }
-}
-
-}
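CurrentThread::QueryScope, defined at the end of the file above, is the RAII entry point that query execution paths rely on: the constructor initializes the thread group and attaches the query context, and the destructor logs peak memory usage and detaches the thread. A hedged usage sketch:

    void runQueryOnCurrentThread(ContextMutablePtr query_context)
    {
        CurrentThread::QueryScope query_scope(query_context);  /// initializeQuery() + attachQueryContext()

        /// ... execute the query: per-thread ProfileEvents and the MemoryTracker are now
        /// parented to the query's ThreadGroupStatus ...

    }   /// ~QueryScope(): logs peak memory usage (unless logPeakMemoryUsage() was already called) and detaches the thread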
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h
deleted file mode 100644
index f8fe0f27f17..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Interpreters/join_common.h
+++ /dev/null
@@ -1,121 +0,0 @@
-#pragma once
-
-#include <Columns/ColumnsNumber.h>
-#include <Core/Block.h>
-#include <Interpreters/IJoin.h>
-#include <Interpreters/ActionsDAG.h>
-#include <Interpreters/ExpressionActions.h>
-
-namespace NDB
-{
-
-struct ColumnWithTypeAndName;
-class TableJoin;
-class IColumn;
-using ColumnRawPtrs = std::vector<const IColumn *>;
-using UInt8ColumnDataPtr = const ColumnUInt8::Container *;
-
-namespace JoinCommon
-{
-bool canBecomeNullable(const DataTypePtr & type);
-DataTypePtr convertTypeToNullable(const DataTypePtr & type);
-void convertColumnToNullable(ColumnWithTypeAndName & column);
-void convertColumnsToNullable(Block & block, size_t starting_pos = 0);
-void removeColumnNullability(ColumnWithTypeAndName & column);
-void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_column);
-ColumnPtr emptyNotNullableClone(const ColumnPtr & column);
-ColumnPtr materializeColumn(const Block & block, const String & name);
-Columns materializeColumns(const Block & block, const Names & names);
-ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
-ColumnRawPtrs getRawPointers(const Columns & columns);
-void removeLowCardinalityInplace(Block & block);
-void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true);
-void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys);
-
-ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
-
-/// Throw an exception if the join condition column is not UInt8
-void checkTypesOfMasks(const Block & block_left, const String & condition_name_left,
- const Block & block_right, const String & condition_name_right);
-
-/// Throw an exception if the blocks have different types of key columns. Types are compared up to Nullability.
-void checkTypesOfKeys(const Block & block_left, const Names & key_names_left,
- const Block & block_right, const Names & key_names_right);
-
-/// Check both keys and conditions
-void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const String & condition_name_left,
- const Block & block_right, const Names & key_names_right, const String & condition_name_right);
-
-void createMissedColumns(Block & block);
-void joinTotals(Block left_totals, Block right_totals, const TableJoin & table_join, Block & out_block);
-
-void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count);
-
-bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type);
-
-/// Return mask array of type ColumnUInt8 for specified column. Source should have type UInt8 or Nullable(UInt8).
-ColumnPtr getColumnAsMask(const Block & block, const String & column_name);
-
-/// Split key columns and other columns according to the list of key names
-void splitAdditionalColumns(const Names & key_names, const Block & sample_block, Block & block_keys, Block & block_others);
-
-void changeLowCardinalityInplace(ColumnWithTypeAndName & column);
-
-}
-
-/// Creates the result from right-table data for RIGHT and FULL JOINs when the keys are not present in the left table.
-class NotJoinedBlocks final
-{
-public:
- using LeftToRightKeyRemap = std::unordered_map<String, String>;
-
- /// Returns non-joined columns from the right part of the join
- class RightColumnsFiller
- {
- public:
- /// Create empty block for right part
- virtual Block getEmptyBlock() = 0;
- /// Fill columns from right part of join with not joined rows
- virtual size_t fillColumns(MutableColumns & columns_right) = 0;
-
- virtual ~RightColumnsFiller() = default;
- };
-
- NotJoinedBlocks(std::unique_ptr<RightColumnsFiller> filler_,
- const Block & result_sample_block_,
- size_t left_columns_count,
- const LeftToRightKeyRemap & left_to_right_key_remap);
-
- Block read();
-
-private:
- void extractColumnChanges(size_t right_pos, size_t result_pos);
- void correctLowcardAndNullability(Block & block);
- void addLeftColumns(Block & block, size_t rows_added) const;
- void addRightColumns(Block & block, MutableColumns & columns_right) const;
- void copySameKeys(Block & block) const;
-
- std::unique_ptr<RightColumnsFiller> filler;
-
- /// Right block saved in Join
- Block saved_block_sample;
-
- /// Output of join
- Block result_sample_block;
-
- /// Indices of columns in result_sample_block that should be generated
- std::vector<size_t> column_indices_left;
- /// Indices of columns that come from the right-side table: right_pos -> result_pos
- std::unordered_map<size_t, size_t> column_indices_right;
-
- std::unordered_map<size_t, size_t> same_result_keys;
-
- /// Which right columns (saved in parent) need Nullability/LowCardinality change
- /// before placing them in result block
- std::vector<std::pair<size_t, bool>> right_nullability_changes;
- std::vector<std::pair<size_t, bool>> right_lowcard_changes;
-
- void setRightIndex(size_t right_pos, size_t result_position);
-};
-
-}
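Most of JoinCommon consists of small free functions used while preparing join inputs. A hedged sketch of a typical preparation step; block, key_names, and condition_name are placeholders:

    /// Materialize the key columns in place and get raw pointers to them.
    ColumnRawPtrs key_columns = JoinCommon::materializeColumnsInplace(block, key_names);

    /// Interpret a UInt8 / Nullable(UInt8) condition column as a join mask.
    ColumnPtr mask = JoinCommon::getColumnAsMask(block, condition_name);

    /// Make every column in the block nullable, e.g. for the outer side of a FULL JOIN.
    JoinCommon::convertColumnsToNullable(block);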
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.cpp
deleted file mode 100644
index 2244b5a3a08..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <Parsers/ASTDropFunctionQuery.h>
-#include <Common/quoteString.h>
-#include <IO/Operators.h>
-
-namespace NDB
-{
-
-ASTPtr ASTDropFunctionQuery::clone() const
-{
- return std::make_shared<ASTDropFunctionQuery>(*this);
-}
-
-void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
-{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : "");
- settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : "");
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.h
deleted file mode 100644
index a6b9f4b7690..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTDropFunctionQuery.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#pragma once
-
-#include "IAST.h"
-
-namespace NDB
-{
-
-class ASTDropFunctionQuery : public IAST
-{
-public:
- String function_name;
-
- String getID(char) const override { return "DropFunctionQuery"; }
-
- ASTPtr clone() const override;
-
- void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h
deleted file mode 100644
index 91a05a70173..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowAccessQuery.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace NDB
-{
-
-struct ASTShowAccessQueryNames
-{
- static constexpr auto ID = "ShowAccessQuery";
- static constexpr auto Query = "SHOW ACCESS";
-};
-
-using ASTShowAccessQuery = ASTQueryWithOutputImpl<ASTShowAccessQueryNames>;
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h
deleted file mode 100644
index 6c08aa9006c..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ASTShowProcesslistQuery.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <Parsers/ASTQueryWithOutput.h>
-
-
-namespace NDB
-{
-
-struct ASTShowProcesslistIDAndQueryNames
-{
- static constexpr auto ID = "ShowProcesslistQuery";
- static constexpr auto Query = "SHOW PROCESSLIST";
-};
-
-using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslistIDAndQueryNames>;
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.cpp
deleted file mode 100644
index f0baf9f1fb8..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <Parsers/ASTDropFunctionQuery.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ParserDropFunctionQuery.h>
-
-namespace NDB
-{
-
-bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
-{
- ParserKeyword s_drop("DROP");
- ParserKeyword s_function("FUNCTION");
- ParserIdentifier function_name_p;
-
- ASTPtr function_name;
-
- if (!s_drop.ignore(pos, expected))
- return false;
-
- if (!s_function.ignore(pos, expected))
- return false;
-
- if (!function_name_p.parse(pos, function_name, expected))
- return false;
-
- auto drop_function_query = std::make_shared<ASTDropFunctionQuery>();
- node = drop_function_query;
-
- drop_function_query->function_name = function_name->as<ASTIdentifier &>().name();
-
- return true;
-}
-
-}
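ParserDropFunctionQuery consumes the DROP and FUNCTION keywords plus an identifier and produces the ASTDropFunctionQuery removed above. A hedged sketch of the round trip, assuming the generic parseQuery() entry point from Parsers/parseQuery.h:

    String query = "DROP FUNCTION linear";
    ParserDropFunctionQuery parser;
    ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
                            /* description = */ "", /* max_query_size = */ 0, /* max_parser_depth = */ 0);

    const auto & drop = ast->as<ASTDropFunctionQuery &>();
    /// drop.function_name == "linear"; formatImpl() would render it back as: DROP FUNCTION linear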
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.h
deleted file mode 100644
index d8f06b81f25..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserDropFunctionQuery.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-
-#include "IParserBase.h"
-
-namespace NDB
-{
-/// DROP FUNCTION function1
-class ParserDropFunctionQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "DROP FUNCTION query"; }
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h
deleted file mode 100644
index 24a3ac5514d..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowAccessQuery.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTShowAccessQuery.h>
-
-
-namespace NDB
-{
-
-/** Query SHOW ACCESS
- */
-class ParserShowAccessQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW ACCESS query"; }
-
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
- {
- auto query = std::make_shared<ASTShowAccessQuery>();
-
- if (!ParserKeyword("SHOW ACCESS").ignore(pos, expected))
- return false;
-
- node = query;
-
- return true;
- }
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h
deleted file mode 100644
index deb5439bf89..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Parsers/ParserShowProcesslistQuery.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-#include <Parsers/IParserBase.h>
-#include <Parsers/CommonParsers.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ASTShowProcesslistQuery.h>
-
-
-namespace NDB
-{
-
-/** Query SHOW PROCESSLIST
- */
-class ParserShowProcesslistQuery : public IParserBase
-{
-protected:
- const char * getName() const override { return "SHOW PROCESSLIST query"; }
-
- bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
- {
- auto query = std::make_shared<ASTShowProcesslistQuery>();
-
- if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected))
- return false;
-
- node = query;
-
- return true;
- }
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Executors/OutputStreamToOutputFormat.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Executors/OutputStreamToOutputFormat.h
deleted file mode 100644
index 89c0844d979..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Executors/OutputStreamToOutputFormat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#pragma once
-#include <DataStreams/IBlockOutputStream.h>
-
-namespace NDB
-{
-
-
-class IOutputFormat;
-
-using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
-
-/// Wrapper. Implements IBlockOutputStream interface using IOutputFormat object.
-class OutputStreamToOutputFormat : public IBlockOutputStream
-{
-public:
- explicit OutputStreamToOutputFormat(OutputFormatPtr output_format_) : output_format(std::move(output_format_)) {}
-
- Block getHeader() const override;
-
- void write(const Block & block) override;
-
- void writePrefix() override;
- void writeSuffix() override;
-
- void flush() override;
-
- void setRowsBeforeLimit(size_t rows_before_limit) override;
- void setTotals(const Block & totals) override;
- void setExtremes(const Block & extremes) override;
-
- void onProgress(const Progress & progress) override;
-
- std::string getContentType() const override;
-
-private:
- OutputFormatPtr output_format;
-};
-
-}
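The wrapper above lets legacy code that expects an IBlockOutputStream keep working on top of the newer IOutputFormat. A hedged usage sketch; output_format and block are placeholders, and BlockOutputStreamPtr is the usual shared-pointer alias for IBlockOutputStream:

    BlockOutputStreamPtr stream = std::make_shared<OutputStreamToOutputFormat>(output_format);

    stream->writePrefix();
    stream->write(block);     /// forwarded to the wrapped IOutputFormat
    stream->writeSuffix();
    stream->flush();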
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
deleted file mode 100644
index 08b653bc543..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-#include <Processors/Formats/Impl/ParallelFormattingOutputFormat.h>
-
-#include <Common/setThreadName.h>
-
-namespace NDB
-{
- void ParallelFormattingOutputFormat::finalize()
- {
- need_flush = true;
- IOutputFormat::finalized = true;
- /// Don't throw any background_exception here, because we want to finalize the execution.
- /// Exception will be checked after main thread is finished.
- addChunk(Chunk{}, ProcessingUnitType::FINALIZE, /*can_throw_exception*/ false);
- collector_finished.wait();
-
- {
- std::lock_guard<std::mutex> lock(collector_thread_mutex);
- if (collector_thread.joinable())
- collector_thread.join();
- }
-
- {
- std::unique_lock<std::mutex> lock(mutex);
-
- if (background_exception)
- std::rethrow_exception(background_exception);
- }
- }
-
- void ParallelFormattingOutputFormat::addChunk(Chunk chunk, ProcessingUnitType type, bool can_throw_exception)
- {
- {
- std::unique_lock<std::mutex> lock(mutex);
- if (background_exception && can_throw_exception)
- std::rethrow_exception(background_exception);
- }
-
- const auto current_unit_number = writer_unit_number % processing_units.size();
- auto & unit = processing_units[current_unit_number];
-
- {
- std::unique_lock<std::mutex> lock(mutex);
- writer_condvar.wait(lock,
- [&]{ return unit.status == READY_TO_INSERT || emergency_stop; });
- }
-
- if (emergency_stop)
- return;
-
- assert(unit.status == READY_TO_INSERT);
- unit.chunk = std::move(chunk);
- /// Resize memory without deallocation.
- unit.segment.resize(0);
- unit.status = READY_TO_FORMAT;
- unit.type = type;
-
- scheduleFormatterThreadForUnitWithNumber(current_unit_number);
-
- ++writer_unit_number;
- }
-
-
- void ParallelFormattingOutputFormat::finishAndWait()
- {
- emergency_stop = true;
-
- {
- std::unique_lock<std::mutex> lock(mutex);
- collector_condvar.notify_all();
- writer_condvar.notify_all();
- }
-
- {
- std::lock_guard<std::mutex> lock(collector_thread_mutex);
- if (collector_thread.joinable())
- collector_thread.join();
- }
-
- try
- {
- pool.wait();
- }
- catch (...)
- {
- tryLogCurrentException(__PRETTY_FUNCTION__);
- }
- }
-
-
- void ParallelFormattingOutputFormat::collectorThreadFunction(const ThreadGroupStatusPtr & thread_group)
- {
- setThreadName("Collector");
- if (thread_group)
- CurrentThread::attachToIfDetached(thread_group);
-
- try
- {
- while (!emergency_stop)
- {
- const auto current_unit_number = collector_unit_number % processing_units.size();
- auto & unit = processing_units[current_unit_number];
-
- {
- std::unique_lock<std::mutex> lock(mutex);
- collector_condvar.wait(lock,
- [&]{ return unit.status == READY_TO_READ || emergency_stop; });
- }
-
- if (emergency_stop)
- break;
-
- assert(unit.status == READY_TO_READ);
-
-                /// Copy the unit type now; it is checked after the notification below to decide whether to stop.
- auto copy_if_unit_type = unit.type;
-
- /// Do main work here.
- out.write(unit.segment.data(), unit.actual_memory_size);
-
- if (need_flush.exchange(false) || auto_flush)
- IOutputFormat::flush();
-
- ++collector_unit_number;
-
- {
- /// Notify other threads.
- std::lock_guard<std::mutex> lock(mutex);
- unit.status = READY_TO_INSERT;
- writer_condvar.notify_all();
- }
-                /// We can exit only after writing the last piece to the out buffer.
- if (copy_if_unit_type == ProcessingUnitType::FINALIZE)
- {
- break;
- }
- }
- collector_finished.set();
- }
- catch (...)
- {
- collector_finished.set();
- onBackgroundException();
- }
- }
-
-
- void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number, const ThreadGroupStatusPtr & thread_group)
- {
- setThreadName("Formatter");
- if (thread_group)
- CurrentThread::attachToIfDetached(thread_group);
-
- try
- {
- auto & unit = processing_units[current_unit_number];
-            assert(unit.status == READY_TO_FORMAT);
-
- /// We want to preallocate memory buffer (increase capacity)
- /// and put the pointer at the beginning of the buffer
- unit.segment.resize(DBMS_DEFAULT_BUFFER_SIZE);
-
- unit.actual_memory_size = 0;
- BufferWithOutsideMemory<WriteBuffer> out_buffer(unit.segment);
-
-            /// The second invocation won't release memory, it only sets the size to 0.
- unit.segment.resize(0);
-
- auto formatter = internal_formatter_creator(out_buffer);
-
- switch (unit.type)
- {
- case ProcessingUnitType::START :
- {
- formatter->doWritePrefix();
- break;
- }
- case ProcessingUnitType::PLAIN :
- {
- formatter->consume(std::move(unit.chunk));
- break;
- }
- case ProcessingUnitType::TOTALS :
- {
- formatter->consumeTotals(std::move(unit.chunk));
- break;
- }
- case ProcessingUnitType::EXTREMES :
- {
- formatter->consumeExtremes(std::move(unit.chunk));
- break;
- }
- case ProcessingUnitType::FINALIZE :
- {
- formatter->doWriteSuffix();
- break;
- }
- }
- /// Flush all the data to handmade buffer.
- formatter->flush();
- unit.actual_memory_size = out_buffer.getActualSize();
-
- {
- std::lock_guard<std::mutex> lock(mutex);
- unit.status = READY_TO_READ;
- collector_condvar.notify_all();
- }
- }
- catch (...)
- {
- onBackgroundException();
- }
- }
-}
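The writer, formatter, and collector threads above coordinate through a fixed-size ring of processing units guarded by one mutex and two condition variables; each unit cycles READY_TO_INSERT -> READY_TO_FORMAT -> READY_TO_READ and back. A stripped-down sketch of that handoff, with simplified names and no thread pool or exception handling (so a sketch of the pattern, not the class itself):

    #include <condition_variable>
    #include <cstddef>
    #include <mutex>
    #include <vector>

    enum class Status { READY_TO_INSERT, READY_TO_FORMAT, READY_TO_READ };
    struct Unit { Status status = Status::READY_TO_INSERT; };

    std::mutex mutex;
    std::condition_variable writer_condvar;
    std::condition_variable collector_condvar;
    std::vector<Unit> units(4);
    bool emergency_stop = false;

    /// Writer side: wait until the slot is free, then mark it for a formatter thread.
    void addUnit(size_t ticket)
    {
        auto & unit = units[ticket % units.size()];
        std::unique_lock<std::mutex> lock(mutex);
        writer_condvar.wait(lock, [&] { return unit.status == Status::READY_TO_INSERT || emergency_stop; });
        if (emergency_stop)
            return;
        unit.status = Status::READY_TO_FORMAT;   /// a formatter thread later flips this to READY_TO_READ
    }

    /// Collector side: wait until the slot is formatted, consume it, then hand it back to the writer.
    void collectUnit(size_t ticket)
    {
        auto & unit = units[ticket % units.size()];
        std::unique_lock<std::mutex> lock(mutex);
        collector_condvar.wait(lock, [&] { return unit.status == Status::READY_TO_READ || emergency_stop; });
        if (emergency_stop)
            return;
        unit.status = Status::READY_TO_INSERT;
        writer_condvar.notify_all();
    }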
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
deleted file mode 100644
index a8ab5024093..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-#include <Processors/Formats/Impl/ParallelParsingInputFormat.h>
-#include <IO/ReadHelpers.h>
-#include <Common/CurrentThread.h>
-#include <Common/setThreadName.h>
-#include <common/scope_guard_safe.h>
-
-namespace NDB
-{
-
-void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group)
-{
- SCOPE_EXIT_SAFE(
- if (thread_group)
- CurrentThread::detachQueryIfNotDetached();
- );
- if (thread_group)
- CurrentThread::attachTo(thread_group);
-
- setThreadName("Segmentator");
- try
- {
- while (!parsing_finished)
- {
- const auto segmentator_unit_number = segmentator_ticket_number % processing_units.size();
- auto & unit = processing_units[segmentator_unit_number];
-
- {
- std::unique_lock<std::mutex> lock(mutex);
- segmentator_condvar.wait(lock,
- [&]{ return unit.status == READY_TO_INSERT || parsing_finished; });
- }
-
- if (parsing_finished)
- break;
-
- assert(unit.status == READY_TO_INSERT);
-
- // Segmentating the original input.
- unit.segment.resize(0);
-
- auto [have_more_data, currently_read_rows] = file_segmentation_engine(in, unit.segment, min_chunk_bytes);
-
- unit.offset = successfully_read_rows_count;
- successfully_read_rows_count += currently_read_rows;
-
- unit.is_last = !have_more_data;
- unit.status = READY_TO_PARSE;
- scheduleParserThreadForUnitWithNumber(segmentator_ticket_number);
- ++segmentator_ticket_number;
-
- if (!have_more_data)
- break;
- }
- }
- catch (...)
- {
- onBackgroundException(successfully_read_rows_count);
- }
-}
-
-void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number)
-{
- SCOPE_EXIT_SAFE(
- if (thread_group)
- CurrentThread::detachQueryIfNotDetached();
- );
- if (thread_group)
- CurrentThread::attachTo(thread_group);
-
- const auto parser_unit_number = current_ticket_number % processing_units.size();
- auto & unit = processing_units[parser_unit_number];
-
- try
- {
- setThreadName("ChunkParser");
-
- /*
- * This is kind of suspicious -- the input_process_creator contract with
- * respect to multithreaded use is not clear, but we hope that it is
- * just a 'normal' factory class that doesn't have any state, and so we
- * can use it from multiple threads simultaneously.
- */
- ReadBuffer read_buffer(unit.segment.data(), unit.segment.size(), 0);
-
- InputFormatPtr input_format = internal_parser_creator(read_buffer);
- input_format->setCurrentUnitNumber(current_ticket_number);
- InternalParser parser(input_format);
-
- unit.chunk_ext.chunk.clear();
- unit.chunk_ext.block_missing_values.clear();
-
- /// Propagate column_mapping to other parsers.
- /// Note: column_mapping is used only for *WithNames types
- if (current_ticket_number != 0)
- input_format->setColumnMapping(column_mapping);
-
-        // We don't know how many blocks there will be, so we have to read them all
-        // until an empty block occurs.
- Chunk chunk;
- while (!parsing_finished && (chunk = parser.getChunk()) != Chunk())
- {
- /// Variable chunk is moved, but it is not really used in the next iteration.
- /// NOLINTNEXTLINE(bugprone-use-after-move)
- unit.chunk_ext.chunk.emplace_back(std::move(chunk));
- unit.chunk_ext.block_missing_values.emplace_back(parser.getMissingValues());
- }
-
- /// Extract column_mapping from first parser to propagate it to others
- if (current_ticket_number == 0)
- {
- column_mapping = input_format->getColumnMapping();
- column_mapping->is_set = true;
- first_parser_finished.set();
- }
-
- // We suppose we will get at least some blocks for a non-empty buffer,
- // except at the end of file. Also see a matching assert in readImpl().
- assert(unit.is_last || !unit.chunk_ext.chunk.empty() || parsing_finished);
-
- std::lock_guard<std::mutex> lock(mutex);
- unit.status = READY_TO_READ;
- reader_condvar.notify_all();
- }
- catch (...)
- {
- onBackgroundException(unit.offset);
- }
-}
-
-
-void ParallelParsingInputFormat::onBackgroundException(size_t offset)
-{
- std::unique_lock<std::mutex> lock(mutex);
- if (!background_exception)
- {
- background_exception = std::current_exception();
- if (ParsingException * e = exception_cast<ParsingException *>(background_exception))
- if (e->getLineNumber() != -1)
- e->setLineNumber(e->getLineNumber() + offset);
- }
- tryLogCurrentException(__PRETTY_FUNCTION__);
- parsing_finished = true;
- first_parser_finished.set();
- reader_condvar.notify_all();
- segmentator_condvar.notify_all();
-}
-
-Chunk ParallelParsingInputFormat::generate()
-{
- /// Delayed launching of segmentator thread
- if (unlikely(!parsing_started.exchange(true)))
- {
- segmentator_thread = ThreadFromGlobalPool(
- &ParallelParsingInputFormat::segmentatorThreadFunction, this, CurrentThread::getGroup());
- }
-
- if (isCancelled() || parsing_finished)
- {
- /**
- * Check for background exception and rethrow it before we return.
- */
- std::unique_lock<std::mutex> lock(mutex);
- if (background_exception)
- {
- lock.unlock();
- onCancel();
- std::rethrow_exception(background_exception);
- }
-
- return {};
- }
-
- const auto inserter_unit_number = reader_ticket_number % processing_units.size();
- auto & unit = processing_units[inserter_unit_number];
-
- if (!next_block_in_current_unit.has_value())
- {
- // We have read out all the Blocks from the previous Processing Unit,
- // wait for the current one to become ready.
- std::unique_lock<std::mutex> lock(mutex);
- reader_condvar.wait(lock, [&](){ return unit.status == READY_TO_READ || parsing_finished; });
-
- if (parsing_finished)
- {
- /**
- * Check for background exception and rethrow it before we return.
- */
- if (background_exception)
- {
- lock.unlock();
- cancel();
- std::rethrow_exception(background_exception);
- }
-
- return {};
- }
-
- assert(unit.status == READY_TO_READ);
- next_block_in_current_unit = 0;
- }
-
- if (unit.chunk_ext.chunk.empty())
- {
- /*
- * Can we get zero blocks for an entire segment, when the format parser
-         * skips its entire content and does not create any blocks? Probably not,
- * but if we ever do, we should add a loop around the above if, to skip
- * these. Also see a matching assert in the parser thread.
- */
- assert(unit.is_last);
- parsing_finished = true;
- return {};
- }
-
- assert(next_block_in_current_unit.value() < unit.chunk_ext.chunk.size());
-
- Chunk res = std::move(unit.chunk_ext.chunk.at(*next_block_in_current_unit));
- last_block_missing_values = std::move(unit.chunk_ext.block_missing_values[*next_block_in_current_unit]);
-
- next_block_in_current_unit.value() += 1;
-
- if (*next_block_in_current_unit == unit.chunk_ext.chunk.size())
- {
-        // We have finished reading this Processing Unit, move to the next one.
- next_block_in_current_unit.reset();
- ++reader_ticket_number;
-
- if (unit.is_last)
- {
-            // If it was the last unit, parsing is finished.
- parsing_finished = true;
- }
- else
- {
- // Pass the unit back to the segmentator.
- std::unique_lock<std::mutex> lock(mutex);
- unit.status = READY_TO_INSERT;
- segmentator_condvar.notify_all();
- }
- }
-
- return res;
-}
-
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp
deleted file mode 100644
index ee43ef68e3e..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Pipe.cpp
+++ /dev/null
@@ -1,874 +0,0 @@
-#include <Processors/Pipe.h>
-#include <IO/WriteHelpers.h>
-#include <Processors/Sources/SourceFromInputStream.h>
-#include <Processors/ResizeProcessor.h>
-#include <Processors/ConcatProcessor.h>
-#include <Processors/LimitTransform.h>
-#include <Processors/Sinks/NullSink.h>
-#include <Processors/Sinks/EmptySink.h>
-#include <Processors/Transforms/ExtremesTransform.h>
-#include <Processors/Formats/IOutputFormat.h>
-#include <Processors/Sources/NullSource.h>
-#include <Columns/ColumnConst.h>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-static void checkSource(const IProcessor & source)
-{
- if (!source.getInputs().empty())
- throw Exception("Source for pipe shouldn't have any input, but " + source.getName() + " has " +
- toString(source.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
-
- if (source.getOutputs().empty())
- throw Exception("Source for pipe should have single output, but it doesn't have any",
- ErrorCodes::LOGICAL_ERROR);
-
- if (source.getOutputs().size() > 1)
- throw Exception("Source for pipe should have single output, but " + source.getName() + " has " +
- toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
-}
-
-static OutputPort * uniteExtremes(const OutputPortRawPtrs & ports, const Block & header, Processors & processors)
-{
- if (ports.empty())
- return nullptr;
-
- if (ports.size() == 1)
- return ports.front();
-
- /// Here we calculate extremes for extremes in case we unite several pipelines.
- /// Example: select number from numbers(2) union all select number from numbers(3)
-
- /// ->> Resize -> Extremes --(output port)----> Empty
- /// --(extremes port)--> ...
-
- auto resize = std::make_shared<ResizeProcessor>(header, ports.size(), 1);
- auto extremes = std::make_shared<ExtremesTransform>(header);
- auto sink = std::make_shared<EmptySink>(header);
-
- auto * extremes_port = &extremes->getExtremesPort();
-
- auto in = resize->getInputs().begin();
- for (const auto & port : ports)
- connect(*port, *(in++));
-
- connect(resize->getOutputs().front(), extremes->getInputPort());
- connect(extremes->getOutputPort(), sink->getPort());
-
- processors.emplace_back(std::move(resize));
- processors.emplace_back(std::move(extremes));
- processors.emplace_back(std::move(sink));
-
- return extremes_port;
-}
-
-static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & header, Processors & processors)
-{
- if (ports.empty())
- return nullptr;
-
- if (ports.size() == 1)
- return ports.front();
-
- /// Calculate totals from several streams.
-    /// Take totals from the first source which has any, skip the others.
-
- /// ->> Concat -> Limit
-
- auto concat = std::make_shared<ConcatProcessor>(header, ports.size());
- auto limit = std::make_shared<LimitTransform>(header, 1, 0);
-
- auto * totals_port = &limit->getOutputPort();
-
- auto in = concat->getInputs().begin();
- for (const auto & port : ports)
- connect(*port, *(in++));
-
- connect(concat->getOutputs().front(), limit->getInputPort());
-
- processors.emplace_back(std::move(concat));
- processors.emplace_back(std::move(limit));
-
- return totals_port;
-}
-
-Pipe::Holder & Pipe::Holder::operator=(Holder && rhs)
-{
- table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end());
- storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end());
- interpreter_context.insert(interpreter_context.end(),
- rhs.interpreter_context.begin(), rhs.interpreter_context.end());
- for (auto & plan : rhs.query_plans)
- query_plans.emplace_back(std::move(plan));
-
- query_id_holder = std::move(rhs.query_id_holder);
-
- return *this;
-}
-
-Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes)
-{
- if (!source->getInputs().empty())
- throw Exception("Source for pipe shouldn't have any input, but " + source->getName() + " has " +
- toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
-
- if (!output)
- throw Exception("Cannot create Pipe from source because specified output port is nullptr",
- ErrorCodes::LOGICAL_ERROR);
-
- if (output == totals || output == extremes || (totals && totals == extremes))
- throw Exception("Cannot create Pipe from source because some of specified ports are the same",
- ErrorCodes::LOGICAL_ERROR);
-
- header = output->getHeader();
-
- /// Check that ports belong to source and all ports from source were specified.
- {
- auto & outputs = source->getOutputs();
- size_t num_specified_ports = 0;
-
- auto check_port_from_source = [&](OutputPort * port, std::string name)
- {
- if (!port)
- return;
-
- assertBlocksHaveEqualStructure(header, port->getHeader(), name);
-
- ++num_specified_ports;
-
- auto it = std::find_if(outputs.begin(), outputs.end(), [port](const OutputPort & p) { return &p == port; });
- if (it == outputs.end())
- throw Exception("Cannot create Pipe because specified " + name + " port does not belong to source",
- ErrorCodes::LOGICAL_ERROR);
- };
-
- check_port_from_source(output, "output");
- check_port_from_source(totals, "totals");
- check_port_from_source(extremes, "extremes");
-
- if (num_specified_ports != outputs.size())
- throw Exception("Cannot create Pipe from source because it has " + std::to_string(outputs.size()) +
- " output ports, but " + std::to_string(num_specified_ports) + " were specified",
- ErrorCodes::LOGICAL_ERROR);
- }
-
- totals_port = totals;
- extremes_port = extremes;
- output_ports.push_back(output);
- processors.emplace_back(std::move(source));
- max_parallel_streams = 1;
-}
-
-Pipe::Pipe(ProcessorPtr source)
-{
- if (auto * source_from_input_stream = typeid_cast<SourceFromInputStream *>(source.get()))
- {
- /// Special case for SourceFromInputStream. Will remove it later.
- totals_port = source_from_input_stream->getTotalsPort();
- extremes_port = source_from_input_stream->getExtremesPort();
- }
- else if (source->getOutputs().size() != 1)
- checkSource(*source);
-
- if (collected_processors)
- collected_processors->emplace_back(source);
-
- output_ports.push_back(&source->getOutputs().front());
- header = output_ports.front()->getHeader();
- processors.emplace_back(std::move(source));
- max_parallel_streams = 1;
-}
-
-Pipe::Pipe(Processors processors_) : processors(std::move(processors_))
-{
- /// Create hash table with processors.
- std::unordered_set<const IProcessor *> set;
- for (const auto & processor : processors)
- set.emplace(processor.get());
-
- for (auto & processor : processors)
- {
- for (const auto & port : processor->getInputs())
- {
- if (!port.isConnected())
- throw Exception("Cannot create Pipe because processor " + processor->getName() +
- " has not connected input port", ErrorCodes::LOGICAL_ERROR);
-
- const auto * connected_processor = &port.getOutputPort().getProcessor();
- if (set.count(connected_processor) == 0)
- throw Exception("Cannot create Pipe because processor " + processor->getName() +
- " has input port which is connected with unknown processor " +
- connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
- }
-
- for (auto & port : processor->getOutputs())
- {
- if (!port.isConnected())
- {
- output_ports.push_back(&port);
- continue;
- }
-
- const auto * connected_processor = &port.getInputPort().getProcessor();
- if (set.count(connected_processor) == 0)
- throw Exception("Cannot create Pipe because processor " + processor->getName() +
- " has output port which is connected with unknown processor " +
- connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- if (output_ports.empty())
- throw Exception("Cannot create Pipe because processors don't have any not-connected output ports",
- ErrorCodes::LOGICAL_ERROR);
-
- header = output_ports.front()->getHeader();
- for (size_t i = 1; i < output_ports.size(); ++i)
- assertBlocksHaveEqualStructure(header, output_ports[i]->getHeader(), "Pipe");
-
- max_parallel_streams = output_ports.size();
-
- if (collected_processors)
- for (const auto & processor : processors)
- collected_processors->emplace_back(processor);
-}
-
-static Pipes removeEmptyPipes(Pipes pipes)
-{
- Pipes res;
- res.reserve(pipes.size());
-
- for (auto & pipe : pipes)
- {
- if (!pipe.empty())
- res.emplace_back(std::move(pipe));
- }
-
- return res;
-}
-
-/// Calculate common header for pipes.
-/// This function is needed only to remove ColumnConst from the common header when some columns are const and some are not.
-/// E.g. if the first header is `x, const y, const z` and the second is `const x, y, const z`, the common header will be `x, y, const z`.
-static Block getCommonHeader(const Pipes & pipes)
-{
- Block res;
-
- for (const auto & pipe : pipes)
- {
- if (const auto & header = pipe.getHeader())
- {
- res = header;
- break;
- }
- }
-
- for (const auto & pipe : pipes)
- {
- const auto & header = pipe.getHeader();
- for (size_t i = 0; i < res.columns(); ++i)
- {
- /// We do not check that headers are compatible here. Will do it later.
-
- if (i >= header.columns())
- break;
-
- auto & common = res.getByPosition(i).column;
- const auto & cur = header.getByPosition(i).column;
-
- /// Only remove const from common header if it is not const for current pipe.
- if (cur && common && !isColumnConst(*cur))
- {
- if (const auto * column_const = typeid_cast<const ColumnConst *>(common.get()))
- common = column_const->getDataColumnPtr();
- }
- }
- }
-
- return res;
-}
-
-Pipe Pipe::unitePipes(Pipes pipes)
-{
- return Pipe::unitePipes(std::move(pipes), nullptr, false);
-}
-
-Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header)
-{
- Pipe res;
-
- for (auto & pipe : pipes)
- res.holder = std::move(pipe.holder); /// see move assignment for Pipe::Holder.
-
- pipes = removeEmptyPipes(std::move(pipes));
-
- if (pipes.empty())
- return res;
-
- if (pipes.size() == 1)
- {
- pipes[0].holder = std::move(res.holder);
- return std::move(pipes[0]);
- }
-
- OutputPortRawPtrs totals;
- OutputPortRawPtrs extremes;
- res.collected_processors = collected_processors;
- res.header = getCommonHeader(pipes);
-
- for (auto & pipe : pipes)
- {
- if (!allow_empty_header || pipe.header)
- assertCompatibleHeader(pipe.header, res.header, "Pipe::unitePipes");
-
- res.processors.insert(res.processors.end(), pipe.processors.begin(), pipe.processors.end());
- res.output_ports.insert(res.output_ports.end(), pipe.output_ports.begin(), pipe.output_ports.end());
-
- res.max_parallel_streams += pipe.max_parallel_streams;
-
- if (pipe.totals_port)
- totals.emplace_back(pipe.totals_port);
-
- if (pipe.extremes_port)
- extremes.emplace_back(pipe.extremes_port);
- }
-
- size_t num_processors = res.processors.size();
-
- res.totals_port = uniteTotals(totals, res.header, res.processors);
- res.extremes_port = uniteExtremes(extremes, res.header, res.processors);
-
- if (res.collected_processors)
- {
- for (; num_processors < res.processors.size(); ++num_processors)
- res.collected_processors->emplace_back(res.processors[num_processors]);
- }
-
- return res;
-}
-
-void Pipe::addSource(ProcessorPtr source)
-{
- checkSource(*source);
- const auto & source_header = source->getOutputs().front().getHeader();
-
- if (output_ports.empty())
- header = source_header;
- else
- assertBlocksHaveEqualStructure(header, source_header, "Pipes");
-
- if (collected_processors)
- collected_processors->emplace_back(source);
-
- output_ports.push_back(&source->getOutputs().front());
- processors.emplace_back(std::move(source));
-
- max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
-void Pipe::addTotalsSource(ProcessorPtr source)
-{
- if (output_ports.empty())
- throw Exception("Cannot add totals source to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- if (totals_port)
- throw Exception("Totals source was already added to Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- checkSource(*source);
- const auto & source_header = output_ports.front()->getHeader();
-
- assertBlocksHaveEqualStructure(header, source_header, "Pipes");
-
- if (collected_processors)
- collected_processors->emplace_back(source);
-
- totals_port = &source->getOutputs().front();
- processors.emplace_back(std::move(source));
-}
-
-void Pipe::addExtremesSource(ProcessorPtr source)
-{
- if (output_ports.empty())
- throw Exception("Cannot add extremes source to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- if (extremes_port)
- throw Exception("Extremes source was already added to Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- checkSource(*source);
- const auto & source_header = output_ports.front()->getHeader();
-
- assertBlocksHaveEqualStructure(header, source_header, "Pipes");
-
- if (collected_processors)
- collected_processors->emplace_back(source);
-
- extremes_port = &source->getOutputs().front();
- processors.emplace_back(std::move(source));
-}
-
-static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors)
-{
- if (port == nullptr)
- return;
-
- auto null_sink = std::make_shared<NullSink>(port->getHeader());
- connect(*port, null_sink->getPort());
-
- if (collected_processors)
- collected_processors->emplace_back(null_sink);
-
- processors.emplace_back(std::move(null_sink));
- port = nullptr;
-}
-
-void Pipe::dropTotals()
-{
- dropPort(totals_port, processors, collected_processors);
-}
-
-void Pipe::dropExtremes()
-{
- dropPort(extremes_port, processors, collected_processors);
-}
-
-void Pipe::addTransform(ProcessorPtr transform)
-{
- addTransform(std::move(transform), static_cast<OutputPort *>(nullptr), static_cast<OutputPort *>(nullptr));
-}
-
-void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes)
-{
- if (output_ports.empty())
- throw Exception("Cannot add transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- auto & inputs = transform->getInputs();
- if (inputs.size() != output_ports.size())
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
- "Processor has " + std::to_string(inputs.size()) + " input ports, "
- "but " + std::to_string(output_ports.size()) + " expected", ErrorCodes::LOGICAL_ERROR);
-
- if (totals && totals_port)
- throw Exception("Cannot add transform with totals to Pipe because it already has totals.",
- ErrorCodes::LOGICAL_ERROR);
-
- if (extremes && extremes_port)
- throw Exception("Cannot add transform with extremes to Pipe because it already has extremes.",
- ErrorCodes::LOGICAL_ERROR);
-
- if (totals)
- totals_port = totals;
- if (extremes)
- extremes_port = extremes;
-
- size_t next_output = 0;
- for (auto & input : inputs)
- {
- connect(*output_ports[next_output], input);
- ++next_output;
- }
-
- auto & outputs = transform->getOutputs();
-
- output_ports.clear();
- output_ports.reserve(outputs.size());
-
- bool found_totals = false;
- bool found_extremes = false;
-
- for (auto & output : outputs)
- {
- if (&output == totals)
- found_totals = true;
- else if (&output == extremes)
- found_extremes = true;
- else
- output_ports.emplace_back(&output);
- }
-
- if (totals && !found_totals)
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
- "specified totals port does not belong to it", ErrorCodes::LOGICAL_ERROR);
-
- if (extremes && !found_extremes)
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
- "specified extremes port does not belong to it", ErrorCodes::LOGICAL_ERROR);
-
- if (output_ports.empty())
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because it has no outputs",
- ErrorCodes::LOGICAL_ERROR);
-
- header = output_ports.front()->getHeader();
- for (size_t i = 1; i < output_ports.size(); ++i)
- assertBlocksHaveEqualStructure(header, output_ports[i]->getHeader(), "Pipes");
-
-    // Temporarily skip this check. TotalsHavingTransform may return finalized totals but not finalized data.
- // if (totals_port)
- // assertBlocksHaveEqualStructure(header, totals_port->getHeader(), "Pipes");
-
- if (extremes_port)
- assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes");
-
- if (collected_processors)
- collected_processors->emplace_back(transform);
-
- processors.emplace_back(std::move(transform));
-
- max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
-void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes)
-{
- if (output_ports.empty())
- throw Exception("Cannot add transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- auto & inputs = transform->getInputs();
- size_t expected_inputs = output_ports.size() + (totals ? 1 : 0) + (extremes ? 1 : 0);
- if (inputs.size() != expected_inputs)
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
- "Processor has " + std::to_string(inputs.size()) + " input ports, "
- "but " + std::to_string(expected_inputs) + " expected", ErrorCodes::LOGICAL_ERROR);
-
- if (totals && !totals_port)
- throw Exception("Cannot add transform consuming totals to Pipe because Pipe does not have totals.",
- ErrorCodes::LOGICAL_ERROR);
-
- if (extremes && !extremes_port)
-        throw Exception("Cannot add transform consuming extremes to Pipe because Pipe does not have extremes.",
- ErrorCodes::LOGICAL_ERROR);
-
- if (totals)
- {
- connect(*totals_port, *totals);
- totals_port = nullptr;
- }
- if (extremes)
- {
- connect(*extremes_port, *extremes);
- extremes_port = nullptr;
- }
-
- bool found_totals = false;
- bool found_extremes = false;
-
- size_t next_output = 0;
- for (auto & input : inputs)
- {
- if (&input == totals)
- found_totals = true;
- else if (&input == extremes)
- found_extremes = true;
- else
- {
- connect(*output_ports[next_output], input);
- ++next_output;
- }
- }
-
- if (totals && !found_totals)
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
- "specified totals port does not belong to it", ErrorCodes::LOGICAL_ERROR);
-
- if (extremes && !found_extremes)
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
- "specified extremes port does not belong to it", ErrorCodes::LOGICAL_ERROR);
-
- auto & outputs = transform->getOutputs();
- if (outputs.empty())
- throw Exception("Cannot add transform " + transform->getName() + " to Pipes because it has no outputs",
- ErrorCodes::LOGICAL_ERROR);
-
- output_ports.clear();
- output_ports.reserve(outputs.size());
-
- for (auto & output : outputs)
- output_ports.emplace_back(&output);
-
- header = output_ports.front()->getHeader();
- for (size_t i = 1; i < output_ports.size(); ++i)
- assertBlocksHaveEqualStructure(header, output_ports[i]->getHeader(), "Pipes");
-
- if (totals_port)
- assertBlocksHaveEqualStructure(header, totals_port->getHeader(), "Pipes");
-
- if (extremes_port)
- assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes");
-
- if (collected_processors)
- collected_processors->emplace_back(transform);
-
- processors.emplace_back(std::move(transform));
-
- max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
-void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter)
-{
- if (output_ports.empty())
- throw Exception("Cannot add simple transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- Block new_header;
-
- auto add_transform = [&](OutputPort *& port, StreamType stream_type)
- {
- if (!port)
- return;
-
- auto transform = getter(port->getHeader(), stream_type);
-
- if (transform)
- {
- if (transform->getInputs().size() != 1)
- throw Exception("Processor for query pipeline transform should have single input, "
- "but " + transform->getName() + " has " +
- toString(transform->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
-
- if (transform->getOutputs().size() != 1)
- throw Exception("Processor for query pipeline transform should have single output, "
- "but " + transform->getName() + " has " +
- toString(transform->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
- }
-
- const auto & out_header = transform ? transform->getOutputs().front().getHeader()
- : port->getHeader();
-
- if (new_header)
- assertBlocksHaveEqualStructure(new_header, out_header, "QueryPipeline");
- else
- new_header = out_header;
-
- if (transform)
- {
- connect(*port, transform->getInputs().front());
- port = &transform->getOutputs().front();
-
- if (collected_processors)
- collected_processors->emplace_back(transform);
-
- processors.emplace_back(std::move(transform));
- }
- };
-
- for (auto & port : output_ports)
- add_transform(port, StreamType::Main);
-
- add_transform(totals_port, StreamType::Totals);
- add_transform(extremes_port, StreamType::Extremes);
-
- header = std::move(new_header);
-}
-
-void Pipe::addSimpleTransform(const ProcessorGetter & getter)
-{
- addSimpleTransform([&](const Block & stream_header, StreamType) { return getter(stream_header); });
-}
-
-void Pipe::resize(size_t num_streams, bool force, bool strict)
-{
- if (output_ports.empty())
- throw Exception("Cannot resize an empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- if (!force && num_streams == numOutputPorts())
- return;
-
- ProcessorPtr resize;
-
- if (strict)
- resize = std::make_shared<StrictResizeProcessor>(getHeader(), numOutputPorts(), num_streams);
- else
- resize = std::make_shared<ResizeProcessor>(getHeader(), numOutputPorts(), num_streams);
-
- addTransform(std::move(resize));
-}
-
-void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
-{
- if (output_ports.empty())
- throw Exception("Cannot set sink to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- auto add_transform = [&](OutputPort *& stream, Pipe::StreamType stream_type)
- {
- if (!stream)
- return;
-
- auto transform = getter(stream->getHeader(), stream_type);
-
- if (transform)
- {
- if (transform->getInputs().size() != 1)
- throw Exception("Sink for query pipeline transform should have single input, "
- "but " + transform->getName() + " has " +
- toString(transform->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
-
- if (!transform->getOutputs().empty())
- throw Exception("Sink for query pipeline transform should have no outputs, "
- "but " + transform->getName() + " has " +
- toString(transform->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
- }
-
- if (!transform)
- transform = std::make_shared<NullSink>(stream->getHeader());
-
- connect(*stream, transform->getInputs().front());
- processors.emplace_back(std::move(transform));
- };
-
- for (auto & port : output_ports)
- add_transform(port, StreamType::Main);
-
- add_transform(totals_port, StreamType::Totals);
- add_transform(extremes_port, StreamType::Extremes);
-
- output_ports.clear();
- header.clear();
-}
-
-void Pipe::setOutputFormat(ProcessorPtr output)
-{
- if (output_ports.empty())
- throw Exception("Cannot set output format to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- if (output_ports.size() != 1)
- throw Exception("Cannot set output format to Pipe because single output port is expected, "
- "but it has " + std::to_string(output_ports.size()) + " ports", ErrorCodes::LOGICAL_ERROR);
-
- auto * format = dynamic_cast<IOutputFormat * >(output.get());
-
- if (!format)
- throw Exception("IOutputFormat processor expected for QueryPipeline::setOutputFormat.",
- ErrorCodes::LOGICAL_ERROR);
-
- auto & main = format->getPort(IOutputFormat::PortKind::Main);
- auto & totals = format->getPort(IOutputFormat::PortKind::Totals);
- auto & extremes = format->getPort(IOutputFormat::PortKind::Extremes);
-
- if (!totals_port)
- addTotalsSource(std::make_shared<NullSource>(totals.getHeader()));
-
- if (!extremes_port)
- addExtremesSource(std::make_shared<NullSource>(extremes.getHeader()));
-
- if (collected_processors)
- collected_processors->emplace_back(output);
-
- processors.emplace_back(std::move(output));
-
- connect(*output_ports.front(), main);
- connect(*totals_port, totals);
- connect(*extremes_port, extremes);
-
- output_ports.clear();
- header.clear();
-}
-
-void Pipe::transform(const Transformer & transformer)
-{
- if (output_ports.empty())
- throw Exception("Cannot transform empty Pipe.", ErrorCodes::LOGICAL_ERROR);
-
- auto new_processors = transformer(output_ports);
-
- /// Create hash table with new processors.
- std::unordered_set<const IProcessor *> set;
- for (const auto & processor : new_processors)
- set.emplace(processor.get());
-
- for (const auto & port : output_ports)
- {
- if (!port->isConnected())
- throw Exception("Transformation of Pipe is not valid because output port (" +
- port->getHeader().dumpStructure() + ") is not connected", ErrorCodes::LOGICAL_ERROR);
-
- set.emplace(&port->getProcessor());
- }
-
- output_ports.clear();
-
- for (const auto & processor : new_processors)
- {
- for (const auto & port : processor->getInputs())
- {
- if (!port.isConnected())
- throw Exception("Transformation of Pipe is not valid because processor " + processor->getName() +
- " has not connected input port", ErrorCodes::LOGICAL_ERROR);
-
- const auto * connected_processor = &port.getOutputPort().getProcessor();
- if (set.count(connected_processor) == 0)
- throw Exception("Transformation of Pipe is not valid because processor " + processor->getName() +
- " has input port which is connected with unknown processor " +
- connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
- }
-
- for (auto & port : processor->getOutputs())
- {
- if (!port.isConnected())
- {
- output_ports.push_back(&port);
- continue;
- }
-
- const auto * connected_processor = &port.getInputPort().getProcessor();
- if (set.count(connected_processor) == 0)
- throw Exception("Transformation of Pipe is not valid because processor " + processor->getName() +
- " has output port which is connected with unknown processor " +
- connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
- }
- }
-
- if (output_ports.empty())
- throw Exception("Transformation of Pipe is not valid because processors don't have any "
- "not-connected output ports", ErrorCodes::LOGICAL_ERROR);
-
- header = output_ports.front()->getHeader();
- for (size_t i = 1; i < output_ports.size(); ++i)
- assertBlocksHaveEqualStructure(header, output_ports[i]->getHeader(), "Pipe");
-
- if (totals_port)
- assertBlocksHaveEqualStructure(header, totals_port->getHeader(), "Pipes");
-
- if (extremes_port)
- assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes");
-
- if (collected_processors)
- {
- for (const auto & processor : new_processors)
- collected_processors->emplace_back(processor);
- }
-
- processors.insert(processors.end(), new_processors.begin(), new_processors.end());
-
- max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
-void Pipe::setLimits(const StreamLocalLimits & limits)
-{
- for (auto & processor : processors)
- {
- if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get()))
- source_with_progress->setLimits(limits);
- }
-}
-
-void Pipe::setLeafLimits(const SizeLimits & leaf_limits)
-{
- for (auto & processor : processors)
- {
- if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get()))
- source_with_progress->setLeafLimits(leaf_limits);
- }
-}
-
-void Pipe::setQuota(const std::shared_ptr<const EnabledQuota> & quota)
-{
- for (auto & processor : processors)
- {
- if (auto * source_with_progress = dynamic_cast<ISourceWithProgress *>(processor.get()))
- source_with_progress->setQuota(quota);
- }
-}
-
-}
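The methods in the file above compose a pipeline in a fixed order: sources first, then transforms, then an optional resize, and finally sinks or an output format. A short usage sketch of that flow, where SomeTransform is a hypothetical single-input/single-output processor and makeHeader() an assumed helper, neither taken from the deleted sources:

    // Hedged usage sketch of the deleted Pipe API.
    NDB::Block header = makeHeader();                               // assumed helper building the stream header
    NDB::Pipe pipe(std::make_shared<NDB::NullSource>(header));      // single source => one output port
    pipe.addSimpleTransform([](const NDB::Block & stream_header)
    {
        return std::make_shared<SomeTransform>(stream_header);      // hypothetical 1-input/1-output IProcessor
    });
    pipe.resize(4);                                                  // fan out to 4 streams via ResizeProcessor
    pipe.setSinks([](const NDB::Block & stream_header, NDB::Pipe::StreamType)
    {
        return std::make_shared<NDB::NullSink>(stream_header);      // every remaining port must end in a sink
    });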
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/EmptySink.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/EmptySink.h
deleted file mode 100644
index 87ad5ecccf7..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/EmptySink.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-#include <Processors/ISink.h>
-
-namespace NDB
-{
-
-/// Sink which reads everything and does nothing with it.
-class EmptySink : public ISink
-{
-public:
- explicit EmptySink(Block header) : ISink(std::move(header)) {}
- String getName() const override { return "EmptySink"; }
-
-protected:
- void consume(Chunk) override {}
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/NullSink.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/NullSink.h
deleted file mode 100644
index e07338b6425..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sinks/NullSink.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-#include <Processors/ISink.h>
-
-namespace NDB
-{
-
-/// Sink which closes the input port and reads nothing.
-class NullSink : public ISink
-{
-public:
- explicit NullSink(Block header) : ISink(std::move(header)) {}
- String getName() const override { return "NullSink"; }
-
- Status prepare() override
- {
- input.close();
- return Status::Finished;
- }
-protected:
- void consume(Chunk) override {}
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h
deleted file mode 100644
index 8a3b9f89273..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/NullSource.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-#include <Processors/ISource.h>
-
-
-namespace NDB
-{
-
-class NullSource : public ISource
-{
-public:
- explicit NullSource(Block header) : ISource(std::move(header)) {}
- String getName() const override { return "NullSource"; }
-
-protected:
- Chunk generate() override { return Chunk(); }
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp
deleted file mode 100644
index 6af84e2cc31..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-#include <Processors/Sources/SourceFromInputStream.h>
-#include <Processors/Transforms/AggregatingTransform.h>
-#include <DataTypes/DataTypeAggregateFunction.h>
-#include <DataStreams/RemoteBlockInputStream.h>
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_)
- : ISourceWithProgress(stream_->getHeader())
- , force_add_aggregating_info(force_add_aggregating_info_)
- , stream(std::move(stream_))
-{
- init();
-}
-
-void SourceFromInputStream::init()
-{
- const auto & sample = getPort().getHeader();
- for (auto & type : sample.getDataTypes())
- if (typeid_cast<const DataTypeAggregateFunction *>(type.get()))
- has_aggregate_functions = true;
-}
-
-void SourceFromInputStream::addTotalsPort()
-{
- if (totals_port)
- throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR);
-
- outputs.emplace_back(outputs.front().getHeader(), this);
- totals_port = &outputs.back();
-}
-
-void SourceFromInputStream::addExtremesPort()
-{
- if (extremes_port)
- throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR);
-
- outputs.emplace_back(outputs.front().getHeader(), this);
- extremes_port = &outputs.back();
-}
-
-IProcessor::Status SourceFromInputStream::prepare()
-{
- auto status = ISource::prepare();
-
- if (status == Status::Finished)
- {
- is_generating_finished = true;
-
- /// Read postfix and get totals if needed.
-        /// Read the stream suffix and get totals if needed.
- return Status::Ready;
-
- if (totals_port && !totals_port->isFinished())
- {
- if (has_totals)
- {
- if (!totals_port->canPush())
- return Status::PortFull;
-
- totals_port->push(std::move(totals));
- has_totals = false;
- }
-
- totals_port->finish();
- }
-
- if (extremes_port && !extremes_port->isFinished())
- {
- if (has_extremes)
- {
- if (!extremes_port->canPush())
- return Status::PortFull;
-
- extremes_port->push(std::move(extremes));
- has_extremes = false;
- }
-
- extremes_port->finish();
- }
- }
-
- return status;
-}
-
-void SourceFromInputStream::work()
-{
- if (!is_generating_finished)
- {
- try
- {
- ISource::work();
- }
- catch (...)
- {
- /// Won't read suffix in case of exception.
- is_stream_finished = true;
- throw;
- }
-
- return;
- }
-
- if (is_stream_finished)
- return;
-
-    /// Don't cancel for RemoteBlockInputStream (otherwise readSuffix can get stuck)
- if (!typeid_cast<const RemoteBlockInputStream *>(stream.get()))
- stream->cancel(false);
-
- if (rows_before_limit)
- {
- const auto & info = stream->getProfileInfo();
- if (info.hasAppliedLimit())
- rows_before_limit->add(info.getRowsBeforeLimit());
- }
-
- stream->readSuffix();
-
- if (auto totals_block = stream->getTotals())
- {
- totals.setColumns(totals_block.getColumns(), 1);
- has_totals = true;
- }
-
- is_stream_finished = true;
-}
-
-Chunk SourceFromInputStream::generate()
-{
- if (is_stream_finished)
- return {};
-
- if (!is_stream_started)
- {
- stream->readPrefix();
- is_stream_started = true;
- }
-
- auto block = stream->read();
- if (!block && !isCancelled())
- {
- if (rows_before_limit)
- {
- const auto & info = stream->getProfileInfo();
- if (info.hasAppliedLimit())
- rows_before_limit->add(info.getRowsBeforeLimit());
- }
-
- stream->readSuffix();
-
- if (auto totals_block = stream->getTotals())
- {
- if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it.
- {
- totals.setColumns(totals_block.getColumns(), totals_block.rows());
- has_totals = true;
- }
- }
-
- if (auto extremes_block = stream->getExtremes())
- {
- if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it.
- {
- extremes.setColumns(extremes_block.getColumns(), extremes_block.rows());
- has_extremes = true;
- }
- }
-
- is_stream_finished = true;
- return {};
- }
-
- if (isCancelled())
- return {};
-
-#ifndef NDEBUG
- assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream");
-#endif
-
- UInt64 num_rows = block.rows();
- Chunk chunk(block.getColumns(), num_rows);
-
- if (force_add_aggregating_info || has_aggregate_functions)
- {
- auto info = std::make_shared<AggregatedChunkInfo>();
- info->bucket_num = block.info.bucket_num;
- info->is_overflows = block.info.is_overflows;
- chunk.setChunkInfo(std::move(info));
- }
-
- return chunk;
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h
deleted file mode 100644
index aaf7398e3c5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromInputStream.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#pragma once
-
-#include <Processors/Sources/SourceWithProgress.h>
-#include <Processors/RowsBeforeLimitCounter.h>
-#include <DataStreams/IBlockInputStream.h>
-
-
-namespace NDB
-{
-
-class IBlockInputStream;
-using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
-
-/// Wrapper for IBlockInputStream which implements ISourceWithProgress.
-class SourceFromInputStream : public ISourceWithProgress
-{
-public:
-    /// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with bucket number and is_overflows flag) will be added to the result chunk.
- explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false);
- String getName() const override { return "SourceFromInputStream"; }
-
- Status prepare() override;
- void work() override;
-
- Chunk generate() override;
-
- BlockInputStreamPtr & getStream() { return stream; }
-
- void addTotalsPort();
- void addExtremesPort();
-
- OutputPort * getTotalsPort() const { return totals_port; }
- OutputPort * getExtremesPort() const { return extremes_port; }
-
- void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); }
-
- /// Implementation for methods from ISourceWithProgress.
- void setLimits(const StreamLocalLimits & limits_) final { stream->setLimits(limits_); }
- void setLeafLimits(const SizeLimits &) final { }
- void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { stream->setQuota(quota_); }
- void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); }
- void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); }
- void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); }
-
- /// Stop reading from stream if output port is finished.
- void onUpdatePorts() override
- {
- if (getPort().isFinished())
- cancel();
- }
-
-protected:
- void onCancel() override { stream->cancel(false); }
-
-private:
- bool has_aggregate_functions = false;
- bool force_add_aggregating_info = false;
- BlockInputStreamPtr stream;
-
- RowsBeforeLimitCounterPtr rows_before_limit;
-
- Chunk totals;
- OutputPort * totals_port = nullptr;
- bool has_totals = false;
-
- Chunk extremes;
- OutputPort * extremes_port = nullptr;
- bool has_extremes = false;
-
- bool is_generating_finished = false;
- bool is_stream_finished = false;
- bool is_stream_started = false;
-
- void init();
-};
-
-}
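A brief sketch of how this adapter is typically plugged into a Pipe, assuming some_legacy_stream() is a hypothetical factory returning a BlockInputStreamPtr (the Pipe(ProcessorPtr) constructor in the deleted Pipe.cpp special-cases SourceFromInputStream and picks up the extra ports):

    // Hedged usage sketch; the legacy stream factory is an assumption.
    NDB::BlockInputStreamPtr legacy = some_legacy_stream();          // assumed factory for a legacy stream
    auto source = std::make_shared<NDB::SourceFromInputStream>(legacy);
    source->addTotalsPort();                                          // optional extra output ports
    source->addExtremesPort();
    NDB::Pipe pipe(std::move(source));                                // Pipe picks up totals/extremes ports from this source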
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h
deleted file mode 100644
index 96b70ce7390..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceFromSingleChunk.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-#include <Processors/Sources/SourceWithProgress.h>
-
-
-namespace NDB
-{
-
-class SourceFromSingleChunk : public SourceWithProgress
-{
-public:
- explicit SourceFromSingleChunk(Block header, Chunk chunk_) : SourceWithProgress(std::move(header)), chunk(std::move(chunk_)) {}
- String getName() const override { return "SourceFromSingleChunk"; }
-
-protected:
- Chunk generate() override { return std::move(chunk); }
-
-private:
- Chunk chunk;
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp
deleted file mode 100644
index 8f1da8527e5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-#include <Processors/Sources/SourceWithProgress.h>
-
-#include <Interpreters/ProcessList.h>
-#include <Access/EnabledQuota.h>
-
-namespace ProfileEvents
-{
- extern const Event SelectedRows;
- extern const Event SelectedBytes;
-}
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int TOO_MANY_ROWS;
- extern const int TOO_MANY_BYTES;
-}
-
-SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress)
- : ISourceWithProgress(header), auto_progress(enable_auto_progress)
-{
-}
-
-void SourceWithProgress::setProcessListElement(QueryStatus * elem)
-{
- process_list_elem = elem;
-
- /// Update total_rows_approx as soon as possible.
- ///
-    /// It is important to do this, since you will not get a correct
-    /// total_rows_approx until the query starts reading all parts (in case
-    /// the query needs to read from multiple parts), and this is especially a
-    /// problem when max_threads=1.
-    ///
-    /// NOTE: This can be done only if the progress callback is already set, since
-    /// otherwise total_rows_approx will be lost.
- if (total_rows_approx != 0 && progress_callback)
- {
- Progress total_rows_progress = {0, 0, total_rows_approx};
-
- progress_callback(total_rows_progress);
- process_list_elem->updateProgressIn(total_rows_progress);
-
- total_rows_approx = 0;
- }
-}
-
-void SourceWithProgress::work()
-{
- if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
- {
- cancel();
- }
- else
- {
- was_progress_called = false;
-
- ISourceWithProgress::work();
-
- if (auto_progress && !was_progress_called && has_input)
- progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() });
- }
-}
-
-/// Aggregated copy-paste from IBlockInputStream::progressImpl.
-/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream.
-void SourceWithProgress::progress(const Progress & value)
-{
- was_progress_called = true;
-
- if (total_rows_approx != 0)
- {
- Progress total_rows_progress = {0, 0, total_rows_approx};
-
- if (progress_callback)
- progress_callback(total_rows_progress);
-
- if (process_list_elem)
- process_list_elem->updateProgressIn(total_rows_progress);
-
- total_rows_approx = 0;
- }
-
- if (progress_callback)
- progress_callback(value);
-
- if (process_list_elem)
- {
- if (!process_list_elem->updateProgressIn(value))
- cancel();
-
- /// The total amount of data processed or intended for processing in all sources, possibly on remote servers.
-
- ProgressValues progress = process_list_elem->getProgressIn();
-
-        /// If the mode is "throw" and the estimate of total rows is known, then throw early if the estimate is too high.
-        /// If the mode is "break", then allow reading up to the limit even if the estimate is very high.
-
- size_t rows_to_check_limit = progress.read_rows;
- if (limits.size_limits.overflow_mode == OverflowMode::THROW && progress.total_rows_to_read > progress.read_rows)
- rows_to_check_limit = progress.total_rows_to_read;
-
- /// Check the restrictions on the
- /// * amount of data to read
- /// * speed of the query
- /// * quota on the amount of data to read
- /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
-
- if (limits.mode == LimitsMode::LIMITS_TOTAL)
- {
- if (!limits.size_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read",
- ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
- {
- cancel();
- }
- }
-
- if (!leaf_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read on leaf node",
- ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
- {
- cancel();
- }
-
- size_t total_rows = progress.total_rows_to_read;
-
- constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
- UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
-
- if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
- {
- /// Should be done in PipelineExecutor.
-            /// It is here for compatibility with IBlockInputStream.
- CurrentThread::updatePerformanceCounters();
- last_profile_events_update_time = total_elapsed_microseconds;
- }
-
- /// Should be done in PipelineExecutor.
-        /// It is here for compatibility with IBlockInputStream.
- limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
-
- if (quota && limits.mode == LimitsMode::LIMITS_TOTAL)
- quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes});
- }
-
- ProfileEvents::increment(ProfileEvents::SelectedRows, value.read_rows);
- ProfileEvents::increment(ProfileEvents::SelectedBytes, value.read_bytes);
-}
-
-}
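The checks performed in progress() above are driven by the StreamLocalLimits and SizeLimits installed on the source. A configuration sketch, with the SizeLimits field names assumed from the upstream ClickHouse definitions of these structs rather than taken from this tree:

    // Hedged configuration sketch; field names and the surrounding pipe are assumptions.
    NDB::StreamLocalLimits limits;
    limits.mode = NDB::LimitsMode::LIMITS_TOTAL;                     // enforce limits on the total data read
    limits.size_limits.max_rows = 1000000;                           // max_rows / overflow_mode: assumed SizeLimits fields
    limits.size_limits.overflow_mode = NDB::OverflowMode::THROW;     // fail early once the estimate exceeds the limit
    pipe.setLimits(limits);                                           // pipe: an NDB::Pipe as in Pipe.cpp above (assumed)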
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h
deleted file mode 100644
index 5dd2e30bcf0..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Sources/SourceWithProgress.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#pragma once
-#include <Processors/ISource.h>
-#include <Common/Stopwatch.h>
-#include <DataStreams/StreamLocalLimits.h>
-#include <IO/Progress.h>
-
-
-namespace NDB
-{
-
-class QueryStatus;
-class EnabledQuota;
-
-/// Adds progress to ISource.
-/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread.
-class ISourceWithProgress : public ISource
-{
-public:
- using ISource::ISource;
-
-    /// Set limitations that are checked on each chunk.
- virtual void setLimits(const StreamLocalLimits & limits_) = 0;
-
-    /// Set limitations that are checked on each chunk for distributed queries on leaf nodes.
- virtual void setLeafLimits(const SizeLimits & leaf_limits_) = 0;
-
-    /// Set the quota. If you set a quota on the amount of raw data,
-    /// then you should also set mode = LIMITS_TOTAL in LocalLimits with setLimits.
- virtual void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) = 0;
-
- /// Set the pointer to the process list item.
- /// General information about the resources spent on the request will be written into it.
- /// Based on this information, the quota and some restrictions will be checked.
- /// This information will also be available in the SHOW PROCESSLIST request.
- virtual void setProcessListElement(QueryStatus * elem) = 0;
-
- /// Set the execution progress bar callback.
- /// It is called after each chunk.
- /// The function takes the number of rows in the last chunk, the number of bytes in the last chunk.
- /// Note that the callback can be called from different threads.
- virtual void setProgressCallback(const ProgressCallback & callback) = 0;
-
- /// Set the approximate total number of rows to read.
- virtual void addTotalRowsApprox(size_t value) = 0;
-};
-
-/// Implementation for ISourceWithProgress
-class SourceWithProgress : public ISourceWithProgress
-{
-public:
- using ISourceWithProgress::ISourceWithProgress;
- /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk.
- SourceWithProgress(Block header, bool enable_auto_progress);
-
- void setLimits(const StreamLocalLimits & limits_) final { limits = limits_; }
-    void setLeafLimits(const SizeLimits & leaf_limits_) final { leaf_limits = leaf_limits_; }
- void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) final { quota = quota_; }
- void setProcessListElement(QueryStatus * elem) final;
- void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; }
- void addTotalRowsApprox(size_t value) final { total_rows_approx += value; }
-
-protected:
- /// Call this method to provide information about progress.
- void progress(const Progress & value);
-
- void work() override;
-
-private:
- StreamLocalLimits limits;
- SizeLimits leaf_limits;
- std::shared_ptr<const EnabledQuota> quota;
- ProgressCallback progress_callback;
- QueryStatus * process_list_elem = nullptr;
-
- /// The approximate total number of rows to read. For progress bar.
- size_t total_rows_approx = 0;
-
- Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time.
- /// According to total_stopwatch in microseconds.
- UInt64 last_profile_events_update_time = 0;
-
-    /// This flag tracks whether progress() was called manually during the generate() call.
-    /// If not, it will be called for the chunk after generate() has finished.
- bool was_progress_called = false;
-
- /// If enabled, progress() will be automatically called on each generated chunk.
- bool auto_progress = true;
-};
-
-}
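To illustrate the interface described above, a minimal standalone sketch (assumed, simplified names; not the NDB classes) of the progress-callback pattern: a source reports per-chunk rows/bytes and an approximate total, and the caller accumulates them for a progress bar:

    // Simplified stand-in for the progress-reporting pattern; all names are illustrative.
    #include <cstddef>
    #include <cstdio>
    #include <functional>

    struct ProgressSketch { std::size_t read_rows = 0; std::size_t read_bytes = 0; };
    using ProgressCallbackSketch = std::function<void(const ProgressSketch &)>;

    class CountingSource
    {
    public:
        void setProgressCallback(ProgressCallbackSketch cb) { callback = std::move(cb); }
        void addTotalRowsApprox(std::size_t rows) { total_rows_approx += rows; }
        std::size_t totalRowsApprox() const { return total_rows_approx; }

        // Called once per generated chunk, like progress() in the class above.
        void emitChunk(std::size_t rows, std::size_t bytes)
        {
            if (callback)
                callback({rows, bytes});
        }

    private:
        ProgressCallbackSketch callback;
        std::size_t total_rows_approx = 0;
    };

    int main()
    {
        CountingSource source;
        source.addTotalRowsApprox(1000000);
        std::size_t seen_rows = 0;
        source.setProgressCallback([&](const ProgressSketch & p) { seen_rows += p.read_rows; });

        for (int i = 0; i < 3; ++i)
            source.emitChunk(65536, 65536 * 8);

        std::printf("%zu of ~%zu rows\n", seen_rows, source.totalRowsApprox());
    }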
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp
deleted file mode 100644
index d74b75179b8..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.cpp
+++ /dev/null
@@ -1,634 +0,0 @@
-#include <Processors/Transforms/AggregatingTransform.h>
-
-#include <DataStreams/NativeBlockInputStream.h>
-#include <Processors/ISource.h>
-#include <Processors/Pipe.h>
-#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
-#include <DataStreams/materializeBlock.h>
-
-namespace ProfileEvents
-{
- extern const Event ExternalAggregationMerge;
-}
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
- extern const int LOGICAL_ERROR;
-}
-
-/// Convert block to chunk.
-/// Adds additional info about aggregation.
-Chunk convertToChunk(const Block & block)
-{
- auto info = std::make_shared<AggregatedChunkInfo>();
- info->bucket_num = block.info.bucket_num;
- info->is_overflows = block.info.is_overflows;
-
- UInt64 num_rows = block.rows();
- Chunk chunk(block.getColumns(), num_rows);
- chunk.setChunkInfo(std::move(info));
-
- return chunk;
-}
-
-namespace
-{
- const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
- {
- const auto & info = chunk.getChunkInfo();
- if (!info)
- throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR);
-
- return agg_info;
- }
-
-    /// Reads chunks from a file in native format. Provides chunks with aggregation info.
- class SourceFromNativeStream : public ISource
- {
- public:
- SourceFromNativeStream(const Block & header, const std::string & path)
- : ISource(header), file_in(path), compressed_in(file_in),
- block_in(std::make_shared<NativeBlockInputStream>(compressed_in, DBMS_TCP_PROTOCOL_VERSION))
- {
- block_in->readPrefix();
- }
-
- String getName() const override { return "SourceFromNativeStream"; }
-
- Chunk generate() override
- {
- if (!block_in)
- return {};
-
- auto block = block_in->read();
- if (!block)
- {
- block_in->readSuffix();
- block_in.reset();
- return {};
- }
-
- return convertToChunk(block);
- }
-
- private:
- ReadBufferFromFile file_in;
- CompressedReadBuffer compressed_in;
- BlockInputStreamPtr block_in;
- };
-}
-
-/// Worker which merges buckets for two-level aggregation.
-/// Atomically increments bucket counter and returns merged result.
-class ConvertingAggregatedToChunksSource : public ISource
-{
-public:
- static constexpr UInt32 NUM_BUCKETS = 256;
-
- struct SharedData
- {
- std::atomic<UInt32> next_bucket_to_merge = 0;
- std::array<std::atomic<bool>, NUM_BUCKETS> is_bucket_processed{};
- std::atomic<bool> is_cancelled = false;
-
- SharedData()
- {
- for (auto & flag : is_bucket_processed)
- flag = false;
- }
- };
-
- using SharedDataPtr = std::shared_ptr<SharedData>;
-
- ConvertingAggregatedToChunksSource(
- AggregatingTransformParamsPtr params_,
- ManyAggregatedDataVariantsPtr data_,
- SharedDataPtr shared_data_,
- Arena * arena_)
- : ISource(params_->getHeader())
- , params(std::move(params_))
- , data(std::move(data_))
- , shared_data(std::move(shared_data_))
- , arena(arena_)
- {}
-
- String getName() const override { return "ConvertingAggregatedToChunksSource"; }
-
-protected:
- Chunk generate() override
- {
- UInt32 bucket_num = shared_data->next_bucket_to_merge.fetch_add(1);
-
- if (bucket_num >= NUM_BUCKETS)
- return {};
-
- Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled);
- Chunk chunk = convertToChunk(block);
-
- shared_data->is_bucket_processed[bucket_num] = true;
-
- return chunk;
- }
-
-private:
- AggregatingTransformParamsPtr params;
- ManyAggregatedDataVariantsPtr data;
- SharedDataPtr shared_data;
- Arena * arena;
-};
-
-/// Generates chunks with aggregated data.
-/// In the single-level case, it aggregates the data itself.
-/// In the two-level case, it creates `ConvertingAggregatedToChunksSource` workers:
-///
-/// ConvertingAggregatedToChunksSource ->
-/// ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform -> AggregatingTransform
-/// ConvertingAggregatedToChunksSource ->
-///
-/// Result chunks are guaranteed to be sorted by bucket number.
-class ConvertingAggregatedToChunksTransform : public IProcessor
-{
-public:
- ConvertingAggregatedToChunksTransform(AggregatingTransformParamsPtr params_, ManyAggregatedDataVariantsPtr data_, size_t num_threads_)
- : IProcessor({}, {params_->getHeader()})
- , params(std::move(params_)), data(std::move(data_)), num_threads(num_threads_) {}
-
- String getName() const override { return "ConvertingAggregatedToChunksTransform"; }
-
- void work() override
- {
- if (data->empty())
- {
- finished = true;
- return;
- }
-
- if (!is_initialized)
- {
- initialize();
- return;
- }
-
- if (data->at(0)->isTwoLevel())
- {
-            /// In the two-level case we will only create sources.
- if (inputs.empty())
- createSources();
- }
- else
- {
- mergeSingleLevel();
- }
- }
-
- Processors expandPipeline() override
- {
- for (auto & source : processors)
- {
- auto & out = source->getOutputs().front();
- inputs.emplace_back(out.getHeader(), this);
- connect(out, inputs.back());
- inputs.back().setNeeded();
- }
-
- return std::move(processors);
- }
-
- IProcessor::Status prepare() override
- {
- auto & output = outputs.front();
-
- if (finished && !has_input)
- {
- output.finish();
- return Status::Finished;
- }
-
- /// Check can output.
- if (output.isFinished())
- {
- for (auto & input : inputs)
- input.close();
-
- if (shared_data)
- shared_data->is_cancelled.store(true);
-
- return Status::Finished;
- }
-
- if (!output.canPush())
- return Status::PortFull;
-
- if (!is_initialized)
- return Status::Ready;
-
- if (!processors.empty())
- return Status::ExpandPipeline;
-
- if (has_input)
- return preparePushToOutput();
-
- /// Single level case.
- if (inputs.empty())
- return Status::Ready;
-
- /// Two-level case.
- return prepareTwoLevel();
- }
-
-private:
- IProcessor::Status preparePushToOutput()
- {
- auto & output = outputs.front();
- output.push(std::move(current_chunk));
- has_input = false;
-
- if (finished)
- {
- output.finish();
- return Status::Finished;
- }
-
- return Status::PortFull;
- }
-
- /// Read all sources and try to push current bucket.
- IProcessor::Status prepareTwoLevel()
- {
- auto & output = outputs.front();
-
- for (auto & input : inputs)
- {
- if (!input.isFinished() && input.hasData())
- {
- auto chunk = input.pull();
- auto bucket = getInfoFromChunk(chunk)->bucket_num;
- chunks[bucket] = std::move(chunk);
- }
- }
-
- if (!shared_data->is_bucket_processed[current_bucket_num])
- return Status::NeedData;
-
- if (!chunks[current_bucket_num])
- return Status::NeedData;
-
- output.push(std::move(chunks[current_bucket_num]));
-
- ++current_bucket_num;
- if (current_bucket_num == NUM_BUCKETS)
- {
- output.finish();
- /// Do not close inputs, they must be finished.
- return Status::Finished;
- }
-
- return Status::PortFull;
- }
-
- AggregatingTransformParamsPtr params;
- ManyAggregatedDataVariantsPtr data;
- ConvertingAggregatedToChunksSource::SharedDataPtr shared_data;
-
- size_t num_threads;
-
- bool is_initialized = false;
- bool has_input = false;
- bool finished = false;
-
- Chunk current_chunk;
-
- UInt32 current_bucket_num = 0;
- static constexpr Int32 NUM_BUCKETS = 256;
- std::array<Chunk, NUM_BUCKETS> chunks;
-
- Processors processors;
-
- void setCurrentChunk(Chunk chunk)
- {
- if (has_input)
- throw Exception("Current chunk was already set in "
- "ConvertingAggregatedToChunksTransform.", ErrorCodes::LOGICAL_ERROR);
-
- has_input = true;
- current_chunk = std::move(chunk);
- }
-
- void initialize()
- {
- is_initialized = true;
-
- AggregatedDataVariantsPtr & first = data->at(0);
-
-        /// We need at least one arena per thread in the first data item
- if (num_threads > first->aggregates_pools.size())
- {
- Arenas & first_pool = first->aggregates_pools;
- for (size_t j = first_pool.size(); j < num_threads; j++)
- first_pool.emplace_back(std::make_shared<Arena>());
- }
-
- if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row)
- {
- params->aggregator.mergeWithoutKeyDataImpl(*data);
- auto block = params->aggregator.prepareBlockAndFillWithoutKey(
- *first, params->final, first->type != AggregatedDataVariants::Type::without_key);
-
- setCurrentChunk(convertToChunk(block));
- }
- }
-
- void mergeSingleLevel()
- {
- AggregatedDataVariantsPtr & first = data->at(0);
-
- if (current_bucket_num > 0 || first->type == AggregatedDataVariants::Type::without_key)
- {
- finished = true;
- return;
- }
-
- ++current_bucket_num;
-
- #define M(NAME) \
- else if (first->type == AggregatedDataVariants::Type::NAME) \
- params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data);
- if (false) {} // NOLINT
- APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
- #undef M
- else
- throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
-
- auto block = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final);
-
- setCurrentChunk(convertToChunk(block));
- finished = true;
- }
-
- void createSources()
- {
- AggregatedDataVariantsPtr & first = data->at(0);
- shared_data = std::make_shared<ConvertingAggregatedToChunksSource::SharedData>();
-
- for (size_t thread = 0; thread < num_threads; ++thread)
- {
- /// Select Arena to avoid race conditions
- Arena * arena = first->aggregates_pools.at(thread).get();
- auto source = std::make_shared<ConvertingAggregatedToChunksSource>(params, data, shared_data, arena);
-
- processors.emplace_back(std::move(source));
- }
- }
-};
-
-AggregatingTransform::AggregatingTransform(Block header, AggregatingTransformParamsPtr params_)
- : AggregatingTransform(std::move(header), std::move(params_)
- , std::make_unique<ManyAggregatedData>(1), 0, 1, 1)
-{
-}
-
-AggregatingTransform::AggregatingTransform(
- Block header,
- AggregatingTransformParamsPtr params_,
- ManyAggregatedDataPtr many_data_,
- size_t current_variant,
- size_t max_threads_,
- size_t temporary_data_merge_threads_)
- : IProcessor({std::move(header)}, {params_->getHeader()})
- , params(std::move(params_))
- , key_columns(params->params.keys_size)
- , aggregate_columns(params->params.aggregates_size)
- , many_data(std::move(many_data_))
- , variants(*many_data->variants[current_variant])
- , max_threads(std::min(many_data->variants.size(), max_threads_))
- , temporary_data_merge_threads(temporary_data_merge_threads_)
-{
-}
-
-AggregatingTransform::~AggregatingTransform() = default;
-
-IProcessor::Status AggregatingTransform::prepare()
-{
- /// There are one or two input ports.
-    /// The first one is used at the aggregation step, the second one while reading merged data from ConvertingAggregatedToChunksTransform.
-
- auto & output = outputs.front();
- /// Last output is current. All other outputs should already be closed.
- auto & input = inputs.back();
-
- /// Check can output.
- if (output.isFinished())
- {
- input.close();
- return Status::Finished;
- }
-
- if (!output.canPush())
- {
- input.setNotNeeded();
- return Status::PortFull;
- }
-
-    /// Finish data processing, prepare for generating.
- if (is_consume_finished && !is_generate_initialized)
- {
- /// Close input port in case max_rows_to_group_by was reached but not all data was read.
- inputs.front().close();
-
- return Status::Ready;
- }
-
- if (is_generate_initialized && !is_pipeline_created && !processors.empty())
- return Status::ExpandPipeline;
-
- /// Only possible while consuming.
- if (read_current_chunk)
- return Status::Ready;
-
- /// Get chunk from input.
- if (input.isFinished())
- {
- if (is_consume_finished)
- {
- output.finish();
- return Status::Finished;
- }
- else
- {
- /// Finish data processing and create another pipe.
- is_consume_finished = true;
- return Status::Ready;
- }
- }
-
- if (!input.hasData())
- {
- input.setNeeded();
- return Status::NeedData;
- }
-
- if (is_consume_finished)
- input.setNeeded();
-
- current_chunk = input.pull(/*set_not_needed = */ !is_consume_finished);
- read_current_chunk = true;
-
- if (is_consume_finished)
- {
- output.push(std::move(current_chunk));
- read_current_chunk = false;
- return Status::PortFull;
- }
-
- return Status::Ready;
-}
-
-void AggregatingTransform::work()
-{
- if (is_consume_finished)
- initGenerate();
- else
- {
- consume(std::move(current_chunk));
- read_current_chunk = false;
- }
-}
-
-Processors AggregatingTransform::expandPipeline()
-{
- auto & out = processors.back()->getOutputs().front();
- inputs.emplace_back(out.getHeader(), this);
- connect(out, inputs.back());
- is_pipeline_created = true;
- return std::move(processors);
-}
-
-void AggregatingTransform::consume(Chunk chunk)
-{
- const UInt64 num_rows = chunk.getNumRows();
-
- if (num_rows == 0 && params->params.empty_result_for_aggregation_by_empty_set)
- return;
-
- if (!is_consume_started)
- {
- LOG_TRACE(log, "Aggregating");
- is_consume_started = true;
- }
-
- src_rows += num_rows;
- src_bytes += chunk.bytes();
-
- if (params->only_merge)
- {
- auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns());
- block = materializeBlock(block);
- if (!params->aggregator.mergeOnBlock(block, variants, no_more_keys))
- is_consume_finished = true;
- }
- else
- {
- if (!params->aggregator.executeOnBlock(chunk.detachColumns(), num_rows, variants, key_columns, aggregate_columns, no_more_keys))
- is_consume_finished = true;
- }
-}
-
-void AggregatingTransform::initGenerate()
-{
- if (is_generate_initialized)
- return;
-
- is_generate_initialized = true;
-
-    /// If there was no data and we aggregate without keys, we must return a single row with the result of the empty aggregation.
- /// To do this, we pass a block with zero rows to aggregate.
- if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set)
- {
- if (params->only_merge)
- params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys);
- else
- params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys);
- }
-
- double elapsed_seconds = watch.elapsedSeconds();
- size_t rows = variants.sizeWithoutOverflowRow();
-
- LOG_DEBUG(log, "Aggregated. {} to {} rows (from {}) in {} sec. ({:.3f} rows/sec., {}/sec.)",
- src_rows, rows, ReadableSize(src_bytes),
- elapsed_seconds, src_rows / elapsed_seconds,
- ReadableSize(src_bytes / elapsed_seconds));
-
- if (params->aggregator.hasTemporaryFiles())
- {
- if (variants.isConvertibleToTwoLevel())
- variants.convertToTwoLevel();
-
-        /// Also flush the data in RAM to disk. It's easier than merging on-disk and in-RAM data.
- if (!variants.empty())
- params->aggregator.writeToTemporaryFile(variants);
- }
-
- if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size())
- return;
-
- if (!params->aggregator.hasTemporaryFiles())
- {
- auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants);
- auto prepared_data_ptr = std::make_shared<ManyAggregatedDataVariants>(std::move(prepared_data));
- processors.emplace_back(std::make_shared<ConvertingAggregatedToChunksTransform>(params, std::move(prepared_data_ptr), max_threads));
- }
- else
- {
- /// If there are temporary files with partially-aggregated data on the disk,
- /// then read and merge them, spending the minimum amount of memory.
-
- ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge);
-
- if (many_data->variants.size() > 1)
- {
-            /// It may happen that some data has not yet been flushed,
-            ///  because at the time the thread finished, no data had been flushed to disk yet, and more appeared afterwards.
- for (auto & cur_variants : many_data->variants)
- {
- if (cur_variants->isConvertibleToTwoLevel())
- cur_variants->convertToTwoLevel();
-
- if (!cur_variants->empty())
- params->aggregator.writeToTemporaryFile(*cur_variants);
- }
- }
-
- const auto & files = params->aggregator.getTemporaryFiles();
- Pipe pipe;
-
- {
- auto header = params->aggregator.getHeader(false);
- Pipes pipes;
-
- for (const auto & file : files.files)
- pipes.emplace_back(Pipe(std::make_unique<SourceFromNativeStream>(header, file->path())));
-
- pipe = Pipe::unitePipes(std::move(pipes));
- }
-
- LOG_DEBUG(
- log,
- "Will merge {} temporary files of size {} compressed, {} uncompressed.",
- files.files.size(),
- ReadableSize(files.sum_size_compressed),
- ReadableSize(files.sum_size_uncompressed));
-
- addMergingAggregatedMemoryEfficientTransform(pipe, params, temporary_data_merge_threads);
-
- processors = Pipe::detachProcessors(std::move(pipe));
- }
-}
-
-}
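The two-level merge above distributes buckets to workers through a shared atomic counter. A standalone sketch (simplified, illustrative names) of that work-distribution pattern: each worker claims the next bucket with fetch_add and marks it processed when its merge is done:

    // Simplified model of the bucket claiming done by ConvertingAggregatedToChunksSource.
    #include <array>
    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main()
    {
        constexpr unsigned NUM_BUCKETS = 256;
        std::atomic<unsigned> next_bucket_to_merge{0};
        std::array<std::atomic<bool>, NUM_BUCKETS> is_bucket_processed{};
        for (auto & flag : is_bucket_processed)
            flag = false;

        auto worker = [&]
        {
            while (true)
            {
                unsigned bucket = next_bucket_to_merge.fetch_add(1);
                if (bucket >= NUM_BUCKETS)
                    return;
                // ... here the real code merges one bucket of aggregated data ...
                is_bucket_processed[bucket] = true;
            }
        };

        std::vector<std::thread> workers;
        for (int i = 0; i < 4; ++i)
            workers.emplace_back(worker);
        for (auto & t : workers)
            t.join();

        unsigned done = 0;
        for (auto & flag : is_bucket_processed)
            done += flag ? 1u : 0u;
        std::printf("processed %u buckets\n", done);   // 256
    }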
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h
deleted file mode 100644
index d9fd8dc49f1..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/AggregatingTransform.h
+++ /dev/null
@@ -1,167 +0,0 @@
-#pragma once
-#include <Processors/IAccumulatingTransform.h>
-#include <Interpreters/Aggregator.h>
-#include <IO/ReadBufferFromFile.h>
-#include <Compression/CompressedReadBuffer.h>
-#include <Common/Stopwatch.h>
-
-namespace NDB
-{
-
-class AggregatedArenasChunkInfo : public ChunkInfo
-{
-public:
- Arenas arenas;
- AggregatedArenasChunkInfo(Arenas arenas_)
- : arenas(std::move(arenas_))
- {}
-};
-
-class AggregatedChunkInfo : public ChunkInfo
-{
-public:
- bool is_overflows = false;
- Int32 bucket_num = -1;
-};
-
-class IBlockInputStream;
-using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
-
-using AggregatorList = std::list<Aggregator>;
-using AggregatorListPtr = std::shared_ptr<AggregatorList>;
-
-struct AggregatingTransformParams
-{
- Aggregator::Params params;
-
-    /// Each params holds a list of aggregators which are used in the query. It's needed because we need
-    /// a pointer to the aggregator to properly destroy complex aggregation states on exception
-    /// (see comments in AggregatedDataVariants). However, this pointer might not be valid because
-    /// we can have two different aggregators at the same time due to a mixed pipeline of aggregate
-    /// projections, and one of them might get destroyed before it is used.
- AggregatorListPtr aggregator_list_ptr;
- Aggregator & aggregator;
- bool final;
- bool only_merge = false;
-
- AggregatingTransformParams(const Aggregator::Params & params_, bool final_)
- : params(params_)
- , aggregator_list_ptr(std::make_shared<AggregatorList>())
- , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params))
- , final(final_)
- {
- }
-
- AggregatingTransformParams(const Aggregator::Params & params_, const AggregatorListPtr & aggregator_list_ptr_, bool final_)
- : params(params_)
- , aggregator_list_ptr(aggregator_list_ptr_)
- , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params))
- , final(final_)
- {
- }
-
- Block getHeader() const { return aggregator.getHeader(final); }
-
- Block getCustomHeader(bool final_) const { return aggregator.getHeader(final_); }
-};
-
-struct ManyAggregatedData
-{
- ManyAggregatedDataVariants variants;
- std::vector<std::unique_ptr<std::mutex>> mutexes;
- std::atomic<UInt32> num_finished = 0;
-
- explicit ManyAggregatedData(size_t num_threads = 0) : variants(num_threads), mutexes(num_threads)
- {
- for (auto & elem : variants)
- elem = std::make_shared<AggregatedDataVariants>();
-
- for (auto & mut : mutexes)
- mut = std::make_unique<std::mutex>();
- }
-};
-
-using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams>;
-using ManyAggregatedDataPtr = std::shared_ptr<ManyAggregatedData>;
-
-/** Aggregates the stream of blocks using the specified key columns and aggregate functions.
-  * Columns with aggregate functions are added to the end of the block.
-  * If final = false, the aggregate functions are not finalized, that is, they are not replaced by their value, but contain an intermediate state of calculations.
-  * This is necessary so that aggregation can continue (for example, by combining streams of partially aggregated data).
-  *
-  * For every separate stream of data a separate AggregatingTransform is created.
-  * Every AggregatingTransform reads data from the first port until it runs out, or until max_rows_to_group_by is reached.
-  * When the last AggregatingTransform finishes reading, the results of aggregation need to be merged together.
-  * This task is performed by ConvertingAggregatedToChunksTransform.
-  * The last AggregatingTransform expands the pipeline and adds a second input port, which reads from ConvertingAggregatedToChunksTransform.
-  *
-  * Aggregation data is passed via the ManyAggregatedData structure, which is shared between all aggregating transforms.
-  * At the aggregation step, every transform uses its own AggregatedDataVariants structure.
-  * At the merging step, all structures are passed to ConvertingAggregatedToChunksTransform.
-  */
-class AggregatingTransform : public IProcessor
-{
-public:
- AggregatingTransform(Block header, AggregatingTransformParamsPtr params_);
-
- /// For Parallel aggregating.
- AggregatingTransform(
- Block header,
- AggregatingTransformParamsPtr params_,
- ManyAggregatedDataPtr many_data,
- size_t current_variant,
- size_t max_threads,
- size_t temporary_data_merge_threads);
- ~AggregatingTransform() override;
-
- String getName() const override { return "AggregatingTransform"; }
- Status prepare() override;
- void work() override;
- Processors expandPipeline() override;
-
-protected:
- void consume(Chunk chunk);
-
-private:
- /// To read the data that was flushed into the temporary data file.
- Processors processors;
-
- AggregatingTransformParamsPtr params;
- Poco::Logger * log = &Poco::Logger::get("AggregatingTransform");
-
- ColumnRawPtrs key_columns;
- Aggregator::AggregateColumns aggregate_columns;
-
- /** Used if there is a limit on the maximum number of rows in the aggregation,
- * and if group_by_overflow_mode == ANY.
- * In this case, new keys are not added to the set, but aggregation is performed only by
- * keys that have already managed to get into the set.
- */
- bool no_more_keys = false;
-
- ManyAggregatedDataPtr many_data;
- AggregatedDataVariants & variants;
- size_t max_threads = 1;
- size_t temporary_data_merge_threads = 1;
-
- /// TODO: calculate time only for aggregation.
- Stopwatch watch;
-
- UInt64 src_rows = 0;
- UInt64 src_bytes = 0;
-
- bool is_generate_initialized = false;
- bool is_consume_finished = false;
- bool is_pipeline_created = false;
-
- Chunk current_chunk;
- bool read_current_chunk = false;
-
- bool is_consume_started = false;
-
- void initGenerate();
-};
-
-Chunk convertToChunk(const Block & block);
-
-}
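The comment above describes parallel aggregation: every stream fills its own partial state, and the partial states are merged once the last stream finishes. A standalone, heavily simplified sketch of that idea (a plain hash map of counts stands in for AggregatedDataVariants; all names are illustrative):

    // Simplified model of per-stream partial aggregation followed by a merge step.
    #include <cstddef>
    #include <cstdio>
    #include <thread>
    #include <unordered_map>
    #include <vector>

    using PartialState = std::unordered_map<int, long>;   // key -> count

    int main()
    {
        std::vector<std::vector<int>> streams = {
            {1, 2, 2, 3}, {2, 3, 3, 4}, {1, 4, 4, 4},
        };

        // One partial aggregation state per stream (stand-in for AggregatedDataVariants).
        std::vector<PartialState> variants(streams.size());
        std::vector<std::thread> threads;

        for (std::size_t i = 0; i < streams.size(); ++i)
            threads.emplace_back([&, i]
            {
                for (int key : streams[i])
                    ++variants[i][key];                   // "executeOnBlock" for this stream
            });

        for (auto & t : threads)
            t.join();

        // Merging step: fold all partial states into the first one.
        PartialState & merged = variants[0];
        for (std::size_t i = 1; i < variants.size(); ++i)
            for (const auto & [key, count] : variants[i])
                merged[key] += count;

        for (const auto & [key, count] : merged)
            std::printf("key %d -> %ld\n", key, count);
    }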
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp
deleted file mode 100644
index 0195b8d0fa8..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include <Processors/Transforms/ExtremesTransform.h>
-
-#include <Core/Field.h>
-
-namespace NDB
-{
-
-ExtremesTransform::ExtremesTransform(const Block & header)
- : ISimpleTransform(header, header, true)
-{
- /// Port for Extremes.
- outputs.emplace_back(outputs.front().getHeader(), this);
-}
-
-IProcessor::Status ExtremesTransform::prepare()
-{
- if (!finished_transform)
- {
- auto status = ISimpleTransform::prepare();
-
- if (status != Status::Finished)
- return status;
-
- finished_transform = true;
- }
-
- auto & totals_output = getExtremesPort();
-
- /// Check can output.
- if (totals_output.isFinished())
- return Status::Finished;
-
- if (!totals_output.canPush())
- return Status::PortFull;
-
- if (!extremes && !extremes_columns.empty())
- return Status::Ready;
-
- if (extremes)
- totals_output.push(std::move(extremes));
-
- totals_output.finish();
- return Status::Finished;
-}
-
-void ExtremesTransform::work()
-{
- if (finished_transform)
- {
- if (!extremes && !extremes_columns.empty())
- extremes.setColumns(std::move(extremes_columns), 2);
- }
- else
- ISimpleTransform::work();
-}
-
-void ExtremesTransform::transform(DB::Chunk & chunk)
-{
-
- if (chunk.getNumRows() == 0)
- return;
-
- size_t num_columns = chunk.getNumColumns();
- const auto & columns = chunk.getColumns();
-
- if (extremes_columns.empty())
- {
- extremes_columns.resize(num_columns);
-
- for (size_t i = 0; i < num_columns; ++i)
- {
- const ColumnPtr & src = columns[i];
-
- if (isColumnConst(*src))
- {
- /// Equal min and max.
- extremes_columns[i] = src->cloneResized(2);
- }
- else
- {
- Field min_value;
- Field max_value;
-
- src->getExtremes(min_value, max_value);
-
- extremes_columns[i] = src->cloneEmpty();
-
- extremes_columns[i]->insert(min_value);
- extremes_columns[i]->insert(max_value);
- }
- }
- }
- else
- {
- for (size_t i = 0; i < num_columns; ++i)
- {
- if (isColumnConst(*extremes_columns[i]))
- continue;
-
- Field min_value = (*extremes_columns[i])[0];
- Field max_value = (*extremes_columns[i])[1];
-
- Field cur_min_value;
- Field cur_max_value;
-
- columns[i]->getExtremes(cur_min_value, cur_max_value);
-
- if (cur_min_value < min_value)
- min_value = cur_min_value;
- if (cur_max_value > max_value)
- max_value = cur_max_value;
-
- MutableColumnPtr new_extremes = extremes_columns[i]->cloneEmpty();
-
- new_extremes->insert(min_value);
- new_extremes->insert(max_value);
-
- extremes_columns[i] = std::move(new_extremes);
- }
- }
-}
-
-}
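A standalone, simplified sketch of the running min/max computation that ExtremesTransform::transform performs above, shown for a single numeric column (illustrative names, no ClickHouse types):

    // Fold chunks of values into running extremes, as the transform does per column.
    #include <algorithm>
    #include <cstdio>
    #include <optional>
    #include <vector>

    struct Extremes { double min_value; double max_value; };

    void updateExtremes(std::optional<Extremes> & acc, const std::vector<double> & chunk)
    {
        if (chunk.empty())
            return;
        auto [mn, mx] = std::minmax_element(chunk.begin(), chunk.end());
        if (!acc)
            acc = Extremes{*mn, *mx};
        else
        {
            acc->min_value = std::min(acc->min_value, *mn);
            acc->max_value = std::max(acc->max_value, *mx);
        }
    }

    int main()
    {
        std::optional<Extremes> acc;
        updateExtremes(acc, {3.5, -1.0, 7.25});
        updateExtremes(acc, {4.0, 12.5});
        std::printf("min=%g max=%g\n", acc->min_value, acc->max_value);
    }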
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h
deleted file mode 100644
index 522308370f9..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/ExtremesTransform.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-#include <Processors/ISimpleTransform.h>
-
-namespace NDB
-{
-
-class ExtremesTransform : public ISimpleTransform
-{
-
-public:
- explicit ExtremesTransform(const Block & header);
-
- String getName() const override { return "ExtremesTransform"; }
-
- OutputPort & getExtremesPort() { return outputs.back(); }
-
- Status prepare() override;
- void work() override;
-
-protected:
- void transform(Chunk & chunk) override;
-
- bool finished_transform = false;
- Chunk extremes;
-
-private:
- MutableColumns extremes_columns;
-};
-
-}
-
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
deleted file mode 100644
index 78e77ffb6be..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
+++ /dev/null
@@ -1,525 +0,0 @@
-#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
-
-#include <Interpreters/Aggregator.h>
-#include <Processors/ISimpleTransform.h>
-#include <Processors/ResizeProcessor.h>
-#include <Processors/Pipe.h>
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-struct ChunksToMerge : public ChunkInfo
-{
- std::unique_ptr<Chunks> chunks;
- Int32 bucket_num = -1;
- bool is_overflows = false;
-};
-
-GroupingAggregatedTransform::GroupingAggregatedTransform(
- const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_)
- : IProcessor(InputPorts(num_inputs_, header_), { Block() })
- , num_inputs(num_inputs_)
- , params(std::move(params_))
- , last_bucket_number(num_inputs, -1)
- , read_from_input(num_inputs, false)
-{
-}
-
-void GroupingAggregatedTransform::readFromAllInputs()
-{
- auto in = inputs.begin();
- read_from_all_inputs = true;
-
- for (size_t i = 0; i < num_inputs; ++i, ++in)
- {
- if (in->isFinished())
- continue;
-
- if (read_from_input[i])
- continue;
-
- in->setNeeded();
-
- if (!in->hasData())
- {
- read_from_all_inputs = false;
- continue;
- }
-
- auto chunk = in->pull();
- read_from_input[i] = true;
- addChunk(std::move(chunk), i);
- }
-}
-
-void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_overflows)
-{
- auto & output = outputs.front();
-
- auto info = std::make_shared<ChunksToMerge>();
- info->bucket_num = bucket;
- info->is_overflows = is_overflows;
- info->chunks = std::make_unique<Chunks>(std::move(chunks));
-
- Chunk chunk;
- chunk.setChunkInfo(std::move(info));
- output.push(std::move(chunk));
-}
-
-bool GroupingAggregatedTransform::tryPushTwoLevelData()
-{
- auto try_push_by_iter = [&](auto batch_it)
- {
- if (batch_it == chunks_map.end())
- return false;
-
- Chunks & cur_chunks = batch_it->second;
- if (cur_chunks.empty())
- {
- chunks_map.erase(batch_it);
- return false;
- }
-
- pushData(std::move(cur_chunks), batch_it->first, false);
- chunks_map.erase(batch_it);
- return true;
- };
-
- if (all_inputs_finished)
- {
- /// Chunks are sorted by bucket.
- while (!chunks_map.empty())
- if (try_push_by_iter(chunks_map.begin()))
- return true;
- }
- else
- {
- for (; next_bucket_to_push < current_bucket; ++next_bucket_to_push)
- if (try_push_by_iter(chunks_map.find(next_bucket_to_push)))
- return true;
- }
-
- return false;
-}
-
-bool GroupingAggregatedTransform::tryPushSingleLevelData()
-{
- if (single_level_chunks.empty())
- return false;
-
- pushData(std::move(single_level_chunks), -1, false);
- return true;
-}
-
-bool GroupingAggregatedTransform::tryPushOverflowData()
-{
- if (overflow_chunks.empty())
- return false;
-
- pushData(std::move(overflow_chunks), -1, true);
- return true;
-}
-
-IProcessor::Status GroupingAggregatedTransform::prepare()
-{
- /// Check can output.
- auto & output = outputs.front();
-
- if (output.isFinished())
- {
- for (auto & input : inputs)
- input.close();
-
- chunks_map.clear();
- last_bucket_number.clear();
- return Status::Finished;
- }
-
-    /// Read from each input for the first time to understand whether we have two-level aggregation.
- if (!read_from_all_inputs)
- {
- readFromAllInputs();
- if (!read_from_all_inputs)
- return Status::NeedData;
- }
-
-    /// Convert single level to two levels if we have a two-level input.
- if (has_two_level && !single_level_chunks.empty())
- return Status::Ready;
-
- /// Check can push (to avoid data caching).
- if (!output.canPush())
- {
- for (auto & input : inputs)
- input.setNotNeeded();
-
- return Status::PortFull;
- }
-
- bool pushed_to_output = false;
-
- /// Output if has data.
- if (has_two_level)
- pushed_to_output = tryPushTwoLevelData();
-
- auto need_input = [this](size_t input_num)
- {
- if (last_bucket_number[input_num] < current_bucket)
- return true;
-
- return expect_several_chunks_for_single_bucket_per_source && last_bucket_number[input_num] == current_bucket;
- };
-
- /// Read next bucket if can.
- for (; ; ++current_bucket)
- {
- bool finished = true;
- bool need_data = false;
-
- auto in = inputs.begin();
- for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
- {
- if (in->isFinished())
- continue;
-
- finished = false;
-
- if (!need_input(input_num))
- continue;
-
- in->setNeeded();
-
- if (!in->hasData())
- {
- need_data = true;
- continue;
- }
-
- auto chunk = in->pull();
- addChunk(std::move(chunk), input_num);
-
- if (has_two_level && !single_level_chunks.empty())
- return Status::Ready;
-
- if (!in->isFinished() && need_input(input_num))
- need_data = true;
- }
-
- if (finished)
- {
- all_inputs_finished = true;
- break;
- }
-
- if (need_data)
- return Status::NeedData;
- }
-
- if (pushed_to_output)
- return Status::PortFull;
-
- if (has_two_level)
- {
- if (tryPushTwoLevelData())
- return Status::PortFull;
-
-        /// Sanity check. If a new bucket was read, we should be able to push it.
-        /// This is always false, but we still keep this condition in case the code is changed later.
- if (!all_inputs_finished) // -V547
- throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.",
- ErrorCodes::LOGICAL_ERROR);
- }
- else
- {
- if (!all_inputs_finished) // -V547
- throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, "
- "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR);
-
- if (tryPushSingleLevelData())
- return Status::PortFull;
- }
-
-    /// If we haven't pushed to the output, then all data was read. Push overflows if we have any.
- if (tryPushOverflowData())
- return Status::PortFull;
-
- output.finish();
- return Status::Finished;
-}
-
-void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
-{
- const auto & info = chunk.getChunkInfo();
- if (!info)
- throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- Int32 bucket = agg_info->bucket_num;
- bool is_overflows = agg_info->is_overflows;
-
- if (is_overflows)
- overflow_chunks.emplace_back(std::move(chunk));
- else if (bucket < 0)
- single_level_chunks.emplace_back(std::move(chunk));
- else
- {
- chunks_map[bucket].emplace_back(std::move(chunk));
- has_two_level = true;
- last_bucket_number[input] = bucket;
- }
-}
-
-void GroupingAggregatedTransform::work()
-{
- /// Convert single level data to two level.
- if (!single_level_chunks.empty())
- {
- const auto & header = getInputs().front().getHeader(); /// Take header from input port. Output header is empty.
- auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns());
- single_level_chunks.pop_back();
- auto blocks = params->aggregator.convertBlockToTwoLevel(block);
-
- for (auto & cur_block : blocks)
- {
- if (!cur_block)
- continue;
-
- Int32 bucket = cur_block.info.bucket_num;
- auto chunk_info = std::make_shared<AggregatedChunkInfo>();
- chunk_info->bucket_num = bucket;
- chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info)));
- }
- }
-}
-
-
-MergingAggregatedBucketTransform::MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params_)
- : ISimpleTransform({}, params_->getHeader(), false), params(std::move(params_))
-{
- setInputNotNeededAfterRead(true);
-}
-
-void MergingAggregatedBucketTransform::transform(Chunk & chunk)
-{
- const auto & info = chunk.getChunkInfo();
- const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
-
- if (!chunks_to_merge)
- throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.",
- ErrorCodes::LOGICAL_ERROR);
-
- auto header = params->aggregator.getHeader(false);
-
- BlocksList blocks_list;
- for (auto & cur_chunk : *chunks_to_merge->chunks)
- {
- const auto & cur_info = cur_chunk.getChunkInfo();
- if (!cur_info)
- throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.",
- ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedBucketTransform.",
- ErrorCodes::LOGICAL_ERROR);
-
- Block block = header.cloneWithColumns(cur_chunk.detachColumns());
- block.info.is_overflows = agg_info->is_overflows;
- block.info.bucket_num = agg_info->bucket_num;
-
- blocks_list.emplace_back(std::move(block));
- }
-
- auto res_info = std::make_shared<AggregatedChunkInfo>();
- res_info->is_overflows = chunks_to_merge->is_overflows;
- res_info->bucket_num = chunks_to_merge->bucket_num;
- chunk.setChunkInfo(std::move(res_info));
-
- auto block = params->aggregator.mergeBlocks(blocks_list, params->final);
- size_t num_rows = block.rows();
- chunk.setColumns(block.getColumns(), num_rows);
-}
-
-
-SortingAggregatedTransform::SortingAggregatedTransform(size_t num_inputs_, AggregatingTransformParamsPtr params_)
- : IProcessor(InputPorts(num_inputs_, params_->getHeader()), {params_->getHeader()})
- , num_inputs(num_inputs_)
- , params(std::move(params_))
- , last_bucket_number(num_inputs, -1)
- , is_input_finished(num_inputs, false)
-{
-}
-
-bool SortingAggregatedTransform::tryPushChunk()
-{
- auto & output = outputs.front();
-
- if (chunks.empty())
- return false;
-
- /// Chunk with min current bucket.
- auto it = chunks.begin();
- auto cur_bucket = it->first;
-
- /// Check that can push it
- for (size_t input = 0; input < num_inputs; ++input)
- if (!is_input_finished[input] && last_bucket_number[input] < cur_bucket)
- return false;
-
- output.push(std::move(it->second));
- chunks.erase(it);
- return true;
-}
-
-void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
-{
- const auto & info = chunk.getChunkInfo();
- if (!info)
- throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
- if (!agg_info)
- throw Exception("Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR);
-
- Int32 bucket = agg_info->bucket_num;
- bool is_overflows = agg_info->is_overflows;
-
- if (is_overflows)
- overflow_chunk = std::move(chunk);
- else
- {
- if (chunks[bucket])
- throw Exception("SortingAggregatedTransform already got bucket with number " + toString(bucket),
- ErrorCodes::LOGICAL_ERROR);
-
- chunks[bucket] = std::move(chunk);
- last_bucket_number[from_input] = bucket;
- }
-}
-
-IProcessor::Status SortingAggregatedTransform::prepare()
-{
- /// Check can output.
- auto & output = outputs.front();
-
- if (output.isFinished())
- {
- for (auto & input : inputs)
- input.close();
-
- chunks.clear();
- last_bucket_number.clear();
- return Status::Finished;
- }
-
- /// Check can push (to avoid data caching).
- if (!output.canPush())
- {
- for (auto & input : inputs)
- input.setNotNeeded();
-
- return Status::PortFull;
- }
-
- /// Push if have min version.
- bool pushed_to_output = tryPushChunk();
-
- bool need_data = false;
- bool all_finished = true;
-
- /// Try read anything.
- auto in = inputs.begin();
- for (size_t input_num = 0; input_num < num_inputs; ++input_num, ++in)
- {
- if (in->isFinished())
- {
- is_input_finished[input_num] = true;
- continue;
- }
-
- //all_finished = false;
-
- in->setNeeded();
-
- if (!in->hasData())
- {
- need_data = true;
- all_finished = false;
- continue;
- }
-
- auto chunk = in->pull();
- addChunk(std::move(chunk), input_num);
-
- if (in->isFinished())
- {
- is_input_finished[input_num] = true;
- }
- else
- {
- /// If chunk was pulled, then we need data from this port.
- need_data = true;
- all_finished = false;
- }
- }
-
- if (pushed_to_output)
- return Status::PortFull;
-
- if (tryPushChunk())
- return Status::PortFull;
-
- if (need_data)
- return Status::NeedData;
-
- if (!all_finished)
- throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.",
- ErrorCodes::LOGICAL_ERROR);
-
- if (overflow_chunk)
- {
- output.push(std::move(overflow_chunk));
- return Status::PortFull;
- }
-
- output.finish();
- return Status::Finished;
-}
-
-
-void addMergingAggregatedMemoryEfficientTransform(
- Pipe & pipe,
- AggregatingTransformParamsPtr params,
- size_t num_merging_processors)
-{
- pipe.addTransform(std::make_shared<GroupingAggregatedTransform>(pipe.getHeader(), pipe.numOutputPorts(), params));
-
- if (num_merging_processors <= 1)
- {
- /// --> GroupingAggregated --> MergingAggregatedBucket -->
- pipe.addTransform(std::make_shared<MergingAggregatedBucketTransform>(params));
- return;
- }
-
- /// --> --> MergingAggregatedBucket -->
- /// --> GroupingAggregated --> ResizeProcessor --> MergingAggregatedBucket --> SortingAggregated -->
- /// --> --> MergingAggregatedBucket -->
-
- pipe.resize(num_merging_processors);
-
- pipe.addSimpleTransform([params](const Block &)
- {
- return std::make_shared<MergingAggregatedBucketTransform>(params);
- });
-
- pipe.addTransform(std::make_shared<SortingAggregatedTransform>(num_merging_processors, params));
-}
-
-}
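SortingAggregatedTransform above re-emits merged buckets in increasing bucket order: a bucket may be pushed only after every still-active input has reported a bucket number at least as large. A standalone sketch of that holding-back rule (illustrative names; integers stand in for chunks):

    // Simplified model of the bucket re-ordering done by SortingAggregatedTransform.
    #include <cstddef>
    #include <cstdio>
    #include <map>
    #include <vector>

    struct SorterSketch
    {
        std::map<int, int> pending;          // bucket -> payload (stand-in for a Chunk)
        std::vector<int> last_bucket;        // last bucket seen from each input, -1 initially
        std::vector<bool> input_finished;

        explicit SorterSketch(std::size_t num_inputs)
            : last_bucket(num_inputs, -1), input_finished(num_inputs, false) {}

        void add(std::size_t input, int bucket, int payload)
        {
            pending[bucket] = payload;
            last_bucket[input] = bucket;
        }

        // Emit buckets in increasing order while it is safe to do so.
        void tryEmit()
        {
            while (!pending.empty())
            {
                int bucket = pending.begin()->first;
                for (std::size_t i = 0; i < last_bucket.size(); ++i)
                    if (!input_finished[i] && last_bucket[i] < bucket)
                        return;              // some input may still produce this bucket
                std::printf("emit bucket %d\n", bucket);
                pending.erase(pending.begin());
            }
        }
    };

    int main()
    {
        SorterSketch sorter(2);
        sorter.add(0, 1, 10);
        sorter.tryEmit();                    // nothing yet: input 1 is still behind
        sorter.add(1, 0, 20);
        sorter.tryEmit();                    // emits bucket 0; bucket 1 waits for input 1
    }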
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
deleted file mode 100644
index 7d1202f98a0..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h
+++ /dev/null
@@ -1,149 +0,0 @@
-#pragma once
-#include <Processors/IProcessor.h>
-#include <Interpreters/Aggregator.h>
-#include <Processors/ISimpleTransform.h>
-#include <Processors/Transforms/AggregatingTransform.h>
-#include <Processors/ResizeProcessor.h>
-
-
-namespace NDB
-{
-
-/** Pre-aggregates data from ports, holding in RAM only one or more (up to merging_threads) blocks from each source.
- * This saves RAM in case of using two-level aggregation, where in each source there will be up to 256 blocks with parts of the result.
- *
- * Aggregate functions in blocks should not be finalized so that their states can be combined.
- *
- * Used to solve two tasks:
- *
- * 1. External aggregation with data flush to disk.
- * Partially aggregated data (previously divided into 256 buckets) is flushed to some number of files on the disk.
- * We need to read them and merge them by buckets - keeping only a few buckets from each file in RAM simultaneously.
- *
- * 2. Merge aggregation results for distributed query processing.
- * Partially aggregated data arrives from different servers; it may or may not be split into 256 buckets,
- * and these buckets are passed to us by the network from each server in sequence, one by one.
- * You should also read and merge by the buckets.
- *
- * The essence of the work:
- *
- * There are a number of sources. They give out blocks with partially aggregated data.
- * Each source can return one of the following block sequences:
- * 1. "unsplitted" block with bucket_num = -1;
- * 2. "split" (two_level) blocks with bucket_num from 0 to 255;
- * In both cases, there may also be a block of "overflows" with bucket_num = -1 and is_overflows = true;
- *
- * We start from the convention that split blocks are always passed in the order of bucket_num.
- * That is, if a < b, then the bucket_num = a block goes before bucket_num = b.
- * This is needed for a memory-efficient merge
- * - so that you do not need to read the blocks up front, but go all the way up by bucket_num.
- *
- * In this case, not all bucket_num from the range of 0..255 can be present.
- * The overflow block can be presented in any order relative to other blocks (but there can be only one).
- *
- * It is necessary to combine these sequences of blocks and return the result as a sequence with the same properties.
- * That is, at the output, if there are "split" blocks in the sequence, then they should go in the order of bucket_num.
- *
- * The merge can be performed using several (merging_threads) threads.
- * For this, receiving the set of blocks for the next bucket_num should be done sequentially,
- * and then, when we have several received sets, they can be merged in parallel.
- *
- * When you receive next blocks from different sources,
- * data from sources can also be read in several threads (reading_threads)
- * for optimal performance in the presence of a fast network or disks (from where these blocks are read).
- */
-
-/// Has several inputs and a single output.
-/// Reads chunks with partially aggregated data from the inputs, groups them by bucket number
-/// and writes the data for a single bucket as a single chunk.
-class GroupingAggregatedTransform : public IProcessor
-{
-public:
- GroupingAggregatedTransform(const Block & header_, size_t num_inputs_, AggregatingTransformParamsPtr params_);
- String getName() const override { return "GroupingAggregatedTransform"; }
-
-    /// Special setting: for the case when a single source can return several chunks with the same bucket.
- void allowSeveralChunksForSingleBucketPerSource() { expect_several_chunks_for_single_bucket_per_source = true; }
-
-protected:
- Status prepare() override;
- void work() override;
-
-private:
- size_t num_inputs;
- AggregatingTransformParamsPtr params;
-
- std::vector<Int32> last_bucket_number; /// Last bucket read from each input.
- std::map<Int32, Chunks> chunks_map; /// bucket -> chunks
- Chunks overflow_chunks;
- Chunks single_level_chunks;
- Int32 current_bucket = 0; /// Currently processing bucket.
- Int32 next_bucket_to_push = 0; /// Always <= current_bucket.
- bool has_two_level = false;
-
- bool all_inputs_finished = false;
- bool read_from_all_inputs = false;
- std::vector<bool> read_from_input;
-
- bool expect_several_chunks_for_single_bucket_per_source = false;
-
-    /// Add a chunk read from an input to chunks_map, overflow_chunks or single_level_chunks according to its chunk info.
- void addChunk(Chunk chunk, size_t input);
-    /// Read the first chunk from all inputs. It is needed to detect whether any source has two-level aggregation.
- void readFromAllInputs();
-    /// Push chunks if all inputs have single-level data.
-    bool tryPushSingleLevelData();
-    /// Push chunks from a ready bucket if there is one.
-    bool tryPushTwoLevelData();
-    /// Push overflow chunks if there are any.
-    bool tryPushOverflowData();
- /// Push chunks from bucket to output port.
- void pushData(Chunks chunks, Int32 bucket, bool is_overflows);
-};
-
-/// Merge aggregated data from single bucket.
-class MergingAggregatedBucketTransform : public ISimpleTransform
-{
-public:
- explicit MergingAggregatedBucketTransform(AggregatingTransformParamsPtr params);
- String getName() const override { return "MergingAggregatedBucketTransform"; }
-
-protected:
- void transform(Chunk & chunk) override;
-
-private:
- AggregatingTransformParamsPtr params;
-};
-
-/// Has several inputs and a single output.
-/// Reads merged buckets with aggregated data from the inputs, sorts them by bucket number and writes them to the output.
-/// Presumption: inputs return chunks with increasing bucket numbers, and there is at most one chunk per bucket.
-class SortingAggregatedTransform : public IProcessor
-{
-public:
- SortingAggregatedTransform(size_t num_inputs, AggregatingTransformParamsPtr params);
- String getName() const override { return "SortingAggregatedTransform"; }
- Status prepare() override;
-
-private:
- size_t num_inputs;
- AggregatingTransformParamsPtr params;
- std::vector<Int32> last_bucket_number;
- std::vector<bool> is_input_finished;
- std::map<Int32, Chunk> chunks;
- Chunk overflow_chunk;
-
- bool tryPushChunk();
- void addChunk(Chunk chunk, size_t from_input);
-};
-
-class Pipe;
-
-/// Adds processors to the pipe which perform memory-efficient merging of partially aggregated data from several sources.
-void addMergingAggregatedMemoryEfficientTransform(
- Pipe & pipe,
- AggregatingTransformParamsPtr params,
- size_t num_merging_processors);
-
-}
-
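The long comment above boils down to a bucket-ordered merge: every source yields blocks in increasing bucket order, and only one bucket's worth of blocks needs to be resident at a time. A standalone sketch of that idea (pairs of (bucket, partial value) stand in for blocks; names are illustrative):

    // Simplified model of the memory-efficient, bucket-ordered merge.
    #include <algorithm>
    #include <cstdio>
    #include <deque>
    #include <limits>
    #include <utility>
    #include <vector>

    using Item = std::pair<int, long>;       // (bucket_num, partial value)

    int main()
    {
        // Three "sources", each already ordered by bucket number (the convention above).
        std::vector<std::deque<Item>> sources = {
            {{0, 1}, {2, 5}},
            {{0, 2}, {1, 3}, {2, 1}},
            {{1, 7}},
        };

        while (true)
        {
            // Find the smallest bucket among the heads of all sources.
            int current = std::numeric_limits<int>::max();
            for (const auto & src : sources)
                if (!src.empty())
                    current = std::min(current, src.front().first);
            if (current == std::numeric_limits<int>::max())
                break;                        // all sources are exhausted

            // Merge exactly this bucket; later blocks stay in their sources for now.
            long merged = 0;
            for (auto & src : sources)
                while (!src.empty() && src.front().first == current)
                {
                    merged += src.front().second;
                    src.pop_front();
                }
            std::printf("bucket %d -> %ld\n", current, merged);
        }
    }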
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnDefault.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnDefault.cpp
deleted file mode 100644
index 153247d3c9b..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnDefault.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <Storages/ColumnDefault.h>
-#include <Parsers/queryToString.h>
-
-namespace
-{
-
-struct AliasNames
-{
- static constexpr const char * DEFAULT = "DEFAULT";
- static constexpr const char * MATERIALIZED = "MATERIALIZED";
- static constexpr const char * ALIAS = "ALIAS";
-};
-
-}
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
-}
-
-
-ColumnDefaultKind columnDefaultKindFromString(const std::string & str)
-{
- static const std::unordered_map<std::string, ColumnDefaultKind> map{
- { AliasNames::DEFAULT, ColumnDefaultKind::Default },
- { AliasNames::MATERIALIZED, ColumnDefaultKind::Materialized },
- { AliasNames::ALIAS, ColumnDefaultKind::Alias }
- };
-
- const auto it = map.find(str);
- if (it != std::end(map))
- return it->second;
-
- throw Exception{"Unknown column default specifier: " + str, ErrorCodes::LOGICAL_ERROR};
-}
-
-
-std::string toString(const ColumnDefaultKind kind)
-{
- static const std::unordered_map<ColumnDefaultKind, std::string> map{
- { ColumnDefaultKind::Default, AliasNames::DEFAULT },
- { ColumnDefaultKind::Materialized, AliasNames::MATERIALIZED },
- { ColumnDefaultKind::Alias, AliasNames::ALIAS }
- };
-
- const auto it = map.find(kind);
- if (it != std::end(map))
- return it->second;
-
- throw Exception{"Invalid ColumnDefaultKind", ErrorCodes::LOGICAL_ERROR};
-}
-
-
-bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs)
-{
- auto expression_str = [](const ASTPtr & expr) { return expr ? queryToString(expr) : String(); };
- return lhs.kind == rhs.kind && expression_str(lhs.expression) == expression_str(rhs.expression);
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnsDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnsDescription.cpp
deleted file mode 100644
index c58acdacf5e..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ColumnsDescription.cpp
+++ /dev/null
@@ -1,691 +0,0 @@
-#include <Storages/ColumnsDescription.h>
-
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ExpressionElementParsers.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/parseQuery.h>
-#include <Parsers/queryToString.h>
-#include <Parsers/ASTSubquery.h>
-#include <Parsers/ASTSelectQuery.h>
-#include <Parsers/ASTSelectWithUnionQuery.h>
-#include <IO/WriteBuffer.h>
-#include <IO/WriteHelpers.h>
-#include <IO/ReadBuffer.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/ReadBufferFromString.h>
-#include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/NestedUtils.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypeNested.h>
-#include <Common/Exception.h>
-#include <Interpreters/Context.h>
-#include <Storages/IStorage.h>
-#include <Common/typeid_cast.h>
-#include <Core/Defines.h>
-#include <Compression/CompressionFactory.h>
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/TreeRewriter.h>
-#include <Interpreters/ExpressionActions.h>
-
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int NO_SUCH_COLUMN_IN_TABLE;
- extern const int ILLEGAL_COLUMN;
- extern const int CANNOT_PARSE_TEXT;
- extern const int THERE_IS_NO_DEFAULT_VALUE;
- extern const int LOGICAL_ERROR;
-}
-
-ColumnDescription::ColumnDescription(String name_, DataTypePtr type_)
- : name(std::move(name_)), type(std::move(type_))
-{
-}
-
-bool ColumnDescription::operator==(const ColumnDescription & other) const
-{
- auto ast_to_str = [](const ASTPtr & ast) { return ast ? queryToString(ast) : String{}; };
-
- return name == other.name
- && type->equals(*other.type)
- && default_desc == other.default_desc
- && comment == other.comment
- && ast_to_str(codec) == ast_to_str(other.codec)
- && ast_to_str(ttl) == ast_to_str(other.ttl);
-}
-
-void ColumnDescription::writeText(WriteBuffer & buf) const
-{
- /// NOTE: Serialization format is insane.
-
- writeBackQuotedString(name, buf);
- writeChar(' ', buf);
- writeEscapedString(type->getName(), buf);
-
- if (default_desc.expression)
- {
- writeChar('\t', buf);
- DB::writeText(DB::toString(default_desc.kind), buf);
- writeChar('\t', buf);
- writeEscapedString(queryToString(default_desc.expression), buf);
- }
-
- if (!comment.empty())
- {
- writeChar('\t', buf);
- DB::writeText("COMMENT ", buf);
- writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf);
- }
-
- if (codec)
- {
- writeChar('\t', buf);
- writeEscapedString(queryToString(codec), buf);
- }
-
- if (ttl)
- {
- writeChar('\t', buf);
- DB::writeText("TTL ", buf);
- writeEscapedString(queryToString(ttl), buf);
- }
-
- writeChar('\n', buf);
-}
-
-void ColumnDescription::readText(ReadBuffer & buf)
-{
- readBackQuotedString(name, buf);
- assertChar(' ', buf);
-
- String type_string;
- readEscapedString(type_string, buf);
- type = DataTypeFactory::instance().get(type_string);
-
- if (checkChar('\t', buf))
- {
- String modifiers;
- readEscapedStringUntilEOL(modifiers, buf);
-
- ParserColumnDeclaration column_parser(/* require type */ true);
- ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-
- if (const auto * col_ast = ast->as<ASTColumnDeclaration>())
- {
- if (col_ast->default_expression)
- {
- default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier);
- default_desc.expression = std::move(col_ast->default_expression);
- }
-
- if (col_ast->comment)
- comment = col_ast->comment->as<ASTLiteral &>().value.get<String>();
-
- if (col_ast->codec)
- codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true);
-
- if (col_ast->ttl)
- ttl = col_ast->ttl;
- }
- else
- throw Exception("Cannot parse column description", ErrorCodes::CANNOT_PARSE_TEXT);
- }
-}
-
-
-ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary)
-{
- for (auto & elem : ordinary)
- add(ColumnDescription(std::move(elem.name), std::move(elem.type)));
-}
-
-ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases)
-{
- for (auto & elem : ordinary)
- add(ColumnDescription(std::move(elem.name), std::move(elem.type)));
-
- for (auto & alias : aliases)
- {
- ColumnDescription description(std::move(alias.name), std::move(alias.type));
- description.default_desc.kind = ColumnDefaultKind::Alias;
-
- const char * alias_expression_pos = alias.expression.data();
- const char * alias_expression_end = alias_expression_pos + alias.expression.size();
- ParserExpression expression_parser;
- description.default_desc.expression = parseQuery(expression_parser, alias_expression_pos, alias_expression_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-
- add(std::move(description));
- }
-}
-
-
-/// We are trying to find the first column with the name `column_name`, or with a name beginning with `column_name` followed by ".".
-/// For example "fruits.bananas".
-/// Names are considered the same if they match completely or if `name_without_dot` matches the part of the name up to the dot.
-static auto getNameRange(const ColumnsDescription::ColumnsContainer & columns, const String & name_without_dot)
-{
- String name_with_dot = name_without_dot + ".";
-
- auto begin = columns.begin();
- for (; begin != columns.end(); ++begin)
- {
- if (begin->name == name_without_dot)
- return std::make_pair(begin, std::next(begin));
-
- if (begin->name.starts_with(name_with_dot))
- break;
- }
-
- if (begin == columns.end())
- return std::make_pair(begin, begin);
-
- auto end = std::next(begin);
- for (; end != columns.end(); ++end)
- {
- if (!end->name.starts_with(name_with_dot))
- break;
- }
-
- return std::make_pair(begin, end);
-}
-
-void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first)
-{
- if (has(column.name))
- throw Exception("Cannot add column " + column.name + ": column with this name already exists",
- ErrorCodes::ILLEGAL_COLUMN);
-
- auto insert_it = columns.cend();
-
- if (first)
- insert_it = columns.cbegin();
- else if (!after_column.empty())
- {
- auto range = getNameRange(columns, after_column);
- if (range.first == range.second)
- throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after",
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
- insert_it = range.second;
- }
-
- addSubcolumns(column.name, column.type);
- columns.get<0>().insert(insert_it, std::move(column));
-}
-
-void ColumnsDescription::remove(const String & column_name)
-{
- auto range = getNameRange(columns, column_name);
- if (range.first == range.second)
- throw Exception("There is no column " + column_name + " in table.",
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
- for (auto list_it = range.first; list_it != range.second;)
- {
- removeSubcolumns(list_it->name);
- list_it = columns.get<0>().erase(list_it);
- }
-}
-
-void ColumnsDescription::rename(const String & column_from, const String & column_to)
-{
- auto it = columns.get<1>().find(column_from);
- if (it == columns.get<1>().end())
- throw Exception("Cannot find column " + column_from + " in ColumnsDescription", ErrorCodes::LOGICAL_ERROR);
-
- columns.get<1>().modify_key(it, [&column_to] (String & old_name)
- {
- old_name = column_to;
- });
-}
-
-void ColumnsDescription::modifyColumnOrder(const String & column_name, const String & after_column, bool first)
-{
- const auto & reorder_column = [&](auto get_new_pos)
- {
- auto column_range = getNameRange(columns, column_name);
-
- if (column_range.first == column_range.second)
- throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
- std::vector<ColumnDescription> moving_columns;
- for (auto list_it = column_range.first; list_it != column_range.second;)
- {
- moving_columns.emplace_back(*list_it);
- list_it = columns.get<0>().erase(list_it);
- }
-
- columns.get<0>().insert(get_new_pos(), moving_columns.begin(), moving_columns.end());
- };
-
- if (first)
- reorder_column([&]() { return columns.cbegin(); });
- else if (!after_column.empty() && column_name != after_column)
- {
- /// Checked first
- auto range = getNameRange(columns, after_column);
- if (range.first == range.second)
- throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after",
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
- reorder_column([&]() { return getNameRange(columns, after_column).second; });
- }
-}
-
-void ColumnsDescription::flattenNested()
-{
- for (auto it = columns.begin(); it != columns.end();)
- {
- const auto * type_arr = typeid_cast<const DataTypeArray *>(it->type.get());
- if (!type_arr)
- {
- ++it;
- continue;
- }
-
- const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get());
- if (!type_tuple)
- {
- ++it;
- continue;
- }
-
- if (!type_tuple->haveExplicitNames())
- {
- ++it;
- continue;
- }
-
- ColumnDescription column = std::move(*it);
- removeSubcolumns(column.name);
- it = columns.get<0>().erase(it);
-
- const DataTypes & elements = type_tuple->getElements();
- const Strings & names = type_tuple->getElementNames();
- size_t tuple_size = elements.size();
-
- for (size_t i = 0; i < tuple_size; ++i)
- {
- auto nested_column = column;
- /// TODO: what to do with default expressions?
- nested_column.name = Nested::concatenateName(column.name, names[i]);
- nested_column.type = std::make_shared<DataTypeArray>(elements[i]);
-
- addSubcolumns(nested_column.name, nested_column.type);
- columns.get<0>().insert(it, std::move(nested_column));
- }
- }
-}
-
-
-NamesAndTypesList ColumnsDescription::getOrdinary() const
-{
- NamesAndTypesList ret;
- for (const auto & col : columns)
- if (col.default_desc.kind == ColumnDefaultKind::Default)
- ret.emplace_back(col.name, col.type);
- return ret;
-}
-
-NamesAndTypesList ColumnsDescription::getMaterialized() const
-{
- NamesAndTypesList ret;
- for (const auto & col : columns)
- if (col.default_desc.kind == ColumnDefaultKind::Materialized)
- ret.emplace_back(col.name, col.type);
- return ret;
-}
-
-NamesAndTypesList ColumnsDescription::getAliases() const
-{
- NamesAndTypesList ret;
- for (const auto & col : columns)
- if (col.default_desc.kind == ColumnDefaultKind::Alias)
- ret.emplace_back(col.name, col.type);
- return ret;
-}
-
-NamesAndTypesList ColumnsDescription::getAll() const
-{
- NamesAndTypesList ret;
- for (const auto & col : columns)
- ret.emplace_back(col.name, col.type);
- return ret;
-}
-
-bool ColumnsDescription::has(const String & column_name) const
-{
- return columns.get<1>().find(column_name) != columns.get<1>().end();
-}
-
-bool ColumnsDescription::hasNested(const String & column_name) const
-{
- auto range = getNameRange(columns, column_name);
- return range.first != range.second && range.first->name.length() > column_name.length();
-}
-
-bool ColumnsDescription::hasSubcolumn(const String & column_name) const
-{
- return subcolumns.get<0>().count(column_name);
-}
-
-const ColumnDescription & ColumnsDescription::get(const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- if (it == columns.get<1>().end())
- throw Exception("There is no column " + column_name + " in table.",
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
- return *it;
-}
-
-static ColumnsDescription::GetFlags defaultKindToGetFlag(ColumnDefaultKind kind)
-{
- switch (kind)
- {
- case ColumnDefaultKind::Default:
- return ColumnsDescription::Ordinary;
- case ColumnDefaultKind::Materialized:
- return ColumnsDescription::Materialized;
- case ColumnDefaultKind::Alias:
- return ColumnsDescription::Aliases;
- }
- __builtin_unreachable();
-}
-
-NamesAndTypesList ColumnsDescription::getByNames(GetFlags flags, const Names & names, bool with_subcolumns) const
-{
- NamesAndTypesList res;
- for (const auto & name : names)
- {
- if (auto it = columns.get<1>().find(name); it != columns.get<1>().end())
- {
- auto kind = defaultKindToGetFlag(it->default_desc.kind);
- if (flags & kind)
- {
- res.emplace_back(name, it->type);
- continue;
- }
- }
- else if (with_subcolumns)
- {
- auto jt = subcolumns.get<0>().find(name);
- if (jt != subcolumns.get<0>().end())
- {
- res.push_back(*jt);
- continue;
- }
- }
-
- throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", name);
- }
-
- return res;
-}
-
-
-NamesAndTypesList ColumnsDescription::getAllPhysical() const
-{
- NamesAndTypesList ret;
- for (const auto & col : columns)
- if (col.default_desc.kind != ColumnDefaultKind::Alias)
- ret.emplace_back(col.name, col.type);
- return ret;
-}
-
-Names ColumnsDescription::getNamesOfPhysical() const
-{
- Names ret;
- for (const auto & col : columns)
- if (col.default_desc.kind != ColumnDefaultKind::Alias)
- ret.emplace_back(col.name);
- return ret;
-}
-
-std::optional<NameAndTypePair> ColumnsDescription::tryGetColumnOrSubcolumn(GetFlags flags, const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- if (it != columns.get<1>().end() && (defaultKindToGetFlag(it->default_desc.kind) & flags))
- return NameAndTypePair(it->name, it->type);
-
- auto jt = subcolumns.get<0>().find(column_name);
- if (jt != subcolumns.get<0>().end())
- return *jt;
-
- return {};
-}
-
-NameAndTypePair ColumnsDescription::getColumnOrSubcolumn(GetFlags flags, const String & column_name) const
-{
- auto column = tryGetColumnOrSubcolumn(flags, column_name);
- if (!column)
- throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
- "There is no column or subcolumn {} in table.", column_name);
-
- return *column;
-}
-
-std::optional<NameAndTypePair> ColumnsDescription::tryGetPhysical(const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias)
- return {};
-
- return NameAndTypePair(it->name, it->type);
-}
-
-NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) const
-{
- auto column = tryGetPhysical(column_name);
- if (!column)
- throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
- "There is no physical column {} in table.", column_name);
-
- return *column;
-}
-
-bool ColumnsDescription::hasPhysical(const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias;
-}
-
-bool ColumnsDescription::hasColumnOrSubcolumn(GetFlags flags, const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- return (it != columns.get<1>().end()
- && (defaultKindToGetFlag(it->default_desc.kind) & flags))
- || hasSubcolumn(column_name);
-}
-
-void ColumnsDescription::addSubcolumnsToList(NamesAndTypesList & source_list) const
-{
- NamesAndTypesList subcolumns_list;
- for (const auto & col : source_list)
- {
- auto range = subcolumns.get<1>().equal_range(col.name);
- if (range.first != range.second)
- subcolumns_list.insert(subcolumns_list.end(), range.first, range.second);
- }
-
- source_list.splice(source_list.end(), std::move(subcolumns_list));
-}
-
-NamesAndTypesList ColumnsDescription::getAllWithSubcolumns() const
-{
- auto columns_list = getAll();
- addSubcolumnsToList(columns_list);
- return columns_list;
-}
-
-NamesAndTypesList ColumnsDescription::getAllPhysicalWithSubcolumns() const
-{
- auto columns_list = getAllPhysical();
- addSubcolumnsToList(columns_list);
- return columns_list;
-}
-
-bool ColumnsDescription::hasDefaults() const
-{
- for (const auto & column : columns)
- if (column.default_desc.expression)
- return true;
- return false;
-}
-
-ColumnDefaults ColumnsDescription::getDefaults() const
-{
- ColumnDefaults ret;
- for (const auto & column : columns)
- if (column.default_desc.expression)
- ret.emplace(column.name, column.default_desc);
-
- return ret;
-}
-
-bool ColumnsDescription::hasDefault(const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- return it != columns.get<1>().end() && it->default_desc.expression;
-}
-
-std::optional<ColumnDefault> ColumnsDescription::getDefault(const String & column_name) const
-{
- auto it = columns.get<1>().find(column_name);
- if (it != columns.get<1>().end() && it->default_desc.expression)
- return it->default_desc;
-
- return {};
-}
-
-
-bool ColumnsDescription::hasCompressionCodec(const String & column_name) const
-{
- const auto it = columns.get<1>().find(column_name);
-
- return it != columns.get<1>().end() && it->codec != nullptr;
-}
-
-CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
-{
- const auto it = columns.get<1>().find(column_name);
-
- if (it == columns.get<1>().end() || !it->codec)
- return default_codec;
-
- return CompressionCodecFactory::instance().get(it->codec, it->type, default_codec);
-}
-
-CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const
-{
- return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec());
-}
-
-ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
-{
- const auto it = columns.get<1>().find(column_name);
-
- if (it == columns.get<1>().end() || !it->codec)
- return default_codec->getFullCodecDesc();
-
- return it->codec;
-}
-
-ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const
-{
- ColumnTTLs ret;
- for (const auto & column : columns)
- if (column.ttl)
- ret.emplace(column.name, column.ttl);
- return ret;
-}
-
-
-String ColumnsDescription::toString() const
-{
- WriteBufferFromOwnString buf;
-
- writeCString("columns format version: 1\n", buf);
- DB::writeText(columns.size(), buf);
- writeCString(" columns:\n", buf);
-
- for (const ColumnDescription & column : columns)
- column.writeText(buf);
-
- return buf.str();
-}
-
-ColumnsDescription ColumnsDescription::parse(const String & str)
-{
- ReadBufferFromString buf{str};
-
- assertString("columns format version: 1\n", buf);
- size_t count{};
- readText(count, buf);
- assertString(" columns:\n", buf);
-
- ColumnsDescription result;
- for (size_t i = 0; i < count; ++i)
- {
- ColumnDescription column;
- column.readText(buf);
- buf.ignore(1); /// ignore new line
- result.add(column);
- }
-
- assertEOF(buf);
- return result;
-}
-
-void ColumnsDescription::addSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage)
-{
- for (const auto & subcolumn_name : type_in_storage->getSubcolumnNames())
- {
- auto subcolumn = NameAndTypePair(name_in_storage, subcolumn_name,
- type_in_storage, type_in_storage->getSubcolumnType(subcolumn_name));
-
- if (has(subcolumn.name))
- throw Exception(ErrorCodes::ILLEGAL_COLUMN,
- "Cannot add subcolumn {}: column with this name already exists", subcolumn.name);
-
- subcolumns.get<0>().insert(std::move(subcolumn));
- }
-}
-
-void ColumnsDescription::removeSubcolumns(const String & name_in_storage)
-{
- auto range = subcolumns.get<1>().equal_range(name_in_storage);
- if (range.first != range.second)
- subcolumns.get<1>().erase(range.first, range.second);
-}
-
-Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context)
-{
- for (const auto & child : default_expr_list->children)
- if (child->as<ASTSelectQuery>() || child->as<ASTSelectWithUnionQuery>() || child->as<ASTSubquery>())
- throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
-
- try
- {
- auto syntax_analyzer_result = TreeRewriter(context).analyze(default_expr_list, all_columns, {}, {}, false, /* allow_self_aliases = */ false);
- const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true);
- for (const auto & action : actions->getActions())
- if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
- throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
-
- return actions->getSampleBlock();
- }
- catch (Exception & ex)
- {
- ex.addMessage("default expression and column type are incompatible.");
- throw;
- }
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IStorage.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IStorage.cpp
deleted file mode 100644
index 9caea4d3fde..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IStorage.cpp
+++ /dev/null
@@ -1,260 +0,0 @@
-#include <Storages/IStorage.h>
-
-#include <Common/quoteString.h>
-#include <IO/Operators.h>
-#include <IO/WriteBufferFromString.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/ExpressionActions.h>
-#include <Interpreters/InterpreterSelectQuery.h>
-#include <Parsers/ASTCreateQuery.h>
-#include <Parsers/ASTSetQuery.h>
-#include <Processors/Pipe.h>
-#include <Processors/QueryPlan/ReadFromPreparedSource.h>
-#include <Storages/AlterCommands.h>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int TABLE_IS_DROPPED;
- extern const int NOT_IMPLEMENTED;
- extern const int DEADLOCK_AVOIDED;
-}
-
-bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const
-{
- /// A virtual column may be overridden by a real column
- return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name);
-}
-
-RWLockImpl::LockHolder IStorage::tryLockTimed(
- const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const
-{
- auto lock_holder = rwlock->getLock(type, query_id, acquire_timeout);
- if (!lock_holder)
- {
- const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE";
- throw Exception(
- type_str + " locking attempt on \"" + getStorageID().getFullTableName() + "\" has timed out! ("
- + std::to_string(acquire_timeout.count())
- + "ms) "
- "Possible deadlock avoided. Client should retry.",
- ErrorCodes::DEADLOCK_AVOIDED);
- }
- return lock_holder;
-}
-
-TableLockHolder IStorage::lockForShare(const String & query_id, const std::chrono::milliseconds & acquire_timeout)
-{
- TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout);
-
- if (is_dropped)
- {
- auto table_id = getStorageID();
- throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name);
- }
-
- return result;
-}
-
-TableLockHolder IStorage::lockForAlter(const String & query_id, const std::chrono::milliseconds & acquire_timeout)
-{
- TableLockHolder result = tryLockTimed(alter_lock, RWLockImpl::Write, query_id, acquire_timeout);
-
- if (is_dropped)
- throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED);
-
- return result;
-}
-
-
-TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, const std::chrono::milliseconds & acquire_timeout)
-{
- TableExclusiveLockHolder result;
- result.alter_lock = tryLockTimed(alter_lock, RWLockImpl::Write, query_id, acquire_timeout);
-
- if (is_dropped)
- throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED);
-
- result.drop_lock = tryLockTimed(drop_lock, RWLockImpl::Write, query_id, acquire_timeout);
-
- return result;
-}
-
-Pipe IStorage::read(
- const Names & /*column_names*/,
- const StorageMetadataPtr & /*metadata_snapshot*/,
- SelectQueryInfo & /*query_info*/,
- ContextPtr /*context*/,
- QueryProcessingStage::Enum /*processed_stage*/,
- size_t /*max_block_size*/,
- unsigned /*num_streams*/)
-{
- throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
-}
-
-void IStorage::read(
- QueryPlan & query_plan,
- const Names & column_names,
- const StorageMetadataPtr & metadata_snapshot,
- SelectQueryInfo & query_info,
- ContextPtr context,
- QueryProcessingStage::Enum processed_stage,
- size_t max_block_size,
- unsigned num_streams)
-{
- auto pipe = read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
- if (pipe.empty())
- {
- auto header = (query_info.projection ? query_info.projection->desc->metadata : metadata_snapshot)
- ->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID());
- InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context);
- }
- else
- {
- auto read_step = std::make_unique<ReadFromStorageStep>(std::move(pipe), getName());
- query_plan.addStep(std::move(read_step));
- }
-}
-
-Pipe IStorage::alterPartition(
- const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, ContextPtr /* context */)
-{
- throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
-}
-
-void IStorage::alter(const AlterCommands & params, ContextPtr context, TableLockHolder &)
-{
- auto table_id = getStorageID();
- StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
- params.apply(new_metadata, context);
- DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata);
- setInMemoryMetadata(new_metadata);
-}
-
-
-void IStorage::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* context */) const
-{
- for (const auto & command : commands)
- {
- if (!command.isCommentAlter())
- throw Exception(
- "Alter of type '" + alterTypeToString(command.type) + "' is not supported by storage " + getName(),
- ErrorCodes::NOT_IMPLEMENTED);
- }
-}
-
-void IStorage::checkMutationIsPossible(const MutationCommands & /*commands*/, const Settings & /*settings*/) const
-{
- throw Exception("Table engine " + getName() + " doesn't support mutations", ErrorCodes::NOT_IMPLEMENTED);
-}
-
-void IStorage::checkAlterPartitionIsPossible(
- const PartitionCommands & /*commands*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & /*settings*/) const
-{
- throw Exception("Table engine " + getName() + " doesn't support partitioning", ErrorCodes::NOT_IMPLEMENTED);
-}
-
-StorageID IStorage::getStorageID() const
-{
- std::lock_guard lock(id_mutex);
- return storage_id;
-}
-
-void IStorage::renameInMemory(const StorageID & new_table_id)
-{
- std::lock_guard lock(id_mutex);
- storage_id = new_table_id;
-}
-
-NamesAndTypesList IStorage::getVirtuals() const
-{
- return {};
-}
-
-Names IStorage::getAllRegisteredNames() const
-{
- Names result;
- auto getter = [](const auto & column) { return column.name; };
- const NamesAndTypesList & available_columns = getInMemoryMetadata().getColumns().getAllPhysical();
- std::transform(available_columns.begin(), available_columns.end(), std::back_inserter(result), getter);
- return result;
-}
-
-NameDependencies IStorage::getDependentViewsByColumn(ContextPtr context) const
-{
- NameDependencies name_deps;
- auto dependencies = DatabaseCatalog::instance().getDependencies(storage_id);
- for (const auto & depend_id : dependencies)
- {
- auto depend_table = DatabaseCatalog::instance().getTable(depend_id, context);
- if (depend_table->getInMemoryMetadataPtr()->select.inner_query)
- {
- const auto & select_query = depend_table->getInMemoryMetadataPtr()->select.inner_query;
- auto required_columns = InterpreterSelectQuery(select_query, context, SelectQueryOptions{}.noModify()).getRequiredColumns();
- for (const auto & col_name : required_columns)
- name_deps[col_name].push_back(depend_id.table_name);
- }
- }
- return name_deps;
-}
-
-bool IStorage::isReadOnly() const
-{
- auto storage_policy = getStoragePolicy();
- if (storage_policy)
- {
- for (const auto & disk : storage_policy->getDisks())
- if (!disk->isReadOnly())
- return false;
- return true;
- }
- return false;
-}
-
-BackupEntries IStorage::backup(const ASTs &, ContextPtr) const
-{
- throw Exception("Table engine " + getName() + " doesn't support backups", ErrorCodes::NOT_IMPLEMENTED);
-}
-
-RestoreDataTasks IStorage::restoreFromBackup(const BackupPtr &, const String &, const ASTs &, ContextMutablePtr)
-{
- throw Exception("Table engine " + getName() + " doesn't support restoring", ErrorCodes::NOT_IMPLEMENTED);
-}
-
-std::string PrewhereInfo::dump() const
-{
- WriteBufferFromOwnString ss;
- ss << "PrewhereDagInfo\n";
-
- if (alias_actions)
- {
- ss << "alias_actions " << alias_actions->dumpDAG() << "\n";
- }
-
- if (prewhere_actions)
- {
- ss << "prewhere_actions " << prewhere_actions->dumpDAG() << "\n";
- }
-
- ss << "remove_prewhere_column " << remove_prewhere_column
- << ", need_filter " << need_filter << "\n";
-
- return ss.str();
-}
-
-std::string FilterDAGInfo::dump() const
-{
- WriteBufferFromOwnString ss;
- ss << "FilterDAGInfo for column '" << column_name <<"', do_remove_column "
- << do_remove_column << "\n";
- if (actions)
- {
- ss << "actions " << actions->dumpDAG() << "\n";
- }
-
- return ss.str();
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IndicesDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IndicesDescription.cpp
deleted file mode 100644
index f537077b684..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/IndicesDescription.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/TreeRewriter.h>
-#include <Storages/IndicesDescription.h>
-
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTIndexDeclaration.h>
-#include <Parsers/formatAST.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/parseQuery.h>
-#include <Storages/extractKeyExpressionList.h>
-
-#include <Core/Defines.h>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int INCORRECT_QUERY;
- extern const int LOGICAL_ERROR;
-};
-
-IndexDescription::IndexDescription(const IndexDescription & other)
- : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
- , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr)
- , name(other.name)
- , type(other.type)
- , arguments(other.arguments)
- , column_names(other.column_names)
- , data_types(other.data_types)
- , sample_block(other.sample_block)
- , granularity(other.granularity)
-{
- if (other.expression)
- expression = other.expression->clone();
-}
-
-
-IndexDescription & IndexDescription::operator=(const IndexDescription & other)
-{
- if (&other == this)
- return *this;
-
- if (other.definition_ast)
- definition_ast = other.definition_ast->clone();
- else
- definition_ast.reset();
-
- if (other.expression_list_ast)
- expression_list_ast = other.expression_list_ast->clone();
- else
- expression_list_ast.reset();
-
- name = other.name;
- type = other.type;
-
- if (other.expression)
- expression = other.expression->clone();
- else
- expression.reset();
-
- arguments = other.arguments;
- column_names = other.column_names;
- data_types = other.data_types;
- sample_block = other.sample_block;
- granularity = other.granularity;
- return *this;
-}
-
-IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context)
-{
- const auto * index_definition = definition_ast->as<ASTIndexDeclaration>();
- if (!index_definition)
- throw Exception("Cannot create skip index from non ASTIndexDeclaration AST", ErrorCodes::LOGICAL_ERROR);
-
- if (index_definition->name.empty())
- throw Exception("Skip index must have name in definition.", ErrorCodes::INCORRECT_QUERY);
-
- if (!index_definition->type)
- throw Exception("TYPE is required for index", ErrorCodes::INCORRECT_QUERY);
-
- if (index_definition->type->parameters && !index_definition->type->parameters->children.empty())
- throw Exception("Index type cannot have parameters", ErrorCodes::INCORRECT_QUERY);
-
- IndexDescription result;
- result.definition_ast = index_definition->clone();
- result.name = index_definition->name;
- result.type = Poco::toLower(index_definition->type->name);
- result.granularity = index_definition->granularity;
-
- ASTPtr expr_list = extractKeyExpressionList(index_definition->expr->clone());
- result.expression_list_ast = expr_list->clone();
-
- auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical());
- result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true);
- Block block_without_columns = result.expression->getSampleBlock();
-
- for (size_t i = 0; i < block_without_columns.columns(); ++i)
- {
- const auto & column = block_without_columns.getByPosition(i);
- result.column_names.emplace_back(column.name);
- result.data_types.emplace_back(column.type);
- result.sample_block.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name));
- }
-
- const auto & definition_arguments = index_definition->type->arguments;
- if (definition_arguments)
- {
- for (size_t i = 0; i < definition_arguments->children.size(); ++i)
- {
- const auto * argument = definition_arguments->children[i]->as<ASTLiteral>();
- if (!argument)
- throw Exception("Only literals can be skip index arguments", ErrorCodes::INCORRECT_QUERY);
- result.arguments.emplace_back(argument->value);
- }
- }
-
- return result;
-}
-
-void IndexDescription::recalculateWithNewColumns(const ColumnsDescription & new_columns, ContextPtr context)
-{
- *this = getIndexFromAST(definition_ast, new_columns, context);
-}
-
-bool IndicesDescription::has(const String & name) const
-{
- for (const auto & index : *this)
- if (index.name == name)
- return true;
- return false;
-}
-
-String IndicesDescription::toString() const
-{
- if (empty())
- return {};
-
- ASTExpressionList list;
- for (const auto & index : *this)
- list.children.push_back(index.definition_ast);
-
- return serializeAST(list, true);
-}
-
-
-IndicesDescription IndicesDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context)
-{
- IndicesDescription result;
- if (str.empty())
- return result;
-
- ParserIndexDeclarationList parser;
- ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-
- for (const auto & index : list->children)
- result.emplace_back(IndexDescription::getIndexFromAST(index, columns, context));
-
- return result;
-}
-
-
-ExpressionActionsPtr IndicesDescription::getSingleExpressionForIndices(const ColumnsDescription & columns, ContextPtr context) const
-{
- ASTPtr combined_expr_list = std::make_shared<ASTExpressionList>();
- for (const auto & index : *this)
- for (const auto & index_expr : index.expression_list_ast->children)
- combined_expr_list->children.push_back(index_expr->clone());
-
- auto syntax_result = TreeRewriter(context).analyze(combined_expr_list, columns.getAllPhysical());
- return ExpressionAnalyzer(combined_expr_list, syntax_result, context).getActions(false);
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/KeyDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/KeyDescription.cpp
deleted file mode 100644
index f39be58dced..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/KeyDescription.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-#include <Storages/KeyDescription.h>
-
-#include <Functions/IFunction.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTFunction.h>
-#include <Interpreters/ExpressionActions.h>
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/TreeRewriter.h>
-#include <Storages/extractKeyExpressionList.h>
-#include <Common/quoteString.h>
-
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
- extern const int LOGICAL_ERROR;
- extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY;
-}
-
-KeyDescription::KeyDescription(const KeyDescription & other)
- : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
- , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr)
- , sample_block(other.sample_block)
- , column_names(other.column_names)
- , data_types(other.data_types)
- , additional_column(other.additional_column)
-{
- if (other.expression)
- expression = other.expression->clone();
-}
-
-KeyDescription & KeyDescription::operator=(const KeyDescription & other)
-{
- if (&other == this)
- return *this;
-
- if (other.definition_ast)
- definition_ast = other.definition_ast->clone();
- else
- definition_ast.reset();
-
- if (other.expression_list_ast)
- expression_list_ast = other.expression_list_ast->clone();
- else
- expression_list_ast.reset();
-
-
- if (other.expression)
- expression = other.expression->clone();
- else
- expression.reset();
-
- sample_block = other.sample_block;
- column_names = other.column_names;
- data_types = other.data_types;
-
- /// additional_column is a constant property. It should never be lost.
- if (additional_column.has_value() && !other.additional_column.has_value())
- throw Exception("Wrong key assignment, losing additional_column", ErrorCodes::LOGICAL_ERROR);
- additional_column = other.additional_column;
- return *this;
-}
-
-
-void KeyDescription::recalculateWithNewAST(
- const ASTPtr & new_ast,
- const ColumnsDescription & columns,
- ContextPtr context)
-{
- *this = getSortingKeyFromAST(new_ast, columns, context, additional_column);
-}
-
-void KeyDescription::recalculateWithNewColumns(
- const ColumnsDescription & new_columns,
- ContextPtr context)
-{
- *this = getSortingKeyFromAST(definition_ast, new_columns, context, additional_column);
-}
-
-KeyDescription KeyDescription::getKeyFromAST(
- const ASTPtr & definition_ast,
- const ColumnsDescription & columns,
- ContextPtr context)
-{
- return getSortingKeyFromAST(definition_ast, columns, context, {});
-}
-
-bool KeyDescription::moduloToModuloLegacyRecursive(ASTPtr node_expr)
-{
- if (!node_expr)
- return false;
-
- auto * function_expr = node_expr->as<ASTFunction>();
- bool modulo_in_ast = false;
- if (function_expr)
- {
- if (function_expr->name == "modulo")
- {
- function_expr->name = "moduloLegacy";
- modulo_in_ast = true;
- }
- if (function_expr->arguments)
- {
- auto children = function_expr->arguments->children;
- for (const auto & child : children)
- modulo_in_ast |= moduloToModuloLegacyRecursive(child);
- }
- }
- return modulo_in_ast;
-}
-
-KeyDescription KeyDescription::getSortingKeyFromAST(
- const ASTPtr & definition_ast,
- const ColumnsDescription & columns,
- ContextPtr context,
- const std::optional<String> & additional_column)
-{
- KeyDescription result;
- result.definition_ast = definition_ast;
- result.expression_list_ast = extractKeyExpressionList(definition_ast);
-
- if (additional_column)
- {
- result.additional_column = additional_column;
- ASTPtr column_identifier = std::make_shared<ASTIdentifier>(*additional_column);
- result.expression_list_ast->children.push_back(column_identifier);
- }
-
- const auto & children = result.expression_list_ast->children;
- for (const auto & child : children)
- result.column_names.emplace_back(child->getColumnName());
-
- {
- auto expr = result.expression_list_ast->clone();
- auto syntax_result = TreeRewriter(context).analyze(expr, columns.getAllPhysical());
- /// In expression we also need to store source columns
- result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(false);
- /// In sample block we use just key columns
- result.sample_block = ExpressionAnalyzer(expr, syntax_result, context).getActions(true)->getSampleBlock();
- }
-
- for (size_t i = 0; i < result.sample_block.columns(); ++i)
- {
- result.data_types.emplace_back(result.sample_block.getByPosition(i).type);
- if (!result.data_types.back()->isComparable())
- throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY,
- "Column {} with type {} is not allowed in key expression, it's not comparable",
- backQuote(result.sample_block.getByPosition(i).name), result.data_types.back()->getName());
- }
-
- return result;
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp
deleted file mode 100644
index 537bb469da5..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include <vector>
-#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
-
-
-namespace NDB
-{
-
-std::vector<UUID> PartUUIDs::add(const std::vector<UUID> & new_uuids)
-{
- std::lock_guard lock(mutex);
- std::vector<UUID> intersection;
-
- /// First check whether any of the new uuids are already present in the set; return the duplicates back if any.
- for (const auto & uuid : new_uuids)
- {
- if (uuids.find(uuid) != uuids.end())
- intersection.emplace_back(uuid);
- }
-
- if (intersection.empty())
- {
- for (const auto & uuid : new_uuids)
- uuids.emplace(uuid);
- }
- return intersection;
-}
-
-std::vector<UUID> PartUUIDs::get() const
-{
- std::lock_guard lock(mutex);
- return std::vector<UUID>(uuids.begin(), uuids.end());
-}
-
-bool PartUUIDs::has(const UUID & uuid) const
-{
- std::lock_guard lock(mutex);
- return uuids.find(uuid) != uuids.end();
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.h
deleted file mode 100644
index 9534284f973..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/MergeTree/MergeTreeDataPartUUID.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#pragma once
-
-#include <memory>
-#include <mutex>
-#include <unordered_set>
-#include <Core/UUID.h>
-
-namespace NDB
-{
-
-/** PartUUIDs is a uuid set to control query deduplication.
- * The object is used in the query context in both directions:
- * Server->Client to send all parts' UUIDs that have been read during the query,
- * Client->Server to exclude the specified parts from being processed.
- *
- * Current implementation assumes a user setting allow_experimental_query_deduplication=1 is set.
- */
-struct PartUUIDs
-{
-public:
- /// Add new UUIDs if no duplicates are found; otherwise return the duplicated UUIDs
- std::vector<UUID> add(const std::vector<UUID> & uuids);
- /// Get accumulated UUIDs
- std::vector<UUID> get() const;
- bool has(const UUID & uuid) const;
-
-private:
- mutable std::mutex mutex;
- std::unordered_set<UUID> uuids;
-};
-
-using PartUUIDsPtr = std::shared_ptr<PartUUIDs>;
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ProjectionsDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ProjectionsDescription.cpp
deleted file mode 100644
index 5170b459c7c..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/ProjectionsDescription.cpp
+++ /dev/null
@@ -1,370 +0,0 @@
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/TreeRewriter.h>
-#include <Storages/ProjectionsDescription.h>
-
-#include <Parsers/ASTProjectionDeclaration.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/parseQuery.h>
-#include <Parsers/queryToString.h>
-
-#include <Core/Defines.h>
-#include <Interpreters/InterpreterSelectQuery.h>
-#include <Parsers/ASTProjectionSelectQuery.h>
-#include <Parsers/ASTSubquery.h>
-#include <Processors/Pipe.h>
-#include <Processors/Sources/SourceFromSingleChunk.h>
-
-#include <DataStreams/SquashingBlockInputStream.h>
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int INCORRECT_QUERY;
- extern const int NO_SUCH_PROJECTION_IN_TABLE;
- extern const int ILLEGAL_PROJECTION;
- extern const int NOT_IMPLEMENTED;
- extern const int LOGICAL_ERROR;
-};
-
-const char * ProjectionDescription::typeToString(Type type)
-{
- switch (type)
- {
- case Type::Normal:
- return "normal";
- case Type::Aggregate:
- return "aggregate";
- }
-
- __builtin_unreachable();
-}
-
-
-bool ProjectionDescription::isPrimaryKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const
-{
- const String column_name = node->getColumnName();
-
- for (const auto & key_name : metadata->getPrimaryKeyColumns())
- if (column_name == key_name)
- return true;
-
- if (const auto * func = node->as<ASTFunction>())
- if (func->arguments->children.size() == 1)
- return isPrimaryKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front());
-
- return false;
-}
-
-
-ProjectionDescription ProjectionDescription::clone() const
-{
- ProjectionDescription other;
- if (definition_ast)
- other.definition_ast = definition_ast->clone();
- if (query_ast)
- other.query_ast = query_ast->clone();
-
- other.name = name;
- other.type = type;
- other.required_columns = required_columns;
- other.column_names = column_names;
- other.data_types = data_types;
- other.sample_block = sample_block;
- other.sample_block_for_keys = sample_block_for_keys;
- other.metadata = metadata;
- other.key_size = key_size;
- other.is_minmax_count_projection = is_minmax_count_projection;
-
- return other;
-}
-
-ProjectionsDescription ProjectionsDescription::clone() const
-{
- ProjectionsDescription other;
- for (const auto & projection : projections)
- other.add(projection.clone());
-
- return other;
-}
-
-bool ProjectionDescription::operator==(const ProjectionDescription & other) const
-{
- return name == other.name && queryToString(definition_ast) == queryToString(other.definition_ast);
-}
-
-ProjectionDescription
-ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr query_context)
-{
- const auto * projection_definition = definition_ast->as<ASTProjectionDeclaration>();
-
- if (!projection_definition)
- throw Exception("Cannot create projection from non ASTProjectionDeclaration AST", ErrorCodes::INCORRECT_QUERY);
-
- if (projection_definition->name.empty())
- throw Exception("Projection must have name in definition.", ErrorCodes::INCORRECT_QUERY);
-
- if (projection_definition->name.starts_with("tmp_"))
- throw Exception("Projection's name cannot start with 'tmp_'", ErrorCodes::INCORRECT_QUERY);
-
- if (!projection_definition->query)
- throw Exception("QUERY is required for projection", ErrorCodes::INCORRECT_QUERY);
-
- ProjectionDescription result;
- result.definition_ast = projection_definition->clone();
- result.name = projection_definition->name;
-
- auto query = projection_definition->query->as<ASTProjectionSelectQuery &>();
- result.query_ast = query.cloneToASTSelect();
-
- auto external_storage_holder = std::make_shared<TemporaryTableHolder>(query_context, columns, ConstraintsDescription{});
- StoragePtr storage = external_storage_holder->getTable();
- InterpreterSelectQuery select(
- result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias());
-
- result.required_columns = select.getRequiredColumns();
- result.sample_block = select.getSampleBlock();
-
- const auto & analysis_result = select.getAnalysisResult();
- if (analysis_result.need_aggregate)
- {
- for (const auto & key : select.getQueryAnalyzer()->aggregationKeys())
- result.sample_block_for_keys.insert({nullptr, key.type, key.name});
- }
-
- for (size_t i = 0; i < result.sample_block.columns(); ++i)
- {
- const auto & column_with_type_name = result.sample_block.getByPosition(i);
-
- if (column_with_type_name.column && isColumnConst(*column_with_type_name.column))
- throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Projections cannot contain constant columns: {}", column_with_type_name.name);
-
- result.column_names.emplace_back(column_with_type_name.name);
- result.data_types.emplace_back(column_with_type_name.type);
- }
-
- StorageInMemoryMetadata metadata;
- metadata.setColumns(ColumnsDescription(result.sample_block.getNamesAndTypesList()));
- metadata.partition_key = KeyDescription::getSortingKeyFromAST({}, metadata.columns, query_context, {});
-
- const auto & query_select = result.query_ast->as<const ASTSelectQuery &>();
- if (select.hasAggregation())
- {
- result.type = ProjectionDescription::Type::Aggregate;
- if (const auto & group_expression_list = query_select.groupBy())
- {
- ASTPtr order_expression;
- if (group_expression_list->children.size() == 1)
- {
- result.key_size = 1;
- order_expression = std::make_shared<ASTIdentifier>(group_expression_list->children.front()->getColumnName());
- }
- else
- {
- auto function_node = std::make_shared<ASTFunction>();
- function_node->name = "tuple";
- function_node->arguments = group_expression_list->clone();
- result.key_size = function_node->arguments->children.size();
- for (auto & child : function_node->arguments->children)
- child = std::make_shared<ASTIdentifier>(child->getColumnName());
- function_node->children.push_back(function_node->arguments);
- order_expression = function_node;
- }
- metadata.sorting_key = KeyDescription::getSortingKeyFromAST(order_expression, metadata.columns, query_context, {});
- metadata.primary_key = KeyDescription::getKeyFromAST(order_expression, metadata.columns, query_context);
- }
- else
- {
- metadata.sorting_key = KeyDescription::getSortingKeyFromAST({}, metadata.columns, query_context, {});
- metadata.primary_key = KeyDescription::getKeyFromAST({}, metadata.columns, query_context);
- }
- if (query.orderBy())
- throw Exception(
- "When aggregation is used in projection, ORDER BY cannot be specified", ErrorCodes::ILLEGAL_PROJECTION);
- }
- else
- {
- result.type = ProjectionDescription::Type::Normal;
- metadata.sorting_key = KeyDescription::getSortingKeyFromAST(query.orderBy(), metadata.columns, query_context, {});
- metadata.primary_key = KeyDescription::getKeyFromAST(query.orderBy(), metadata.columns, query_context);
- }
- metadata.primary_key.definition_ast = nullptr;
- result.metadata = std::make_shared<StorageInMemoryMetadata>(metadata);
- return result;
-}
-
-ProjectionDescription
-ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & columns, const Names & minmax_columns, ContextPtr query_context)
-{
- auto select_query = std::make_shared<ASTProjectionSelectQuery>();
- ASTPtr select_expression_list = std::make_shared<ASTExpressionList>();
- for (const auto & column : minmax_columns)
- {
- select_expression_list->children.push_back(makeASTFunction("min", std::make_shared<ASTIdentifier>(column)));
- select_expression_list->children.push_back(makeASTFunction("max", std::make_shared<ASTIdentifier>(column)));
- }
- select_expression_list->children.push_back(makeASTFunction("count"));
- select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list));
-
- ProjectionDescription result;
- result.definition_ast = select_query;
- result.name = MINMAX_COUNT_PROJECTION_NAME;
- result.query_ast = select_query->cloneToASTSelect();
-
- auto external_storage_holder = std::make_shared<TemporaryTableHolder>(query_context, columns, ConstraintsDescription{});
- StoragePtr storage = external_storage_holder->getTable();
- InterpreterSelectQuery select(
- result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias());
- result.required_columns = select.getRequiredColumns();
- result.sample_block = select.getSampleBlock();
-
- for (size_t i = 0; i < result.sample_block.columns(); ++i)
- {
- const auto & column_with_type_name = result.sample_block.getByPosition(i);
-
- if (column_with_type_name.column && isColumnConst(*column_with_type_name.column))
- throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Projections cannot contain constant columns: {}", column_with_type_name.name);
-
- result.column_names.emplace_back(column_with_type_name.name);
- result.data_types.emplace_back(column_with_type_name.type);
- }
- result.type = ProjectionDescription::Type::Aggregate;
- StorageInMemoryMetadata metadata;
- metadata.setColumns(ColumnsDescription(result.sample_block.getNamesAndTypesList()));
- metadata.partition_key = KeyDescription::getSortingKeyFromAST({}, metadata.columns, query_context, {});
- metadata.sorting_key = KeyDescription::getSortingKeyFromAST({}, metadata.columns, query_context, {});
- metadata.primary_key = KeyDescription::getKeyFromAST({}, metadata.columns, query_context);
- metadata.primary_key.definition_ast = nullptr;
- result.metadata = std::make_shared<StorageInMemoryMetadata>(metadata);
- result.is_minmax_count_projection = true;
- return result;
-}
-
-
-void ProjectionDescription::recalculateWithNewColumns(const ColumnsDescription & new_columns, ContextPtr query_context)
-{
- *this = getProjectionFromAST(definition_ast, new_columns, query_context);
-}
-
-
-Block ProjectionDescription::calculate(const Block & block, ContextPtr context) const
-{
- auto in = InterpreterSelectQuery(
- query_ast,
- context,
- Pipe(std::make_shared<SourceFromSingleChunk>(block, Chunk(block.getColumns(), block.rows()))),
- SelectQueryOptions{
- type == ProjectionDescription::Type::Normal ? QueryProcessingStage::FetchColumns
- : QueryProcessingStage::WithMergeableState})
- .execute()
- .getInputStream();
- in = std::make_shared<SquashingBlockInputStream>(in, block.rows(), 0);
- in->readPrefix();
- auto ret = in->read();
- if (in->read())
- throw Exception("Projection cannot increase the number of rows in a block", ErrorCodes::LOGICAL_ERROR);
- in->readSuffix();
- return ret;
-}
-
-
-String ProjectionsDescription::toString() const
-{
- if (empty())
- return {};
-
- ASTExpressionList list;
- for (const auto & projection : projections)
- list.children.push_back(projection.definition_ast);
-
- return serializeAST(list, true);
-}
-
-ProjectionsDescription ProjectionsDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr query_context)
-{
- ProjectionsDescription result;
- if (str.empty())
- return result;
-
- ParserProjectionDeclarationList parser;
- ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-
- for (const auto & projection_ast : list->children)
- {
- auto projection = ProjectionDescription::getProjectionFromAST(projection_ast, columns, query_context);
- result.add(std::move(projection));
- }
-
- return result;
-}
-
-bool ProjectionsDescription::has(const String & projection_name) const
-{
- return map.count(projection_name) > 0;
-}
-
-const ProjectionDescription & ProjectionsDescription::get(const String & projection_name) const
-{
- auto it = map.find(projection_name);
- if (it == map.end())
- throw Exception("There is no projection " + projection_name + " in table", ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE);
-
- return *(it->second);
-}
-
-void ProjectionsDescription::add(ProjectionDescription && projection, const String & after_projection, bool first, bool if_not_exists)
-{
- if (has(projection.name))
- {
- if (if_not_exists)
- return;
- throw Exception(
- "Cannot add projection " + projection.name + ": projection with this name already exists", ErrorCodes::ILLEGAL_PROJECTION);
- }
-
- auto insert_it = projections.cend();
-
- if (first)
- insert_it = projections.cbegin();
- else if (!after_projection.empty())
- {
- auto it = std::find_if(projections.cbegin(), projections.cend(), [&after_projection](const auto & projection_)
- {
- return projection_.name == after_projection;
- });
- if (it != projections.cend())
- ++it;
- insert_it = it;
- }
-
- auto it = projections.insert(insert_it, std::move(projection));
- map[it->name] = it;
-}
-
-void ProjectionsDescription::remove(const String & projection_name, bool if_exists)
-{
- auto it = map.find(projection_name);
- if (it == map.end())
- {
- if (if_exists)
- return;
- throw Exception("There is no projection " + projection_name + " in table.", ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE);
- }
-
- projections.erase(it->second);
- map.erase(it);
-}
-
-ExpressionActionsPtr
-ProjectionsDescription::getSingleExpressionForProjections(const ColumnsDescription & columns, ContextPtr query_context) const
-{
- ASTPtr combined_expr_list = std::make_shared<ASTExpressionList>();
- for (const auto & projection : projections)
- for (const auto & projection_expr : projection.query_ast->children)
- combined_expr_list->children.push_back(projection_expr->clone());
-
- auto syntax_result = TreeRewriter(query_context).analyze(combined_expr_list, columns.getAllPhysical());
- return ExpressionAnalyzer(combined_expr_list, syntax_result, query_context).getActions(false);
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryDescription.cpp
deleted file mode 100644
index 78cd0cbbbdb..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryDescription.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-#include <Storages/SelectQueryDescription.h>
-
-#include <Parsers/ASTSelectWithUnionQuery.h>
-#include <Parsers/ASTSelectQuery.h>
-#include <Interpreters/getTableExpressions.h>
-#include <Interpreters/AddDefaultDatabaseVisitor.h>
-#include <Interpreters/Context.h>
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
-extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW;
-extern const int LOGICAL_ERROR;
-}
-
-SelectQueryDescription::SelectQueryDescription(const SelectQueryDescription & other)
- : select_table_id(other.select_table_id)
- , select_query(other.select_query ? other.select_query->clone() : nullptr)
- , inner_query(other.inner_query ? other.inner_query->clone() : nullptr)
-{
-}
-
-SelectQueryDescription & SelectQueryDescription::operator=(const SelectQueryDescription & other)
-{
- if (&other == this)
- return *this;
-
- select_table_id = other.select_table_id;
- if (other.select_query)
- select_query = other.select_query->clone();
- else
- select_query.reset();
-
- if (other.inner_query)
- inner_query = other.inner_query->clone();
- else
- inner_query.reset();
- return *this;
-}
-
-
-namespace
-{
-
-StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPtr context, bool add_default_db = true)
-{
- if (add_default_db)
- {
- AddDefaultDatabaseVisitor visitor(context->getCurrentDatabase(), false, nullptr);
- visitor.visit(query);
- }
-
- if (auto db_and_table = getDatabaseAndTable(query, 0))
- {
- return StorageID(db_and_table->database, db_and_table->table/*, db_and_table->uuid*/);
- }
- else if (auto subquery = extractTableExpression(query, 0))
- {
- auto * ast_select = subquery->as<ASTSelectWithUnionQuery>();
- if (!ast_select)
- throw Exception("Logical error while creating StorageMaterializedView. "
- "Could not retrieve table name from select query.",
- DB::ErrorCodes::LOGICAL_ERROR);
- if (ast_select->list_of_selects->children.size() != 1)
- throw Exception("UNION is not supported for MATERIALIZED VIEW",
- ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
-
- auto & inner_query = ast_select->list_of_selects->children.at(0);
-
- return extractDependentTableFromSelectQuery(inner_query->as<ASTSelectQuery &>(), context, false);
- }
- else
- return StorageID::createEmpty();
-}
-
-
-void checkAllowedQueries(const ASTSelectQuery & query)
-{
- if (query.prewhere() || query.final() || query.sampleSize())
- throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
-
- ASTPtr subquery = extractTableExpression(query, 0);
- if (!subquery)
- return;
-
- if (const auto * ast_select = subquery->as<ASTSelectWithUnionQuery>())
- {
- if (ast_select->list_of_selects->children.size() != 1)
- throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
-
- const auto & inner_query = ast_select->list_of_selects->children.at(0);
-
- checkAllowedQueries(inner_query->as<ASTSelectQuery &>());
- }
-}
-
-}
-
-/// Check whether there is only a single select query in the SelectWithUnionQuery
-static bool isSingleSelect(const ASTPtr & select, ASTPtr & res)
-{
- auto new_select = select->as<ASTSelectWithUnionQuery &>();
- if (new_select.list_of_selects->children.size() != 1)
- return false;
- auto & new_inner_query = new_select.list_of_selects->children.at(0);
- if (new_inner_query->as<ASTSelectQuery>())
- {
- res = new_inner_query;
- return true;
- }
- else
- return isSingleSelect(new_inner_query, res);
-}
-
-SelectQueryDescription SelectQueryDescription::getSelectQueryFromASTForMatView(const ASTPtr & select, ContextPtr context)
-{
- ASTPtr new_inner_query;
-
- if (!isSingleSelect(select, new_inner_query))
- throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
-
- auto & select_query = new_inner_query->as<ASTSelectQuery &>();
- checkAllowedQueries(select_query);
-
- SelectQueryDescription result;
- result.select_table_id = extractDependentTableFromSelectQuery(select_query, context);
- result.select_query = select->as<ASTSelectWithUnionQuery &>().clone();
- result.inner_query = new_inner_query->clone();
-
- return result;
-}
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
deleted file mode 100644
index 0d69aedd522..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/SelectQueryInfo.h
+++ /dev/null
@@ -1,171 +0,0 @@
-#pragma once
-
-#include <Interpreters/PreparedSets.h>
-#include <Interpreters/DatabaseAndTableWithAlias.h>
-#include <Core/SortDescription.h>
-#include <Core/Names.h>
-#include <Storages/ProjectionsDescription.h>
-#include <Interpreters/AggregateDescription.h>
-
-#include <memory>
-
-namespace NDB
-{
-
-class ExpressionActions;
-using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
-
-class ActionsDAG;
-using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
-
-struct PrewhereInfo;
-using PrewhereInfoPtr = std::shared_ptr<PrewhereInfo>;
-
-struct FilterInfo;
-using FilterInfoPtr = std::shared_ptr<FilterInfo>;
-
-struct FilterDAGInfo;
-using FilterDAGInfoPtr = std::shared_ptr<FilterDAGInfo>;
-
-struct InputOrderInfo;
-using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
-
-struct TreeRewriterResult;
-using TreeRewriterResultPtr = std::shared_ptr<const TreeRewriterResult>;
-
-class ReadInOrderOptimizer;
-using ReadInOrderOptimizerPtr = std::shared_ptr<const ReadInOrderOptimizer>;
-
-class Cluster;
-using ClusterPtr = std::shared_ptr<Cluster>;
-
-struct MergeTreeDataSelectAnalysisResult;
-using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr<MergeTreeDataSelectAnalysisResult>;
-
-struct PrewhereInfo
-{
- /// Actions which are executed in order to alias columns are used for prewhere actions.
- ActionsDAGPtr alias_actions;
- /// Actions for row level security filter. Applied separately before prewhere_actions.
- /// These actions are separate because the prewhere condition should not be executed over filtered rows.
- ActionsDAGPtr row_level_filter;
- /// Actions which are executed on block in order to get filter column for prewhere step.
- ActionsDAGPtr prewhere_actions;
- String row_level_column_name;
- String prewhere_column_name;
- bool remove_prewhere_column = false;
- bool need_filter = false;
-
- PrewhereInfo() = default;
- explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_)
- : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {}
-
- std::string dump() const;
-};
-
-/// Helper struct to store all the information about the filter expression.
-struct FilterInfo
-{
- ExpressionActionsPtr alias_actions;
- ExpressionActionsPtr actions;
- String column_name;
- bool do_remove_column = false;
-};
-
-/// Same as FilterInfo, but with ActionsDAG.
-struct FilterDAGInfo
-{
- ActionsDAGPtr actions;
- String column_name;
- bool do_remove_column = false;
-
- std::string dump() const;
-};
-
-struct InputOrderInfo
-{
- SortDescription order_key_prefix_descr;
- int direction;
- UInt64 limit;
-
- InputOrderInfo(const SortDescription & order_key_prefix_descr_, int direction_, UInt64 limit_)
- : order_key_prefix_descr(order_key_prefix_descr_), direction(direction_), limit(limit_) {}
-
- bool operator ==(const InputOrderInfo & other) const
- {
- return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction;
- }
-
- bool operator !=(const InputOrderInfo & other) const { return !(*this == other); }
-};
-
-class IMergeTreeDataPart;
-
-using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
-
-// The projection selected to execute the current query
-struct ProjectionCandidate
-{
- ProjectionDescriptionRawPtr desc{};
- PrewhereInfoPtr prewhere_info;
- ActionsDAGPtr before_where;
- String where_column_name;
- bool remove_where_filter = false;
- ActionsDAGPtr before_aggregation;
- Names required_columns;
- NamesAndTypesList aggregation_keys;
- AggregateDescriptions aggregate_descriptions;
- bool aggregate_overflow_row = false;
- bool aggregate_final = false;
- bool complete = false;
- ReadInOrderOptimizerPtr order_optimizer;
- InputOrderInfoPtr input_order_info;
- ManyExpressionActions group_by_elements_actions;
- MergeTreeDataSelectAnalysisResultPtr merge_tree_projection_select_result_ptr;
- MergeTreeDataSelectAnalysisResultPtr merge_tree_normal_select_result_ptr;
-};
-
-/** Query along with some additional data,
- * that can be used during query processing
- * inside storage engines.
- */
-struct SelectQueryInfo
-{
- ASTPtr query;
- ASTPtr view_query; /// Optimized VIEW query
-
- /// Cluster for the query.
- ClusterPtr cluster;
- /// Optimized cluster for the query.
- /// In case of optimize_skip_unused_shards it may differ from the original cluster.
- ///
- /// Configured in StorageDistributed::getQueryProcessingStage()
- ClusterPtr optimized_cluster;
-
- TreeRewriterResultPtr syntax_analyzer_result;
-
- PrewhereInfoPtr prewhere_info;
-
- ReadInOrderOptimizerPtr order_optimizer;
- /// Can be modified while reading from storage
- InputOrderInfoPtr input_order_info;
-
- /// Prepared sets are used for indices by storage engine.
- /// Example: x IN (1, 2, 3)
- PreparedSets sets;
-
- /// Cached value of ExpressionAnalysisResult::has_window
- bool has_window = false;
-
- ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; }
-
- /// If not null, a projection was chosen to execute the current query.
- std::optional<ProjectionCandidate> projection;
- bool ignore_projections = false;
- bool is_projection_query = false;
- bool merge_tree_empty_result = false;
- Block minmax_count_projection_block;
- MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr;
-};
-
-}
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/StorageInMemoryMetadata.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/StorageInMemoryMetadata.cpp
deleted file mode 100644
index f4ceb684785..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/StorageInMemoryMetadata.cpp
+++ /dev/null
@@ -1,682 +0,0 @@
-#include <Storages/StorageInMemoryMetadata.h>
-
-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/HashSet.h>
-#include <Common/quoteString.h>
-#include <Core/ColumnWithTypeAndName.h>
-#include <DataTypes/DataTypeEnum.h>
-#include <IO/ReadBufferFromString.h>
-#include <IO/ReadHelpers.h>
-#include <IO/Operators.h>
-
-
-namespace NDB
-{
-namespace ErrorCodes
-{
- extern const int COLUMN_QUERIED_MORE_THAN_ONCE;
- extern const int DUPLICATE_COLUMN;
- extern const int EMPTY_LIST_OF_COLUMNS_QUERIED;
- extern const int NO_SUCH_COLUMN_IN_TABLE;
- extern const int NOT_FOUND_COLUMN_IN_BLOCK;
- extern const int TYPE_MISMATCH;
- extern const int EMPTY_LIST_OF_COLUMNS_PASSED;
-}
-
-StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other)
- : columns(other.columns)
- , secondary_indices(other.secondary_indices)
- , constraints(other.constraints)
- , projections(other.projections.clone())
- , minmax_count_projection(
- other.minmax_count_projection ? std::optional<ProjectionDescription>(other.minmax_count_projection->clone()) : std::nullopt)
- , partition_key(other.partition_key)
- , primary_key(other.primary_key)
- , sorting_key(other.sorting_key)
- , sampling_key(other.sampling_key)
- , column_ttls_by_name(other.column_ttls_by_name)
- , table_ttl(other.table_ttl)
- , settings_changes(other.settings_changes ? other.settings_changes->clone() : nullptr)
- , select(other.select)
- , comment(other.comment)
-{
-}
-
-StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemoryMetadata & other)
-{
- if (&other == this)
- return *this;
-
- columns = other.columns;
- secondary_indices = other.secondary_indices;
- constraints = other.constraints;
- projections = other.projections.clone();
- if (other.minmax_count_projection)
- minmax_count_projection = other.minmax_count_projection->clone();
- else
- minmax_count_projection = std::nullopt;
- partition_key = other.partition_key;
- primary_key = other.primary_key;
- sorting_key = other.sorting_key;
- sampling_key = other.sampling_key;
- column_ttls_by_name = other.column_ttls_by_name;
- table_ttl = other.table_ttl;
- if (other.settings_changes)
- settings_changes = other.settings_changes->clone();
- else
- settings_changes.reset();
- select = other.select;
- comment = other.comment;
- return *this;
-}
-
-void StorageInMemoryMetadata::setComment(const String & comment_)
-{
- comment = comment_;
-}
-
-void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_)
-{
- if (columns_.getAllPhysical().empty())
- throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
- columns = std::move(columns_);
-}
-
-void StorageInMemoryMetadata::setSecondaryIndices(IndicesDescription secondary_indices_)
-{
- secondary_indices = std::move(secondary_indices_);
-}
-
-void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_)
-{
- constraints = std::move(constraints_);
-}
-
-void StorageInMemoryMetadata::setProjections(ProjectionsDescription projections_)
-{
- projections = std::move(projections_);
-}
-
-void StorageInMemoryMetadata::setTableTTLs(const TTLTableDescription & table_ttl_)
-{
- table_ttl = table_ttl_;
-}
-
-void StorageInMemoryMetadata::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_)
-{
- column_ttls_by_name = column_ttls_by_name_;
-}
-
-void StorageInMemoryMetadata::setSettingsChanges(const ASTPtr & settings_changes_)
-{
- if (settings_changes_)
- settings_changes = settings_changes_;
- else
- settings_changes = nullptr;
-}
-
-void StorageInMemoryMetadata::setSelectQuery(const SelectQueryDescription & select_)
-{
- select = select_;
-}
-
-const ColumnsDescription & StorageInMemoryMetadata::getColumns() const
-{
- return columns;
-}
-
-const IndicesDescription & StorageInMemoryMetadata::getSecondaryIndices() const
-{
- return secondary_indices;
-}
-
-bool StorageInMemoryMetadata::hasSecondaryIndices() const
-{
- return !secondary_indices.empty();
-}
-
-const ConstraintsDescription & StorageInMemoryMetadata::getConstraints() const
-{
- return constraints;
-}
-
-const ProjectionsDescription & StorageInMemoryMetadata::getProjections() const
-{
- return projections;
-}
-
-bool StorageInMemoryMetadata::hasProjections() const
-{
- return !projections.empty();
-}
-
-TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const
-{
- return table_ttl;
-}
-
-bool StorageInMemoryMetadata::hasAnyTableTTL() const
-{
- return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL() || hasAnyRowsWhereTTL();
-}
-
-TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const
-{
- return column_ttls_by_name;
-}
-
-bool StorageInMemoryMetadata::hasAnyColumnTTL() const
-{
- return !column_ttls_by_name.empty();
-}
-
-TTLDescription StorageInMemoryMetadata::getRowsTTL() const
-{
- return table_ttl.rows_ttl;
-}
-
-bool StorageInMemoryMetadata::hasRowsTTL() const
-{
- return table_ttl.rows_ttl.expression != nullptr;
-}
-
-TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const
-{
- return table_ttl.rows_where_ttl;
-}
-
-bool StorageInMemoryMetadata::hasAnyRowsWhereTTL() const
-{
- return !table_ttl.rows_where_ttl.empty();
-}
-
-TTLDescriptions StorageInMemoryMetadata::getMoveTTLs() const
-{
- return table_ttl.move_ttl;
-}
-
-bool StorageInMemoryMetadata::hasAnyMoveTTL() const
-{
- return !table_ttl.move_ttl.empty();
-}
-
-TTLDescriptions StorageInMemoryMetadata::getRecompressionTTLs() const
-{
- return table_ttl.recompression_ttl;
-}
-
-bool StorageInMemoryMetadata::hasAnyRecompressionTTL() const
-{
- return !table_ttl.recompression_ttl.empty();
-}
-
-TTLDescriptions StorageInMemoryMetadata::getGroupByTTLs() const
-{
- return table_ttl.group_by_ttl;
-}
-
-bool StorageInMemoryMetadata::hasAnyGroupByTTL() const
-{
- return !table_ttl.group_by_ttl.empty();
-}
-
-ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns, bool include_ttl_target) const
-{
- if (updated_columns.empty())
- return {};
-
- ColumnDependencies res;
-
- NameSet indices_columns;
- NameSet projections_columns;
- NameSet required_ttl_columns;
- NameSet updated_ttl_columns;
-
- auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set)
- {
- auto required_columns = expression->getRequiredColumns();
- for (const auto & dependency : required_columns)
- {
- if (updated_columns.count(dependency))
- {
- to_set.insert(required_columns.begin(), required_columns.end());
- return true;
- }
- }
-
- return false;
- };
-
- for (const auto & index : getSecondaryIndices())
- add_dependent_columns(index.expression, indices_columns);
-
- for (const auto & projection : getProjections())
- add_dependent_columns(&projection, projections_columns);
-
- if (hasRowsTTL())
- {
- auto rows_expression = getRowsTTL().expression;
- if (add_dependent_columns(rows_expression, required_ttl_columns) && include_ttl_target)
- {
- /// Filter all columns if the rows TTL expression has to be recalculated.
- for (const auto & column : getColumns().getAllPhysical())
- updated_ttl_columns.insert(column.name);
- }
- }
-
- for (const auto & entry : getRecompressionTTLs())
- add_dependent_columns(entry.expression, required_ttl_columns);
-
- for (const auto & [name, entry] : getColumnTTLs())
- {
- if (add_dependent_columns(entry.expression, required_ttl_columns) && include_ttl_target)
- updated_ttl_columns.insert(name);
- }
-
- for (const auto & entry : getMoveTTLs())
- add_dependent_columns(entry.expression, required_ttl_columns);
-
- //TODO what about rows_where_ttl and group_by_ttl ??
-
- for (const auto & column : indices_columns)
- res.emplace(column, ColumnDependency::SKIP_INDEX);
- for (const auto & column : projections_columns)
- res.emplace(column, ColumnDependency::PROJECTION);
- for (const auto & column : required_ttl_columns)
- res.emplace(column, ColumnDependency::TTL_EXPRESSION);
- for (const auto & column : updated_ttl_columns)
- res.emplace(column, ColumnDependency::TTL_TARGET);
-
- return res;
-
-}
-
-Block StorageInMemoryMetadata::getSampleBlockNonMaterialized() const
-{
- Block res;
-
- for (const auto & column : getColumns().getOrdinary())
- res.insert({column.type->createColumn(), column.type, column.name});
-
- return res;
-}
-
-Block StorageInMemoryMetadata::getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const
-{
- auto res = getSampleBlock();
-
- /// Virtual columns must be appended after ordinary ones, because the user can
- /// override them.
- for (const auto & column : virtuals)
- res.insert({column.type->createColumn(), column.type, column.name});
-
- return res;
-}
-
-Block StorageInMemoryMetadata::getSampleBlock() const
-{
- Block res;
-
- for (const auto & column : getColumns().getAllPhysical())
- res.insert({column.type->createColumn(), column.type, column.name});
-
- return res;
-}
-
-Block StorageInMemoryMetadata::getSampleBlockForColumns(
- const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const
-{
- Block res;
-
- HashMapWithSavedHash<StringRef, const DataTypePtr *, StringRefHash> virtuals_map;
-
- /// Virtual columns must be appended after ordinary ones, because the user can
- /// override them.
- for (const auto & column : virtuals)
- virtuals_map[column.name] = &column.type;
-
- for (const auto & name : column_names)
- {
- auto column = getColumns().tryGetColumnOrSubcolumn(ColumnsDescription::All, name);
- if (column)
- {
- res.insert({column->type->createColumn(), column->type, column->name});
- }
- else if (auto * it = virtuals_map.find(name); it != virtuals_map.end())
- {
- const auto & type = *it->getMapped();
- res.insert({type->createColumn(), type, name});
- }
- else
- throw Exception(
- "Column " + backQuote(name) + " not found in table " + (storage_id.empty() ? "" : storage_id.getNameForLogs()),
- ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
- }
-
- return res;
-}
-
-const KeyDescription & StorageInMemoryMetadata::getPartitionKey() const
-{
- return partition_key;
-}
-
-bool StorageInMemoryMetadata::isPartitionKeyDefined() const
-{
- return partition_key.definition_ast != nullptr;
-}
-
-bool StorageInMemoryMetadata::hasPartitionKey() const
-{
- return !partition_key.column_names.empty();
-}
-
-Names StorageInMemoryMetadata::getColumnsRequiredForPartitionKey() const
-{
- if (hasPartitionKey())
- return partition_key.expression->getRequiredColumns();
- return {};
-}
-
-
-const KeyDescription & StorageInMemoryMetadata::getSortingKey() const
-{
- return sorting_key;
-}
-
-bool StorageInMemoryMetadata::isSortingKeyDefined() const
-{
- return sorting_key.definition_ast != nullptr;
-}
-
-bool StorageInMemoryMetadata::hasSortingKey() const
-{
- return !sorting_key.column_names.empty();
-}
-
-Names StorageInMemoryMetadata::getColumnsRequiredForSortingKey() const
-{
- if (hasSortingKey())
- return sorting_key.expression->getRequiredColumns();
- return {};
-}
-
-Names StorageInMemoryMetadata::getSortingKeyColumns() const
-{
- if (hasSortingKey())
- return sorting_key.column_names;
- return {};
-}
-
-const KeyDescription & StorageInMemoryMetadata::getSamplingKey() const
-{
- return sampling_key;
-}
-
-bool StorageInMemoryMetadata::isSamplingKeyDefined() const
-{
- return sampling_key.definition_ast != nullptr;
-}
-
-bool StorageInMemoryMetadata::hasSamplingKey() const
-{
- return !sampling_key.column_names.empty();
-}
-
-Names StorageInMemoryMetadata::getColumnsRequiredForSampling() const
-{
- if (hasSamplingKey())
- return sampling_key.expression->getRequiredColumns();
- return {};
-}
-
-const KeyDescription & StorageInMemoryMetadata::getPrimaryKey() const
-{
- return primary_key;
-}
-
-bool StorageInMemoryMetadata::isPrimaryKeyDefined() const
-{
- return primary_key.definition_ast != nullptr;
-}
-
-bool StorageInMemoryMetadata::hasPrimaryKey() const
-{
- return !primary_key.column_names.empty();
-}
-
-Names StorageInMemoryMetadata::getColumnsRequiredForPrimaryKey() const
-{
- if (hasPrimaryKey())
- return primary_key.expression->getRequiredColumns();
- return {};
-}
-
-Names StorageInMemoryMetadata::getPrimaryKeyColumns() const
-{
- if (!primary_key.column_names.empty())
- return primary_key.column_names;
- return {};
-}
-
-ASTPtr StorageInMemoryMetadata::getSettingsChanges() const
-{
- if (settings_changes)
- return settings_changes->clone();
- return nullptr;
-}
-const SelectQueryDescription & StorageInMemoryMetadata::getSelectQuery() const
-{
- return select;
-}
-
-bool StorageInMemoryMetadata::hasSelectQuery() const
-{
- return select.select_query != nullptr;
-}
-
-namespace
-{
- using NamesAndTypesMap = HashMapWithSavedHash<StringRef, const IDataType *, StringRefHash>;
- using UniqueStrings = HashSetWithSavedHash<StringRef, StringRefHash>;
-
- String listOfColumns(const NamesAndTypesList & available_columns)
- {
- WriteBufferFromOwnString ss;
- for (auto it = available_columns.begin(); it != available_columns.end(); ++it)
- {
- if (it != available_columns.begin())
- ss << ", ";
- ss << it->name;
- }
- return ss.str();
- }
-
- NamesAndTypesMap getColumnsMap(const NamesAndTypesList & columns)
- {
- NamesAndTypesMap res;
-
- for (const auto & column : columns)
- res.insert({column.name, column.type.get()});
-
- return res;
- }
-
- /*
- * This function checks compatibility of enums. It returns true if:
- * 1. Both types are enums.
- * 2. The first type can represent all possible values of the second one.
- * 3. Both types require the same amount of memory.
- */
- bool isCompatibleEnumTypes(const IDataType * lhs, const IDataType * rhs)
- {
- if (IDataTypeEnum const * enum_type = dynamic_cast<IDataTypeEnum const *>(lhs))
- {
- if (!enum_type->contains(*rhs))
- return false;
- return enum_type->getMaximumSizeOfValueInMemory() == rhs->getMaximumSizeOfValueInMemory();
- }
- return false;
- }
-}
-
-void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const
-{
- if (column_names.empty())
- {
- auto list_of_columns = listOfColumns(getColumns().getAllPhysicalWithSubcolumns());
- throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED,
- "Empty list of columns queried. There are columns: {}", list_of_columns);
- }
-
- const auto virtuals_map = getColumnsMap(virtuals);
- UniqueStrings unique_names;
-
- for (const auto & name : column_names)
- {
- bool has_column = getColumns().hasColumnOrSubcolumn(ColumnsDescription::AllPhysical, name)
- || virtuals_map.find(name) != nullptr;
-
- if (!has_column)
- {
- auto list_of_columns = listOfColumns(getColumns().getAllPhysicalWithSubcolumns());
- throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
- "There is no column with name {} in table {}. There are columns: {}",
- backQuote(name), storage_id.getNameForLogs(), list_of_columns);
- }
-
- if (unique_names.end() != unique_names.find(name))
- throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE, "Column {} queried more than once", name);
-
- unique_names.insert(name);
- }
-}
-
-void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const
-{
- const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
- const auto columns_map = getColumnsMap(available_columns);
-
- UniqueStrings unique_names;
-
- for (const NameAndTypePair & column : provided_columns)
- {
- const auto * it = columns_map.find(column.name);
- if (columns_map.end() == it)
- throw Exception(
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
- "There is no column with name {}. There are columns: {}",
- column.name,
- listOfColumns(available_columns));
-
- const auto * available_type = it->getMapped();
- if (!column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get()))
- throw Exception(
- ErrorCodes::TYPE_MISMATCH,
- "Type mismatch for column {}. Column has type {}, got type {}",
- column.name,
- available_type->getName(),
- column.type->getName());
-
- if (unique_names.end() != unique_names.find(column.name))
- throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE,
- "Column {} queried more than once",
- column.name);
-
- unique_names.insert(column.name);
- }
-}
-
-void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const Names & column_names) const
-{
- const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
- const auto available_columns_map = getColumnsMap(available_columns);
- const auto & provided_columns_map = getColumnsMap(provided_columns);
-
- if (column_names.empty())
- throw Exception(
- "Empty list of columns queried. There are columns: " + listOfColumns(available_columns),
- ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED);
-
- UniqueStrings unique_names;
-
- for (const String & name : column_names)
- {
- const auto * it = provided_columns_map.find(name);
- if (provided_columns_map.end() == it)
- continue;
-
- const auto * jt = available_columns_map.find(name);
- if (available_columns_map.end() == jt)
- throw Exception(
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
- "There is no column with name {}. There are columns: {}",
- name,
- listOfColumns(available_columns));
-
- const auto * provided_column_type = it->getMapped();
- const auto * available_column_type = jt->getMapped();
-
- if (!provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type))
- throw Exception(
- ErrorCodes::TYPE_MISMATCH,
- "Type mismatch for column {}. Column has type {}, got type {}",
- name,
- available_column_type->getName(),
- provided_column_type->getName());
-
- if (unique_names.end() != unique_names.find(name))
- throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE,
- "Column {} queried more than once",
- name);
-
- unique_names.insert(name);
- }
-}
-
-void StorageInMemoryMetadata::check(const Block & block, bool need_all) const
-{
- const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
- const auto columns_map = getColumnsMap(available_columns);
-
- NameSet names_in_block;
-
- block.checkNumberOfRows();
-
- for (const auto & column : block)
- {
- if (names_in_block.count(column.name))
- throw Exception("Duplicate column " + column.name + " in block", ErrorCodes::DUPLICATE_COLUMN);
-
- names_in_block.insert(column.name);
-
- const auto * it = columns_map.find(column.name);
- if (columns_map.end() == it)
- throw Exception(
- ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
- "There is no column with name {}. There are columns: {}",
- column.name,
- listOfColumns(available_columns));
-
- const auto * available_type = it->getMapped();
- if (!column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get()))
- throw Exception(
- ErrorCodes::TYPE_MISMATCH,
- "Type mismatch for column {}. Column has type {}, got type {}",
- column.name,
- available_type->getName(),
- column.type->getName());
- }
-
- if (need_all && names_in_block.size() < columns_map.size())
- {
- for (const auto & available_column : available_columns)
- {
- if (!names_in_block.count(available_column.name))
- throw Exception("Expected column " + available_column.name, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
- }
- }
-}
-
-
-}
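The type checks in StorageInMemoryMetadata::check above accept a column either when the types are equal or when isCompatibleEnumTypes() holds, i.e. the known type is an enum that contains every value of the provided type and both use the same per-value storage size. A self-contained sketch of that rule with plain value lists instead of IDataType follows (the names are illustrative, not the NDB API):

#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Illustrative stand-in for an enum data type: named values plus storage width.
struct EnumType
{
    std::vector<std::pair<std::string, int>> values;
    std::size_t value_size_bytes;    // e.g. 1 for Enum8, 2 for Enum16

    // True when every (name, value) pair of `other` also exists in `*this`.
    bool contains(const EnumType & other) const
    {
        return std::all_of(other.values.begin(), other.values.end(),
            [this](const auto & v)
            { return std::find(values.begin(), values.end(), v) != values.end(); });
    }
};

// Mirrors the intent of isCompatibleEnumTypes(): lhs can represent every value
// of rhs, and both types take the same amount of memory per value.
bool isCompatibleEnumSketch(const EnumType & lhs, const EnumType & rhs)
{
    return lhs.contains(rhs) && lhs.value_size_bytes == rhs.value_size_bytes;
}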
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/TTLDescription.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/TTLDescription.cpp
deleted file mode 100644
index ba8cdc57a6c..00000000000
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Storages/TTLDescription.cpp
+++ /dev/null
@@ -1,377 +0,0 @@
-#include <Storages/TTLDescription.h>
-
-#include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <Functions/IFunction.h>
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/TreeRewriter.h>
-#include <Interpreters/InDepthNodeVisitor.h>
-#include <Interpreters/addTypeConversionToAST.h>
-#include <Parsers/ASTExpressionList.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTTTLElement.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTAssignment.h>
-#include <Parsers/ASTLiteral.h>
-#include <Storages/ColumnsDescription.h>
-#include <Interpreters/Context.h>
-
-#include <Parsers/queryToString.h>
-
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeDateTime.h>
-
-
-namespace NDB
-{
-
-namespace ErrorCodes
-{
-extern const int BAD_ARGUMENTS;
-extern const int BAD_TTL_EXPRESSION;
-}
-
-
-TTLAggregateDescription::TTLAggregateDescription(const TTLAggregateDescription & other)
- : column_name(other.column_name)
- , expression_result_column_name(other.expression_result_column_name)
-{
- if (other.expression)
- expression = other.expression->clone();
-}
-
-TTLAggregateDescription & TTLAggregateDescription::operator=(const TTLAggregateDescription & other)
-{
- if (&other == this)
- return *this;
-
- column_name = other.column_name;
- expression_result_column_name = other.expression_result_column_name;
- if (other.expression)
- expression = other.expression->clone();
- else
- expression.reset();
- return *this;
-}
-
-namespace
-{
-
-void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const String & result_column_name)
-{
- for (const auto & action : ttl_expression->getActions())
- {
- if (action.node->type == ActionsDAG::ActionType::FUNCTION)
- {
- IFunctionBase & func = *action.node->function_base;
- if (!func.isDeterministic())
- throw Exception(
- "TTL expression cannot contain non-deterministic functions, "
- "but contains function "
- + func.getName(),
- ErrorCodes::BAD_ARGUMENTS);
- }
- }
-
- const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name);
-
- if (!typeid_cast<const DataTypeDateTime *>(result_column.type.get())
- && !typeid_cast<const DataTypeDate *>(result_column.type.get()))
- {
- throw Exception(
- "TTL expression result column should have DateTime or Date type, but has " + result_column.type->getName(),
- ErrorCodes::BAD_TTL_EXPRESSION);
- }
-}
-
-class FindAggregateFunctionData
-{
-public:
- using TypeToVisit = ASTFunction;
- bool has_aggregate_function = false;
-
- void visit(const ASTFunction & func, ASTPtr &)
- {
- /// Do not throw if an aggregate function is found inside another aggregate function,
- /// because it will be checked while creating expressions.
- if (AggregateFunctionFactory::instance().isAggregateFunctionName(func.name))
- has_aggregate_function = true;
- }
-};
-
-using FindAggregateFunctionFinderMatcher = OneTypeMatcher<FindAggregateFunctionData>;
-using FindAggregateFunctionVisitor = InDepthNodeVisitor<FindAggregateFunctionFinderMatcher, true>;
-
-}
-
-TTLDescription::TTLDescription(const TTLDescription & other)
- : mode(other.mode)
- , expression_ast(other.expression_ast ? other.expression_ast->clone() : nullptr)
- , result_column(other.result_column)
- , where_result_column(other.where_result_column)
- , group_by_keys(other.group_by_keys)
- , set_parts(other.set_parts)
- , aggregate_descriptions(other.aggregate_descriptions)
- , destination_type(other.destination_type)
- , destination_name(other.destination_name)
- , recompression_codec(other.recompression_codec)
-{
- if (other.expression)
- expression = other.expression->clone();
-
- if (other.where_expression)
- where_expression = other.where_expression->clone();
-}
-
-TTLDescription & TTLDescription::operator=(const TTLDescription & other)
-{
- if (&other == this)
- return *this;
-
- mode = other.mode;
- if (other.expression_ast)
- expression_ast = other.expression_ast->clone();
- else
- expression_ast.reset();
-
- if (other.expression)
- expression = other.expression->clone();
- else
- expression.reset();
-
- result_column = other.result_column;
- if (other.where_expression)
- where_expression = other.where_expression->clone();
- else
- where_expression.reset();
-
- where_result_column = other.where_result_column;
- group_by_keys = other.group_by_keys;
- set_parts = other.set_parts;
- aggregate_descriptions = other.aggregate_descriptions;
- destination_type = other.destination_type;
- destination_name = other.destination_name;
-
- if (other.recompression_codec)
- recompression_codec = other.recompression_codec->clone();
- else
- recompression_codec.reset();
-
- return *this;
-}
-
-TTLDescription TTLDescription::getTTLFromAST(
- const ASTPtr & definition_ast,
- const ColumnsDescription & columns,
- ContextPtr context,
- const KeyDescription & primary_key)
-{
- TTLDescription result;
- const auto * ttl_element = definition_ast->as<ASTTTLElement>();
-
- /// First child is expression: `TTL expr TO DISK`
- if (ttl_element != nullptr)
- result.expression_ast = ttl_element->children.front()->clone();
- else /// It's columns TTL without any additions, just copy it
- result.expression_ast = definition_ast->clone();
-
- auto ttl_ast = result.expression_ast->clone();
- auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical());
- result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false);
- result.result_column = ttl_ast->getColumnName();
-
- if (ttl_element == nullptr) /// columns TTL
- {
- result.destination_type = DataDestinationType::DELETE;
- result.mode = TTLMode::DELETE;
- }
- else /// rows TTL
- {
- result.destination_type = ttl_element->destination_type;
- result.destination_name = ttl_element->destination_name;
- result.mode = ttl_element->mode;
-
- if (ttl_element->mode == TTLMode::DELETE)
- {
- if (ASTPtr where_expr_ast = ttl_element->where())
- {
- auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical());
- result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false);
- result.where_result_column = where_expr_ast->getColumnName();
- }
- }
- else if (ttl_element->mode == TTLMode::GROUP_BY)
- {
- const auto & pk_columns = primary_key.column_names;
-
- if (ttl_element->group_by_key.size() > pk_columns.size())
- throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION);
-
- NameSet aggregation_columns_set;
- NameSet used_primary_key_columns_set;
-
- for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i)
- {
- if (ttl_element->group_by_key[i]->getColumnName() != pk_columns[i])
- throw Exception(
- "TTL Expression GROUP BY key should be a prefix of primary key",
- ErrorCodes::BAD_TTL_EXPRESSION);
-
- used_primary_key_columns_set.insert(pk_columns[i]);
- }
-
- std::vector<std::pair<String, ASTPtr>> aggregations;
- for (const auto & ast : ttl_element->group_by_assignments)
- {
- const auto assignment = ast->as<const ASTAssignment &>();
- auto expression = assignment.expression();
-
- FindAggregateFunctionVisitor::Data data{false};
- FindAggregateFunctionVisitor(data).visit(expression);
-
- if (!data.has_aggregate_function)
- throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
- "Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name);
-
- expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
- aggregations.emplace_back(assignment.column_name, std::move(expression));
- aggregation_columns_set.insert(assignment.column_name);
- }
-
- if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size())
- throw Exception(
- "Multiple aggregations set for one column in TTL Expression",
- ErrorCodes::BAD_TTL_EXPRESSION);
-
- result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());
-
- const auto & primary_key_expressions = primary_key.expression_list_ast->children;
-
- /// Wrap with the 'any' aggregate function those primary key columns
- /// which are not in the 'GROUP BY' key and were not set explicitly.
- /// This is a separate step, because not all primary key columns are ordinary columns.
- for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
- {
- if (!aggregation_columns_set.count(pk_columns[i]))
- {
- ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone());
- aggregations.emplace_back(pk_columns[i], std::move(expr));
- aggregation_columns_set.insert(pk_columns[i]);
- }
- }
-
- /// Wrap with the 'any' aggregate function any other columns that were not set explicitly.
- for (const auto & column : columns.getOrdinary())
- {
- if (!aggregation_columns_set.count(column.name) && !used_primary_key_columns_set.count(column.name))
- {
- ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column.name));
- aggregations.emplace_back(column.name, std::move(expr));
- }
- }
-
- for (auto [name, value] : aggregations)
- {
- auto syntax_result = TreeRewriter(context).analyze(value, columns.getAllPhysical(), {}, {}, true);
- auto expr_analyzer = ExpressionAnalyzer(value, syntax_result, context);
-
- TTLAggregateDescription set_part;
- set_part.column_name = name;
- set_part.expression_result_column_name = value->getColumnName();
- set_part.expression = expr_analyzer.getActions(false);
-
- result.set_parts.emplace_back(set_part);
-
- for (const auto & descr : expr_analyzer.getAnalyzedData().aggregate_descriptions)
- result.aggregate_descriptions.push_back(descr);
- }
- }
- else if (ttl_element->mode == TTLMode::RECOMPRESS)
- {
- result.recompression_codec =
- CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
- ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
- }
- }
-
- checkTTLExpression(result.expression, result.result_column);
- return result;
-}
-
-
-TTLTableDescription::TTLTableDescription(const TTLTableDescription & other)
- : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
- , rows_ttl(other.rows_ttl)
- , rows_where_ttl(other.rows_where_ttl)
- , move_ttl(other.move_ttl)
- , recompression_ttl(other.recompression_ttl)
- , group_by_ttl(other.group_by_ttl)
-{
-}
-
-TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription & other)
-{
- if (&other == this)
- return *this;
-
- if (other.definition_ast)
- definition_ast = other.definition_ast->clone();
- else
- definition_ast.reset();
-
- rows_ttl = other.rows_ttl;
- rows_where_ttl = other.rows_where_ttl;
- move_ttl = other.move_ttl;
- recompression_ttl = other.recompression_ttl;
- group_by_ttl = other.group_by_ttl;
-
- return *this;
-}
-
-TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
- const ASTPtr & definition_ast,
- const ColumnsDescription & columns,
- ContextPtr context,
- const KeyDescription & primary_key)
-{
- TTLTableDescription result;
- if (!definition_ast)
- return result;
-
- result.definition_ast = definition_ast->clone();
-
- bool have_unconditional_delete_ttl = false;
- for (const auto & ttl_element_ptr : definition_ast->children)
- {
- auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key);
- if (ttl.mode == TTLMode::DELETE)
- {
- if (!ttl.where_expression)
- {
- if (have_unconditional_delete_ttl)
- throw Exception("More than one DELETE TTL expression without WHERE expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
-
- have_unconditional_delete_ttl = true;
- result.rows_ttl = ttl;
- }
- else
- {
- result.rows_where_ttl.emplace_back(std::move(ttl));
- }
- }
- else if (ttl.mode == TTLMode::RECOMPRESS)
- {
- result.recompression_ttl.emplace_back(std::move(ttl));
- }
- else if (ttl.mode == TTLMode::GROUP_BY)
- {
- result.group_by_ttl.emplace_back(std::move(ttl));
- }
- else
- {
- result.move_ttl.emplace_back(std::move(ttl));
- }
- }
- return result;
-}
-
-}
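TTLDescription::getTTLFromAST above rejects a GROUP BY TTL whose key is not a column-by-column prefix of the table's primary key. A minimal sketch of that prefix check over plain column-name lists (names are illustrative only, not the removed implementation):

#include <cstddef>
#include <string>
#include <vector>

// True when group_by_key is a (possibly shorter) prefix of primary_key,
// which is the shape the GROUP BY TTL validation above enforces.
bool isPrefixOfPrimaryKey(const std::vector<std::string> & group_by_key,
                          const std::vector<std::string> & primary_key)
{
    if (group_by_key.size() > primary_key.size())
        return false;
    for (std::size_t i = 0; i < group_by_key.size(); ++i)
        if (group_by_key[i] != primary_key[i])
            return false;
    return true;
}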