aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorivanmorozov <ivanmorozov@yandex-team.com>2023-07-25 13:14:34 +0300
committerroot <root@qavm-2ed34686.qemu>2023-07-25 13:14:34 +0300
commit22d9f8e8361fb30d4a132e3fe8536550b13d0628 (patch)
treed53e4250eb5c32f67bebbc058fda076c68c4c149
parent102ed5791ddc7a3c7859c5359bb5a209c6bb45dc (diff)
downloadydb-22d9f8e8361fb30d4a132e3fe8536550b13d0628.tar.gz
use TStoreReplaceKey for store special record in long term (automatically reduce memory consumption)
-rw-r--r--ydb/core/formats/arrow/CMakeLists.darwin-x86_64.txt5
-rw-r--r--ydb/core/formats/arrow/CMakeLists.linux-aarch64.txt5
-rw-r--r--ydb/core/formats/arrow/CMakeLists.linux-x86_64.txt5
-rw-r--r--ydb/core/formats/arrow/CMakeLists.windows-x86_64.txt5
-rw-r--r--ydb/core/formats/arrow/arrow_helpers.cpp10
-rw-r--r--ydb/core/formats/arrow/arrow_helpers.h6
-rw-r--r--ydb/core/formats/arrow/permutations.cpp1
-rw-r--r--ydb/core/formats/arrow/replace_key.cpp15
-rw-r--r--ydb/core/formats/arrow/replace_key.h20
-rw-r--r--ydb/core/formats/arrow/ya.make9
-rw-r--r--ydb/core/tx/columnshard/engines/changes/mark.h2
-rw-r--r--ydb/core/tx/columnshard/engines/changes/mark_granules.cpp2
-rw-r--r--ydb/core/tx/columnshard/engines/changes/split_compaction.cpp2
-rw-r--r--ydb/core/tx/columnshard/engines/granules_table.h3
-rw-r--r--ydb/core/tx/columnshard/engines/predicate/container.h1
-rw-r--r--ydb/core/tx/columnshard/engines/scheme/tier_info.h2
16 files changed, 58 insertions, 35 deletions
diff --git a/ydb/core/formats/arrow/CMakeLists.darwin-x86_64.txt b/ydb/core/formats/arrow/CMakeLists.darwin-x86_64.txt
index 80f3745ad37..a2b306eb306 100644
--- a/ydb/core/formats/arrow/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/formats/arrow/CMakeLists.darwin-x86_64.txt
@@ -39,13 +39,14 @@ target_link_libraries(core-formats-arrow PUBLIC
)
target_sources(core-formats-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_batch_builder.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/converter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/custom_registry.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/merging_sorted_input_stream.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/permutations.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/program.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/replace_key.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/size_calcer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
)
diff --git a/ydb/core/formats/arrow/CMakeLists.linux-aarch64.txt b/ydb/core/formats/arrow/CMakeLists.linux-aarch64.txt
index 4b338876c23..9db7b6a92ae 100644
--- a/ydb/core/formats/arrow/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/formats/arrow/CMakeLists.linux-aarch64.txt
@@ -40,13 +40,14 @@ target_link_libraries(core-formats-arrow PUBLIC
)
target_sources(core-formats-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_batch_builder.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/converter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/custom_registry.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/merging_sorted_input_stream.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/permutations.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/program.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/replace_key.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/size_calcer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
)
diff --git a/ydb/core/formats/arrow/CMakeLists.linux-x86_64.txt b/ydb/core/formats/arrow/CMakeLists.linux-x86_64.txt
index 4b338876c23..9db7b6a92ae 100644
--- a/ydb/core/formats/arrow/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/formats/arrow/CMakeLists.linux-x86_64.txt
@@ -40,13 +40,14 @@ target_link_libraries(core-formats-arrow PUBLIC
)
target_sources(core-formats-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_batch_builder.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/converter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/custom_registry.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/merging_sorted_input_stream.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/permutations.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/program.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/replace_key.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/size_calcer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
)
diff --git a/ydb/core/formats/arrow/CMakeLists.windows-x86_64.txt b/ydb/core/formats/arrow/CMakeLists.windows-x86_64.txt
index 524a02842f8..ce82e9eac7e 100644
--- a/ydb/core/formats/arrow/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/formats/arrow/CMakeLists.windows-x86_64.txt
@@ -39,13 +39,14 @@ target_link_libraries(core-formats-arrow PUBLIC
)
target_sources(core-formats-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_batch_builder.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/arrow_helpers.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/converter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/custom_registry.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/merging_sorted_input_stream.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/permutations.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/program.cpp
- ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/replace_key.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/size_calcer.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ssa_program_optimizer.cpp
)
diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp
index b4837eae5d0..b757020f47f 100644
--- a/ydb/core/formats/arrow/arrow_helpers.cpp
+++ b/ydb/core/formats/arrow/arrow_helpers.cpp
@@ -501,16 +501,6 @@ std::vector<TString> ColumnNames(const std::shared_ptr<arrow::Schema>& schema) {
return out;
}
-size_t LowerBound(const std::vector<TRawReplaceKey>& batchKeys, const TReplaceKey& key, size_t offset) {
- Y_VERIFY(offset <= batchKeys.size());
- if (offset == batchKeys.size()) {
- return offset;
- }
- auto start = batchKeys.begin() + offset;
- auto it = std::lower_bound(start, batchKeys.end(), key.ToRaw());
- return it - batchKeys.begin();
-}
-
std::shared_ptr<arrow::UInt64Array> MakeUI64Array(ui64 value, i64 size) {
auto res = arrow::MakeArrayFromScalar(arrow::UInt64Scalar(value), size);
Y_VERIFY(res.ok());
diff --git a/ydb/core/formats/arrow/arrow_helpers.h b/ydb/core/formats/arrow/arrow_helpers.h
index 26a2a93b4c6..13e2893c152 100644
--- a/ydb/core/formats/arrow/arrow_helpers.h
+++ b/ydb/core/formats/arrow/arrow_helpers.h
@@ -1,7 +1,6 @@
#pragma once
#include "switch_type.h"
#include "size_calcer.h"
-#include "replace_key.h"
#include <ydb/core/formats/factory.h>
#include <ydb/core/scheme/scheme_tablecell.h>
#include <library/cpp/json/writer/json_value.h>
@@ -13,10 +12,6 @@
namespace NKikimr::NArrow {
using TArrayVec = std::vector<std::shared_ptr<arrow::Array>>;
-template<typename T>
-class TReplaceKeyTemplate;
-using TReplaceKey = TReplaceKeyTemplate<std::shared_ptr<TArrayVec>>;
-using TRawReplaceKey = TReplaceKeyTemplate<const TArrayVec*>;
std::shared_ptr<arrow::DataType> GetArrowType(NScheme::TTypeInfo typeInfo);
std::shared_ptr<arrow::DataType> GetCSVArrowType(NScheme::TTypeInfo typeId);
@@ -94,7 +89,6 @@ std::vector<std::shared_ptr<arrow::Array>> Finish(std::vector<std::unique_ptr<ar
std::shared_ptr<arrow::UInt64Array> MakeUI64Array(ui64 value, i64 size);
std::vector<TString> ColumnNames(const std::shared_ptr<arrow::Schema>& schema);
-size_t LowerBound(const std::vector<TRawReplaceKey>& batchKeys, const TReplaceKey& key, size_t offset = 0);
bool ReserveData(arrow::ArrayBuilder& builder, const size_t size);
bool MergeBatchColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& batches, std::shared_ptr<arrow::RecordBatch>& result, const std::vector<std::string>& columnsOrder = {}, const bool orderFieldsAreNecessary = true);
diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp
index 51e188b5815..e1856b71a97 100644
--- a/ydb/core/formats/arrow/permutations.cpp
+++ b/ydb/core/formats/arrow/permutations.cpp
@@ -1,5 +1,6 @@
#include "arrow_helpers.h"
#include "permutations.h"
+#include "replace_key.h"
#include <ydb/core/formats/arrow/common/validation.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h>
diff --git a/ydb/core/formats/arrow/replace_key.cpp b/ydb/core/formats/arrow/replace_key.cpp
new file mode 100644
index 00000000000..7a8ca9f1844
--- /dev/null
+++ b/ydb/core/formats/arrow/replace_key.cpp
@@ -0,0 +1,15 @@
+#include "replace_key.h"
+
+namespace NKikimr::NArrow {
+
+size_t TReplaceKeyHelper::LowerBound(const std::vector<TRawReplaceKey>& batchKeys, const TReplaceKey& key, size_t offset) {
+ Y_VERIFY(offset <= batchKeys.size());
+ if (offset == batchKeys.size()) {
+ return offset;
+ }
+ auto start = batchKeys.begin() + offset;
+ auto it = std::lower_bound(start, batchKeys.end(), key.ToRaw());
+ return it - batchKeys.begin();
+}
+
+}
diff --git a/ydb/core/formats/arrow/replace_key.h b/ydb/core/formats/arrow/replace_key.h
index 9e49a4a247b..fab522c7ab9 100644
--- a/ydb/core/formats/arrow/replace_key.h
+++ b/ydb/core/formats/arrow/replace_key.h
@@ -1,4 +1,5 @@
#pragma once
+#include "arrow_helpers.h"
#include "permutations.h"
#include "common/validation.h"
#include <ydb/core/base/defs.h>
@@ -8,8 +9,6 @@
namespace NKikimr::NArrow {
-bool IsGoodScalar(const std::shared_ptr<arrow::Scalar>& x);
-
using TArrayVec = std::vector<std::shared_ptr<arrow::Array>>;
template<typename TArrayVecPtr>
@@ -383,6 +382,22 @@ private:
using TReplaceKey = TReplaceKeyTemplate<std::shared_ptr<TArrayVec>>;
using TRawReplaceKey = TReplaceKeyTemplate<const TArrayVec*>;
+class TStoreReplaceKey: public TReplaceKey {
+private:
+ using TBase = TReplaceKey;
+public:
+ TStoreReplaceKey(const TReplaceKey& baseKey)
+ : TBase(baseKey)
+ {
+ TBase::ShrinkToFit();
+ }
+};
+
+class TReplaceKeyHelper {
+public:
+ static size_t LowerBound(const std::vector<TRawReplaceKey>& batchKeys, const TReplaceKey& key, size_t offset);
+};
+
}
template<>
@@ -398,3 +413,4 @@ struct THash<NKikimr::NArrow::TRawReplaceKey> {
return x.Hash();
}
};
+
diff --git a/ydb/core/formats/arrow/ya.make b/ydb/core/formats/arrow/ya.make
index f53171a5927..37000a29807 100644
--- a/ydb/core/formats/arrow/ya.make
+++ b/ydb/core/formats/arrow/ya.make
@@ -35,20 +35,21 @@ YQL_LAST_ABI_VERSION()
SRCS(
arrow_batch_builder.cpp
- arrow_helpers.cpp
arrow_filter.cpp
- converter.h
+ arrow_helpers.cpp
converter.cpp
+ converter.h
custom_registry.cpp
input_stream.h
merging_sorted_input_stream.cpp
merging_sorted_input_stream.h
one_batch_input_stream.h
permutations.cpp
- sort_cursor.h
program.cpp
- ssa_program_optimizer.cpp
+ replace_key.cpp
size_calcer.cpp
+ sort_cursor.h
+ ssa_program_optimizer.cpp
)
END()
diff --git a/ydb/core/tx/columnshard/engines/changes/mark.h b/ydb/core/tx/columnshard/engines/changes/mark.h
index d34260d65a8..c1aeeeaad62 100644
--- a/ydb/core/tx/columnshard/engines/changes/mark.h
+++ b/ydb/core/tx/columnshard/engines/changes/mark.h
@@ -1,6 +1,6 @@
#pragma once
-#include <ydb/core/formats/arrow/arrow_helpers.h>
+#include <ydb/core/formats/arrow/replace_key.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/scalar.h>
namespace NKikimr::NOlap {
diff --git a/ydb/core/tx/columnshard/engines/changes/mark_granules.cpp b/ydb/core/tx/columnshard/engines/changes/mark_granules.cpp
index 57ede5644a6..2ee755687f4 100644
--- a/ydb/core/tx/columnshard/engines/changes/mark_granules.cpp
+++ b/ydb/core/tx/columnshard/engines/changes/mark_granules.cpp
@@ -85,7 +85,7 @@ THashMap<ui64, std::shared_ptr<arrow::RecordBatch>> TMarksGranules::SliceIntoGra
// Just take the number of elements in the key column for the last granule.
? effKey->num_rows()
// Locate position of the next granule in the key.
- : NArrow::LowerBound(keys, granules[i + 1].first.GetBorder(), offset);
+ : NArrow::TReplaceKeyHelper::LowerBound(keys, granules[i + 1].first.GetBorder(), offset);
if (const i64 size = end - offset) {
Y_VERIFY(out.emplace(granules[i].second, batch->Slice(offset, size)).second);
diff --git a/ydb/core/tx/columnshard/engines/changes/split_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/split_compaction.cpp
index 6db2a5a988d..01118f3c379 100644
--- a/ydb/core/tx/columnshard/engines/changes/split_compaction.cpp
+++ b/ydb/core/tx/columnshard/engines/changes/split_compaction.cpp
@@ -232,7 +232,7 @@ std::vector<std::pair<NKikimr::NOlap::TMark, std::shared_ptr<arrow::RecordBatch>
batchOffsets.push_back(0);
for (const auto& border : borders) {
- int offset = NArrow::LowerBound(keys, border, batchOffsets.back());
+ int offset = NArrow::TReplaceKeyHelper::LowerBound(keys, border, batchOffsets.back());
Y_VERIFY(offset >= batchOffsets.back());
Y_VERIFY(offset <= batch->num_rows());
batchOffsets.push_back(offset);
diff --git a/ydb/core/tx/columnshard/engines/granules_table.h b/ydb/core/tx/columnshard/engines/granules_table.h
index b5561ac68df..5b333927cce 100644
--- a/ydb/core/tx/columnshard/engines/granules_table.h
+++ b/ydb/core/tx/columnshard/engines/granules_table.h
@@ -10,7 +10,7 @@ private:
public:
ui64 PathId;
ui64 Granule;
- NArrow::TReplaceKey Mark;
+ NArrow::TStoreReplaceKey Mark;
TGranuleRecord(ui64 pathId, ui64 granule, const TSnapshot& createdAt, const NArrow::TReplaceKey& mark)
: CreatedAt(createdAt)
@@ -18,7 +18,6 @@ public:
, Granule(granule)
, Mark(mark)
{
- Mark.ShrinkToFit();
Y_VERIFY(Mark.Size());
}
diff --git a/ydb/core/tx/columnshard/engines/predicate/container.h b/ydb/core/tx/columnshard/engines/predicate/container.h
index b43f6fbf3f7..22991579d86 100644
--- a/ydb/core/tx/columnshard/engines/predicate/container.h
+++ b/ydb/core/tx/columnshard/engines/predicate/container.h
@@ -1,6 +1,7 @@
#pragma once
#include "predicate.h"
#include <ydb/core/formats/arrow/arrow_filter.h>
+#include <ydb/core/formats/arrow/replace_key.h>
#include <ydb/library/accessor/accessor.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h>
#include <optional>
diff --git a/ydb/core/tx/columnshard/engines/scheme/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tier_info.h
index b05a1d76557..2271e4c87c6 100644
--- a/ydb/core/tx/columnshard/engines/scheme/tier_info.h
+++ b/ydb/core/tx/columnshard/engines/scheme/tier_info.h
@@ -6,6 +6,8 @@
#include <ydb/core/formats/arrow/compression/object.h>
#include <ydb/core/tx/columnshard/common/scalars.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h>
+#include <util/generic/set.h>
+#include <util/generic/hash_set.h>
namespace NKikimr::NOlap {