diff options
author | stanly <stanly@yandex-team.com> | 2023-04-05 16:33:13 +0300 |
---|---|---|
committer | stanly <stanly@yandex-team.com> | 2023-04-05 16:33:13 +0300 |
commit | 91b2e25f78a8c61578837d8cfa75acd1b427c4e8 (patch) | |
tree | f21be9abda56a74b46c9273b336beb8445b3e4d0 | |
parent | 88ed8f01edda1f7214899c218c9455492dde0b9e (diff) | |
download | ydb-91b2e25f78a8c61578837d8cfa75acd1b427c4e8.tar.gz |
Improve TPredicate
* move implementation details out of the header into predicate.cpp
* reduce sizeof(TPredicate) from 32 bytes to 24 by reordering the fields
6 files changed, 85 insertions, 66 deletions
diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt index 6e27fb3e510..398cfd3d0cc 100644 --- a/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt @@ -33,5 +33,6 @@ target_sources(tx-columnshard-engines PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp ) diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt index 10c6eaf1a01..ef42c703321 100644 --- a/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt @@ -34,5 +34,6 @@ target_sources(tx-columnshard-engines PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp ) diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt index 10c6eaf1a01..ef42c703321 100644 --- a/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt @@ -34,5 +34,6 @@ target_sources(tx-columnshard-engines PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp 
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp ) diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt index 6e27fb3e510..398cfd3d0cc 100644 --- a/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt @@ -33,5 +33,6 @@ target_sources(tx-columnshard-engines PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp ) diff --git a/ydb/core/tx/columnshard/engines/predicate.cpp b/ydb/core/tx/columnshard/engines/predicate.cpp new file mode 100644 index 00000000000..493bc06ae2a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/predicate.cpp @@ -0,0 +1,68 @@ +#include "predicate.h" + +#include <ydb/core/formats/arrow_helpers.h> +#include <ydb/core/formats/switch_type.h> + +namespace NKikimr::NOlap { + +TPredicate::TPredicate(EOperation op, std::shared_ptr<arrow::RecordBatch> batch, bool inclusive) noexcept + : Batch(std::move(batch)) + , Operation(op) + , Inclusive(inclusive) +{} + +TPredicate::TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr<arrow::Schema>& schema, bool inclusive) + : Operation(op) + , Inclusive(inclusive) +{ + if (!serializedBatch.empty()) { + Batch = NArrow::DeserializeBatch(serializedBatch, schema); + Y_VERIFY(Batch); + } +} + +TVector<TString> TPredicate::ColumnNames() const { + TVector<TString> out; + out.reserve(Batch->num_columns()); + for (const auto& field : Batch->schema()->fields()) { + out.emplace_back(field->name()); + 
} + return out; +} + +IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { + out << NSsa::GetFunctionName(pred.Operation); + if (pred.Inclusive) { + out << "(incl) "; + } else { + out << "(excl) "; + } + + for (i32 i = 0; i < pred.Batch->num_columns(); ++i) { + auto array = pred.Batch->column(i); + out << pred.Batch->schema()->field(i)->name() << ": "; + NArrow::SwitchType(array->type_id(), [&](const auto& type) { + using TWrap = std::decay_t<decltype(type)>; + using TArray = typename arrow::TypeTraits<typename TWrap::T>::ArrayType; + + auto& typedArray = static_cast<const TArray&>(*array); + if (typedArray.IsNull(0)) { + out << "NULL"; + } else { + auto value = typedArray.GetView(0); + using T = std::decay_t<decltype(value)>; + if constexpr (std::is_same_v<T, arrow::util::string_view>) { + out << "'" << std::string_view(value.data(), value.size()) << "'"; + } else { + out << "'" << value << "'"; + } + } + return true; + }); + out << " "; + } + + return out; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate.h b/ydb/core/tx/columnshard/engines/predicate.h index 59e5e01eeaa..525009dfbee 100644 --- a/ydb/core/tx/columnshard/engines/predicate.h +++ b/ydb/core/tx/columnshard/engines/predicate.h @@ -2,90 +2,37 @@ #include "defs.h" -#include <ydb/core/formats/arrow_helpers.h> #include <ydb/core/formats/program.h> -#include <ydb/core/formats/switch_type.h> + +#include <contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h> namespace NKikimr::NOlap { struct TPredicate { using EOperation = NArrow::EOperation; - EOperation Operation{EOperation::Unspecified}; std::shared_ptr<arrow::RecordBatch> Batch; - bool Inclusive; + EOperation Operation{EOperation::Unspecified}; + bool Inclusive{false}; - bool Empty() const { return Batch.get() == nullptr; } + bool Empty() const noexcept { return Batch.get() == nullptr; } bool Good() const { return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; } - bool IsFrom() 
const { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; } - bool IsTo() const { return Operation == EOperation::Less || Operation == EOperation::LessEqual; } + bool IsFrom() const noexcept { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; } + bool IsTo() const noexcept { return Operation == EOperation::Less || Operation == EOperation::LessEqual; } - TVector<TString> ColumnNames() const { - TVector<TString> out; - out.reserve(Batch->num_columns()); - for (auto& field : Batch->schema()->fields()) { - TString name(field->name().data(), field->name().size()); - out.emplace_back(name); - } - return out; - } + TVector<TString> ColumnNames() const; std::string ToString() const { return Empty() ? "()" : Batch->schema()->ToString(); } - TPredicate() = default; + constexpr TPredicate() noexcept = default; - TPredicate(EOperation op, std::shared_ptr<arrow::RecordBatch> batch, bool inclusive = false) - : Operation(op) - , Batch(batch) - , Inclusive(inclusive) - {} + TPredicate(EOperation op, std::shared_ptr<arrow::RecordBatch> batch, bool inclusive = false) noexcept; - TPredicate(EOperation op, TString serializedBatch, std::shared_ptr<arrow::Schema> schema, bool inclusive) - : Operation(op) - , Inclusive(inclusive) - { - if (!serializedBatch.empty()) { - Batch = NArrow::DeserializeBatch(serializedBatch, schema); - Y_VERIFY(Batch); - } - } + TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr<arrow::Schema>& schema, bool inclusive); - friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { - out << NSsa::GetFunctionName(pred.Operation); - if (pred.Inclusive) { - out << "(incl) "; - } else { - out << "(excl) "; - } - - for (i32 i = 0; i < pred.Batch->num_columns(); ++i) { - auto array = pred.Batch->column(i); - out << pred.Batch->schema()->field(i)->name() << ": "; - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = 
std::decay_t<decltype(type)>; - using TArray = typename arrow::TypeTraits<typename TWrap::T>::ArrayType; - - auto& typedArray = static_cast<const TArray&>(*array); - if (typedArray.IsNull(0)) { - out << "NULL"; - } else { - auto value = typedArray.GetView(0); - using T = std::decay_t<decltype(value)>; - if constexpr (std::is_same_v<T, arrow::util::string_view>) { - out << "'" << std::string(value.data(), value.size()) << "'"; - } else { - out << "'" << value << "'"; - } - } - return true; - }); - out << " "; - } - - return out; - } + friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred); }; -} +} // namespace NKikimr::NOlap |