aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorstanly <stanly@yandex-team.com>2023-04-05 16:33:13 +0300
committerstanly <stanly@yandex-team.com>2023-04-05 16:33:13 +0300
commit91b2e25f78a8c61578837d8cfa75acd1b427c4e8 (patch)
treef21be9abda56a74b46c9273b336beb8445b3e4d0
parent88ed8f01edda1f7214899c218c9455492dde0b9e (diff)
downloadydb-91b2e25f78a8c61578837d8cfa75acd1b427c4e8.tar.gz
Improve TPredicate
* move implementation details into cpp
* reduce sizeof(TPredicate) from 32 bytes to 24 by ordering the fields
-rw-r--r--ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/core/tx/columnshard/engines/predicate.cpp68
-rw-r--r--ydb/core/tx/columnshard/engines/predicate.h79
6 files changed, 85 insertions, 66 deletions
diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt
index 6e27fb3e510..398cfd3d0cc 100644
--- a/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/tx/columnshard/engines/CMakeLists.darwin-x86_64.txt
@@ -33,5 +33,6 @@ target_sources(tx-columnshard-engines PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp
)
diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt
index 10c6eaf1a01..ef42c703321 100644
--- a/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/tx/columnshard/engines/CMakeLists.linux-aarch64.txt
@@ -34,5 +34,6 @@ target_sources(tx-columnshard-engines PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp
)
diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt
index 10c6eaf1a01..ef42c703321 100644
--- a/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/tx/columnshard/engines/CMakeLists.linux-x86_64.txt
@@ -34,5 +34,6 @@ target_sources(tx-columnshard-engines PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp
)
diff --git a/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt
index 6e27fb3e510..398cfd3d0cc 100644
--- a/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/tx/columnshard/engines/CMakeLists.windows-x86_64.txt
@@ -33,5 +33,6 @@ target_sources(tx-columnshard-engines PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/filter.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/portion_info.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/predicate.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/scalars.cpp
)
diff --git a/ydb/core/tx/columnshard/engines/predicate.cpp b/ydb/core/tx/columnshard/engines/predicate.cpp
new file mode 100644
index 00000000000..493bc06ae2a
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/predicate.cpp
@@ -0,0 +1,68 @@
+#include "predicate.h"
+
+#include <ydb/core/formats/arrow_helpers.h>
+#include <ydb/core/formats/switch_type.h>
+
+namespace NKikimr::NOlap {
+
+// Builds a predicate from an already materialized record batch.
+// The batch is received by value and moved into the Batch member.
+TPredicate::TPredicate(EOperation op, std::shared_ptr<arrow::RecordBatch> batch, bool inclusive) noexcept
+    : Batch{std::move(batch)}
+    , Operation{op}
+    , Inclusive{inclusive}
+{
+}
+
+// Builds a predicate from a serialized record batch.
+// An empty serializedBatch leaves Batch unset, so the predicate reports Empty().
+// Otherwise the batch is deserialized against schema and the result is checked
+// with Y_VERIFY to be non-null.
+TPredicate::TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr<arrow::Schema>& schema, bool inclusive)
+    : Operation(op)
+    , Inclusive(inclusive)
+{
+    if (serializedBatch.empty()) {
+        return;
+    }
+
+    Batch = NArrow::DeserializeBatch(serializedBatch, schema);
+    Y_VERIFY(Batch);
+}
+
+// Returns the names of the batch's schema fields, in schema order.
+// A predicate without a batch (Empty()) yields an empty vector rather than
+// dereferencing a null Batch, matching how ToString() treats that case.
+TVector<TString> TPredicate::ColumnNames() const {
+    TVector<TString> out;
+    if (Empty()) {
+        return out;
+    }
+
+    out.reserve(Batch->num_columns());
+    for (const auto& field : Batch->schema()->fields()) {
+        out.emplace_back(field->name());
+    }
+    return out;
+}
+
+// Writes a human-readable form of the predicate to out and returns out:
+// the function name of the operation, "(incl) " or "(excl) ", then for every
+// column of the batch "<name>: <value> ", where <value> is the single-quoted
+// value at row 0, or NULL when that slot is null.
+// A predicate without a batch prints only the operation and the inclusiveness
+// marker instead of dereferencing a null Batch.
+IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) {
+    out << NSsa::GetFunctionName(pred.Operation);
+    if (pred.Inclusive) {
+        out << "(incl) ";
+    } else {
+        out << "(excl) ";
+    }
+
+    if (pred.Empty()) {
+        // No batch attached: there are no columns to describe.
+        return out;
+    }
+
+    // NOTE(review): row 0 is read unconditionally below, so the batch is assumed
+    // to hold at least one row (Good() requires exactly one) - confirm for callers
+    // that print predicates which are not Good().
+    for (i32 i = 0; i < pred.Batch->num_columns(); ++i) {
+        auto array = pred.Batch->column(i);
+        out << pred.Batch->schema()->field(i)->name() << ": ";
+        NArrow::SwitchType(array->type_id(), [&](const auto& type) {
+            // Resolve the concrete Arrow array class for the dispatched type.
+            using TWrap = std::decay_t<decltype(type)>;
+            using TArray = typename arrow::TypeTraits<typename TWrap::T>::ArrayType;
+
+            auto& typedArray = static_cast<const TArray&>(*array);
+            if (typedArray.IsNull(0)) {
+                out << "NULL";
+            } else {
+                auto value = typedArray.GetView(0);
+                using T = std::decay_t<decltype(value)>;
+                if constexpr (std::is_same_v<T, arrow::util::string_view>) {
+                    // Re-wrap Arrow's string_view as std::string_view before streaming it.
+                    out << "'" << std::string_view(value.data(), value.size()) << "'";
+                } else {
+                    out << "'" << value << "'";
+                }
+            }
+            return true;
+        });
+        out << " ";
+    }
+
+    return out;
+}
+
+} // namespace NKikimr::NOlap
diff --git a/ydb/core/tx/columnshard/engines/predicate.h b/ydb/core/tx/columnshard/engines/predicate.h
index 59e5e01eeaa..525009dfbee 100644
--- a/ydb/core/tx/columnshard/engines/predicate.h
+++ b/ydb/core/tx/columnshard/engines/predicate.h
@@ -2,90 +2,37 @@
#include "defs.h"
-#include <ydb/core/formats/arrow_helpers.h>
#include <ydb/core/formats/program.h>
-#include <ydb/core/formats/switch_type.h>
+
+#include <contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h>
namespace NKikimr::NOlap {
struct TPredicate {
using EOperation = NArrow::EOperation;
- EOperation Operation{EOperation::Unspecified};
std::shared_ptr<arrow::RecordBatch> Batch;
- bool Inclusive;
+ EOperation Operation{EOperation::Unspecified}; // comparison kind; Unspecified unless a constructor sets it
+ bool Inclusive{false}; // now defaults to false; previously left uninitialized by the default constructor
- bool Empty() const { return Batch.get() == nullptr; }
+ bool Empty() const noexcept { return Batch.get() == nullptr; } // true when no batch is attached
bool Good() const { return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; }
- bool IsFrom() const { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; }
- bool IsTo() const { return Operation == EOperation::Less || Operation == EOperation::LessEqual; }
+ bool IsFrom() const noexcept { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; } // true for Greater / GreaterEqual
+ bool IsTo() const noexcept { return Operation == EOperation::Less || Operation == EOperation::LessEqual; } // true for Less / LessEqual
- TVector<TString> ColumnNames() const {
- TVector<TString> out;
- out.reserve(Batch->num_columns());
- for (auto& field : Batch->schema()->fields()) {
- TString name(field->name().data(), field->name().size());
- out.emplace_back(name);
- }
- return out;
- }
+ TVector<TString> ColumnNames() const; // field names of the batch schema; defined in predicate.cpp
std::string ToString() const {
return Empty() ? "()" : Batch->schema()->ToString();
}
- TPredicate() = default;
+ constexpr TPredicate() noexcept = default; // null Batch, Operation Unspecified, Inclusive false
- TPredicate(EOperation op, std::shared_ptr<arrow::RecordBatch> batch, bool inclusive = false)
- : Operation(op)
- , Batch(batch)
- , Inclusive(inclusive)
- {}
+ TPredicate(EOperation op, std::shared_ptr<arrow::RecordBatch> batch, bool inclusive = false) noexcept; // moves batch into Batch; defined in predicate.cpp
- TPredicate(EOperation op, TString serializedBatch, std::shared_ptr<arrow::Schema> schema, bool inclusive)
- : Operation(op)
- , Inclusive(inclusive)
- {
- if (!serializedBatch.empty()) {
- Batch = NArrow::DeserializeBatch(serializedBatch, schema);
- Y_VERIFY(Batch);
- }
- }
+ TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr<arrow::Schema>& schema, bool inclusive); // deserializes a non-empty serializedBatch; defined in predicate.cpp
- friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) {
- out << NSsa::GetFunctionName(pred.Operation);
- if (pred.Inclusive) {
- out << "(incl) ";
- } else {
- out << "(excl) ";
- }
-
- for (i32 i = 0; i < pred.Batch->num_columns(); ++i) {
- auto array = pred.Batch->column(i);
- out << pred.Batch->schema()->field(i)->name() << ": ";
- NArrow::SwitchType(array->type_id(), [&](const auto& type) {
- using TWrap = std::decay_t<decltype(type)>;
- using TArray = typename arrow::TypeTraits<typename TWrap::T>::ArrayType;
-
- auto& typedArray = static_cast<const TArray&>(*array);
- if (typedArray.IsNull(0)) {
- out << "NULL";
- } else {
- auto value = typedArray.GetView(0);
- using T = std::decay_t<decltype(value)>;
- if constexpr (std::is_same_v<T, arrow::util::string_view>) {
- out << "'" << std::string(value.data(), value.size()) << "'";
- } else {
- out << "'" << value << "'";
- }
- }
- return true;
- });
- out << " ";
- }
-
- return out;
- }
+ friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred); // textual dump of the predicate; defined in predicate.cpp
};
-}
+} // namespace NKikimr::NOlap