diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-06-16 16:47:27 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-06-16 16:47:27 +0300 |
commit | 135a41cd35053f49e5423def5bc862ee440fd80c (patch) | |
tree | ba3de9689543b3e809dbfa16eabfc56974858083 | |
parent | bd4f9582965e0c4ec57a1f6b317afa964c642f3a (diff) | |
download | ydb-135a41cd35053f49e5423def5bc862ee440fd80c.tar.gz |
separate hash logic with columns aggregation
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt | 15 | ||||
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt | 15 | ||||
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt | 15 | ||||
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt | 15 | ||||
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp (renamed from ydb/services/ext_index/metadata/extractor/city.cpp) | 31 | ||||
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/hash_by_columns.h (renamed from ydb/services/ext_index/metadata/extractor/city.h) | 12 | ||||
-rw-r--r-- | ydb/services/ext_index/metadata/extractor/ya.make | 3 |
7 files changed, 91 insertions, 15 deletions
diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt index 213856d9fd..c2dc6359ad 100644 --- a/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt +++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor PRIVATE @@ -17,11 +23,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp ) +generate_enum_serilization(ext_index-metadata-extractor + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h + INCLUDE_HEADERS + ydb/services/ext_index/metadata/extractor/hash_by_columns.h +) add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor.global PRIVATE @@ -33,7 +45,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor.global PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp ) diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt index c2abc92f8b..5be9c05a49 100644 --- a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt +++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor PRIVATE @@ -18,11 +24,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp ) +generate_enum_serilization(ext_index-metadata-extractor + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h + INCLUDE_HEADERS + ydb/services/ext_index/metadata/extractor/hash_by_columns.h +) add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor.global PRIVATE @@ -35,7 +47,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor.global PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp ) diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt index c2abc92f8b..5be9c05a49 100644 --- a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt +++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor PRIVATE @@ -18,11 +24,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp ) +generate_enum_serilization(ext_index-metadata-extractor + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h + INCLUDE_HEADERS + ydb/services/ext_index/metadata/extractor/hash_by_columns.h +) add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor.global PRIVATE @@ -35,7 +47,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor.global PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp ) diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt index 213856d9fd..c2dc6359ad 100644 --- a/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt +++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor PRIVATE @@ -17,11 +23,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp ) +generate_enum_serilization(ext_index-metadata-extractor + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h + INCLUDE_HEADERS + ydb/services/ext_index/metadata/extractor/hash_by_columns.h +) add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor) target_compile_options(ext_index-metadata-extractor.global PRIVATE @@ -33,7 +45,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC libs-apache-arrow ydb-core-protos core-tx-sharding + tools-enum_parser-enum_serialization_runtime ) target_sources(ext_index-metadata-extractor.global PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp ) diff --git a/ydb/services/ext_index/metadata/extractor/city.cpp b/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp index d9b5150871..d26aa85f7b 100644 --- a/ydb/services/ext_index/metadata/extractor/city.cpp +++ b/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp @@ -1,4 +1,4 @@ -#include "city.h" +#include "hash_by_columns.h" #include <ydb/core/protos/services.pb.h> #include <ydb/core/tx/sharding/sharding.h> #include <ydb/library/yql/utils/yql_panic.h> @@ -13,7 +13,8 @@ namespace NKikimr::NMetadata::NCSIndex { -TExtractorCityHash64::TFactory::TRegistrator<TExtractorCityHash64> TExtractorCityHash64::Registrator(TExtractorCityHash64::ClassName); +THashByColumns::TFactory::TRegistrator<THashByColumns> THashByColumns::Registrator(THashByColumns::ClassName); +THashByColumns::TFactory::TRegistrator<THashByColumns> THashByColumns::RegistratorDeprecated("city64"); template <class TArrayBuilder> class TArrayInserter { @@ -46,7 +47,7 @@ public: -std::vector<ui64> TExtractorCityHash64::DoExtractIndex(const std::shared_ptr<arrow::RecordBatch>& batch) const { +std::vector<ui64> THashByColumns::DoExtractIndex(const std::shared_ptr<arrow::RecordBatch>& batch) const { auto schema = batch->schema(); std::vector<std::shared_ptr<arrow::Field>> fields; std::vector<std::shared_ptr<arrow::Array>> columns; @@ -127,11 +128,16 @@ std::vector<ui64> TExtractorCityHash64::DoExtractIndex(const std::shared_ptr<arr return {}; } auto newBatch = arrow::RecordBatch::Make(*newSchema, batch->num_rows(), columns); - NSharding::THashSharding hashSharding(0, fieldIds); - return hashSharding.MakeHashes(newBatch); + if (HashType == EHashType::XX64) { + NSharding::THashSharding hashSharding(0, fieldIds); + return hashSharding.MakeHashes(newBatch); + } else { + ALS_ERROR(NKikimrServices::EXT_INDEX) << "undefined hash type: " << HashType; + return {}; + } } -bool TExtractorCityHash64::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { +bool THashByColumns::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { const NJson::TJsonValue::TArray* jsonFields; if (!jsonInfo["fields"].GetArrayPointer(&jsonFields)) { return false; @@ -146,11 +152,22 @@ bool TExtractorCityHash64::DoDeserializeFromJson(const NJson::TJsonValue& jsonIn if (Fields.size() == 0) { return false; } + + if (jsonInfo.Has("hash_type")) { + if (!jsonInfo["hash_type"].IsString()) { + return false; + } + if (!TryFromString(jsonInfo["hash_type"].GetString(), HashType)) { + return false; + } + } + return true; } -NJson::TJsonValue TExtractorCityHash64::DoSerializeToJson() const { +NJson::TJsonValue THashByColumns::DoSerializeToJson() const { NJson::TJsonValue result; + result.InsertValue("hash_type", ::ToString(HashType)); auto& jsonFields = result.InsertValue("fields", NJson::JSON_ARRAY); for (auto&& i : Fields) { jsonFields.AppendValue(i.SerializeToJson()); diff --git a/ydb/services/ext_index/metadata/extractor/city.h b/ydb/services/ext_index/metadata/extractor/hash_by_columns.h index 99011b0a92..e8747f9288 100644 --- a/ydb/services/ext_index/metadata/extractor/city.h +++ b/ydb/services/ext_index/metadata/extractor/hash_by_columns.h @@ -35,16 +35,22 @@ public: } }; -class TExtractorCityHash64: public IIndexExtractor { +class THashByColumns: public IIndexExtractor { +public: + enum class EHashType { + XX64 /* "xx64" */ + }; private: YDB_READONLY_DEF(std::vector<TExtractorField>, Fields); - static TFactory::TRegistrator<TExtractorCityHash64> Registrator; + YDB_READONLY(EHashType, HashType, EHashType::XX64); + static TFactory::TRegistrator<THashByColumns> Registrator; + static TFactory::TRegistrator<THashByColumns> RegistratorDeprecated; protected: virtual std::vector<ui64> DoExtractIndex(const std::shared_ptr<arrow::RecordBatch>& batch) const override; virtual bool DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override; virtual NJson::TJsonValue DoSerializeToJson() const override; public: - static inline TString ClassName = "city64"; + static inline TString ClassName = "hash_by_columns"; virtual TString GetClassName() const override { return ClassName; diff --git a/ydb/services/ext_index/metadata/extractor/ya.make b/ydb/services/ext_index/metadata/extractor/ya.make index 7d673a81f7..83ab0eeb16 100644 --- a/ydb/services/ext_index/metadata/extractor/ya.make +++ b/ydb/services/ext_index/metadata/extractor/ya.make @@ -2,7 +2,7 @@ LIBRARY() SRCS( abstract.cpp - GLOBAL city.cpp + GLOBAL hash_by_columns.cpp container.cpp ) @@ -13,5 +13,6 @@ PEERDIR( ) YQL_LAST_ABI_VERSION() +GENERATE_ENUM_SERIALIZATION(hash_by_columns.h) END() |