about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: ivanmorozov <ivanmorozov@yandex-team.com> 2023-06-16 16:47:27 +0300
committer: ivanmorozov <ivanmorozov@yandex-team.com> 2023-06-16 16:47:27 +0300
commit: 135a41cd35053f49e5423def5bc862ee440fd80c (patch)
tree: ba3de9689543b3e809dbfa16eabfc56974858083
parent: bd4f9582965e0c4ec57a1f6b317afa964c642f3a (diff)
download: ydb-135a41cd35053f49e5423def5bc862ee440fd80c.tar.gz
separate hash logic with columns aggregation
-rw-r--r-- ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt 15
-rw-r--r-- ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt 15
-rw-r--r-- ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt 15
-rw-r--r-- ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt 15
-rw-r--r-- ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp (renamed from ydb/services/ext_index/metadata/extractor/city.cpp) 31
-rw-r--r-- ydb/services/ext_index/metadata/extractor/hash_by_columns.h (renamed from ydb/services/ext_index/metadata/extractor/city.h) 12
-rw-r--r-- ydb/services/ext_index/metadata/extractor/ya.make 3
7 files changed, 91 insertions, 15 deletions
diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt
index 213856d9fd..c2dc6359ad 100644
--- a/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt
+++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.darwin-x86_64.txt
@@ -6,6 +6,12 @@
# original buildsystem will not be accepted.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
add_library(ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor PRIVATE
@@ -17,11 +23,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp
)
+generate_enum_serilization(ext_index-metadata-extractor
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+ INCLUDE_HEADERS
+ ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+)
add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor.global PRIVATE
@@ -33,7 +45,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor.global PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp
)
diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt
index c2abc92f8b..5be9c05a49 100644
--- a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt
+++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-aarch64.txt
@@ -6,6 +6,12 @@
# original buildsystem will not be accepted.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
add_library(ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor PRIVATE
@@ -18,11 +24,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp
)
+generate_enum_serilization(ext_index-metadata-extractor
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+ INCLUDE_HEADERS
+ ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+)
add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor.global PRIVATE
@@ -35,7 +47,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor.global PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp
)
diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt
index c2abc92f8b..5be9c05a49 100644
--- a/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt
+++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.linux-x86_64.txt
@@ -6,6 +6,12 @@
# original buildsystem will not be accepted.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
add_library(ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor PRIVATE
@@ -18,11 +24,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp
)
+generate_enum_serilization(ext_index-metadata-extractor
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+ INCLUDE_HEADERS
+ ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+)
add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor.global PRIVATE
@@ -35,7 +47,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor.global PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp
)
diff --git a/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt b/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt
index 213856d9fd..c2dc6359ad 100644
--- a/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt
+++ b/ydb/services/ext_index/metadata/extractor/CMakeLists.windows-x86_64.txt
@@ -6,6 +6,12 @@
# original buildsystem will not be accepted.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
add_library(ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor PRIVATE
@@ -17,11 +23,17 @@ target_link_libraries(ext_index-metadata-extractor PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/abstract.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/container.cpp
)
+generate_enum_serilization(ext_index-metadata-extractor
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+ INCLUDE_HEADERS
+ ydb/services/ext_index/metadata/extractor/hash_by_columns.h
+)
add_global_library_for(ext_index-metadata-extractor.global ext_index-metadata-extractor)
target_compile_options(ext_index-metadata-extractor.global PRIVATE
@@ -33,7 +45,8 @@ target_link_libraries(ext_index-metadata-extractor.global PUBLIC
libs-apache-arrow
ydb-core-protos
core-tx-sharding
+ tools-enum_parser-enum_serialization_runtime
)
target_sources(ext_index-metadata-extractor.global PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/city.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp
)
diff --git a/ydb/services/ext_index/metadata/extractor/city.cpp b/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp
index d9b5150871..d26aa85f7b 100644
--- a/ydb/services/ext_index/metadata/extractor/city.cpp
+++ b/ydb/services/ext_index/metadata/extractor/hash_by_columns.cpp
@@ -1,4 +1,4 @@
-#include "city.h"
+#include "hash_by_columns.h"
#include <ydb/core/protos/services.pb.h>
#include <ydb/core/tx/sharding/sharding.h>
#include <ydb/library/yql/utils/yql_panic.h>
@@ -13,7 +13,8 @@
namespace NKikimr::NMetadata::NCSIndex {
-TExtractorCityHash64::TFactory::TRegistrator<TExtractorCityHash64> TExtractorCityHash64::Registrator(TExtractorCityHash64::ClassName);
+THashByColumns::TFactory::TRegistrator<THashByColumns> THashByColumns::Registrator(THashByColumns::ClassName);
+THashByColumns::TFactory::TRegistrator<THashByColumns> THashByColumns::RegistratorDeprecated("city64");
template <class TArrayBuilder>
class TArrayInserter {
@@ -46,7 +47,7 @@ public:
-std::vector<ui64> TExtractorCityHash64::DoExtractIndex(const std::shared_ptr<arrow::RecordBatch>& batch) const {
+std::vector<ui64> THashByColumns::DoExtractIndex(const std::shared_ptr<arrow::RecordBatch>& batch) const {
auto schema = batch->schema();
std::vector<std::shared_ptr<arrow::Field>> fields;
std::vector<std::shared_ptr<arrow::Array>> columns;
@@ -127,11 +128,16 @@ std::vector<ui64> TExtractorCityHash64::DoExtractIndex(const std::shared_ptr<arr
return {};
}
auto newBatch = arrow::RecordBatch::Make(*newSchema, batch->num_rows(), columns);
- NSharding::THashSharding hashSharding(0, fieldIds);
- return hashSharding.MakeHashes(newBatch);
+ if (HashType == EHashType::XX64) {
+ NSharding::THashSharding hashSharding(0, fieldIds);
+ return hashSharding.MakeHashes(newBatch);
+ } else {
+ ALS_ERROR(NKikimrServices::EXT_INDEX) << "undefined hash type: " << HashType;
+ return {};
+ }
}
-bool TExtractorCityHash64::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) {
+bool THashByColumns::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) {
const NJson::TJsonValue::TArray* jsonFields;
if (!jsonInfo["fields"].GetArrayPointer(&jsonFields)) {
return false;
@@ -146,11 +152,22 @@ bool TExtractorCityHash64::DoDeserializeFromJson(const NJson::TJsonValue& jsonIn
if (Fields.size() == 0) {
return false;
}
+
+ if (jsonInfo.Has("hash_type")) {
+ if (!jsonInfo["hash_type"].IsString()) {
+ return false;
+ }
+ if (!TryFromString(jsonInfo["hash_type"].GetString(), HashType)) {
+ return false;
+ }
+ }
+
return true;
}
-NJson::TJsonValue TExtractorCityHash64::DoSerializeToJson() const {
+NJson::TJsonValue THashByColumns::DoSerializeToJson() const {
NJson::TJsonValue result;
+ result.InsertValue("hash_type", ::ToString(HashType));
auto& jsonFields = result.InsertValue("fields", NJson::JSON_ARRAY);
for (auto&& i : Fields) {
jsonFields.AppendValue(i.SerializeToJson());
diff --git a/ydb/services/ext_index/metadata/extractor/city.h b/ydb/services/ext_index/metadata/extractor/hash_by_columns.h
index 99011b0a92..e8747f9288 100644
--- a/ydb/services/ext_index/metadata/extractor/city.h
+++ b/ydb/services/ext_index/metadata/extractor/hash_by_columns.h
@@ -35,16 +35,22 @@ public:
}
};
-class TExtractorCityHash64: public IIndexExtractor {
+class THashByColumns: public IIndexExtractor {
+public:
+ enum class EHashType {
+ XX64 /* "xx64" */
+ };
private:
YDB_READONLY_DEF(std::vector<TExtractorField>, Fields);
- static TFactory::TRegistrator<TExtractorCityHash64> Registrator;
+ YDB_READONLY(EHashType, HashType, EHashType::XX64);
+ static TFactory::TRegistrator<THashByColumns> Registrator;
+ static TFactory::TRegistrator<THashByColumns> RegistratorDeprecated;
protected:
virtual std::vector<ui64> DoExtractIndex(const std::shared_ptr<arrow::RecordBatch>& batch) const override;
virtual bool DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override;
virtual NJson::TJsonValue DoSerializeToJson() const override;
public:
- static inline TString ClassName = "city64";
+ static inline TString ClassName = "hash_by_columns";
virtual TString GetClassName() const override {
return ClassName;
diff --git a/ydb/services/ext_index/metadata/extractor/ya.make b/ydb/services/ext_index/metadata/extractor/ya.make
index 7d673a81f7..83ab0eeb16 100644
--- a/ydb/services/ext_index/metadata/extractor/ya.make
+++ b/ydb/services/ext_index/metadata/extractor/ya.make
@@ -2,7 +2,7 @@ LIBRARY()
SRCS(
abstract.cpp
- GLOBAL city.cpp
+ GLOBAL hash_by_columns.cpp
container.cpp
)
@@ -13,5 +13,6 @@ PEERDIR(
)
YQL_LAST_ABI_VERSION()
+GENERATE_ENUM_SERIALIZATION(hash_by_columns.h)
END()