diff options
author | cherepashka <cherepashka@yandex-team.com> | 2025-03-06 01:26:29 +0300 |
---|---|---|
committer | cherepashka <cherepashka@yandex-team.com> | 2025-03-06 01:55:54 +0300 |
commit | 16867b871a3abf548c504976fb8ca49c882b1ffd (patch) | |
tree | 697481902ad3f992fc84b77e827db444a7f7ff40 | |
parent | aa3749c1aaa8d692dfd08cefe82ff5eb29c5418f (diff) | |
download | ydb-16867b871a3abf548c504976fb8ca49c882b1ffd.tar.gz |
YT-21910: Master compact table schema
- Changelog entry
Type: feature
Component: master
Introduce TCompactTableSchema, which holds the wire protobuf representation of a schema and is lighter than TTableSchema
commit_hash:21801854b37fc25c5004ee01e5b79a3b3b6ea983
-rw-r--r-- | library/cpp/yt/misc/property.h | 13 | ||||
-rw-r--r-- | yt/yt/client/api/rpc_proxy/helpers.cpp | 4 | ||||
-rw-r--r-- | yt/yt/client/table_client/check_schema_compatibility.cpp | 12 | ||||
-rw-r--r-- | yt/yt/client/table_client/helpers.cpp | 5 | ||||
-rw-r--r-- | yt/yt/client/table_client/merge_table_schemas.cpp | 12 | ||||
-rw-r--r-- | yt/yt/client/table_client/schema.cpp | 61 | ||||
-rw-r--r-- | yt/yt/client/table_client/schema.h | 9 | ||||
-rw-r--r-- | yt/yt/client/table_client/schema_serialization_helpers.cpp | 4 | ||||
-rw-r--r-- | yt/yt/client/table_client/timestamped_schema_helpers.cpp | 4 |
9 files changed, 81 insertions, 43 deletions
diff --git a/library/cpp/yt/misc/property.h b/library/cpp/yt/misc/property.h index 3c42693ef3..722e7cb2b8 100644 --- a/library/cpp/yt/misc/property.h +++ b/library/cpp/yt/misc/property.h @@ -175,6 +175,19 @@ public: \ } \ static_assert(true) +//! Defines a trivial public read-only boolean property that is passed by value. +//! All arguments after name are used as default value (via braced-init-list). +#define DEFINE_BYVAL_RO_BOOLEAN_PROPERTY(name, ...) \ +protected: \ + bool name##_ { __VA_ARGS__ }; \ + \ +public: \ + Y_FORCE_INLINE bool Is##name() const \ + { \ + return name##_; \ + } \ + static_assert(true) + //! Defines a trivial public read-write property that is passed by value. //! All arguments after name are used as default value (via braced-init-list). #define DEFINE_BYVAL_RW_PROPERTY_WITH_FLUENT_SETTER(declaringType, type, name, ...) \ diff --git a/yt/yt/client/api/rpc_proxy/helpers.cpp b/yt/yt/client/api/rpc_proxy/helpers.cpp index f445a9c17f..f5886f18a7 100644 --- a/yt/yt/client/api/rpc_proxy/helpers.cpp +++ b/yt/yt/client/api/rpc_proxy/helpers.cpp @@ -505,8 +505,8 @@ void FromProto(NTableClient::TColumnSchema* schema, const NProto::TColumnSchema& void ToProto(NProto::TTableSchema* protoSchema, const NTableClient::TTableSchema& schema) { ToProto(protoSchema->mutable_columns(), schema.Columns()); - protoSchema->set_strict(schema.GetStrict()); - protoSchema->set_unique_keys(schema.GetUniqueKeys()); + protoSchema->set_strict(schema.IsStrict()); + protoSchema->set_unique_keys(schema.IsUniqueKeys()); } void FromProto(NTableClient::TTableSchema* schema, const NProto::TTableSchema& protoSchema) diff --git a/yt/yt/client/table_client/check_schema_compatibility.cpp b/yt/yt/client/table_client/check_schema_compatibility.cpp index d2fd29b903..1bf791f721 100644 --- a/yt/yt/client/table_client/check_schema_compatibility.cpp +++ b/yt/yt/client/table_client/check_schema_compatibility.cpp @@ -18,8 +18,8 @@ std::pair<ESchemaCompatibility, TError> 
CheckTableSchemaCompatibilityImpl( TTableSchemaCompatibilityOptions options) { // If output schema is strict, check that input columns are subset of output columns. - if (outputSchema.GetStrict()) { - if (!inputSchema.GetStrict()) { + if (outputSchema.IsStrict()) { + if (!inputSchema.IsStrict()) { return { ESchemaCompatibility::Incompatible, TError("Incompatible strictness: input schema is not strict while output schema is"), @@ -116,7 +116,7 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl( TError("Unexpected computed column %v in output schema", outputColumn.GetDiagnosticNameString()), }; - } else if (!inputSchema.GetStrict()) { + } else if (!inputSchema.IsStrict()) { return { ESchemaCompatibility::Incompatible, TError("Column %v is present in output schema and is missing in non-strict input schema", @@ -160,7 +160,7 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl( // Check that we don't lose complex types. // We never want to teleport complex types to schemaless part of the chunk because we want to change their type from // EValueType::Composite to EValueType::Any. 
- if (!outputSchema.GetStrict()) { + if (!outputSchema.IsStrict()) { for (const auto& inputColumn : inputSchema.Columns()) { if (!IsV3Composite(inputColumn.LogicalType())) { continue; @@ -189,8 +189,8 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl( }; } - if (outputSchema.GetUniqueKeys()) { - if (!inputSchema.GetUniqueKeys()) { + if (outputSchema.IsUniqueKeys()) { + if (!inputSchema.IsUniqueKeys()) { return { ESchemaCompatibility::Incompatible, TError("Input schema \"unique_keys\" attribute is false"), diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp index b2ef61558f..299008c50a 100644 --- a/yt/yt/client/table_client/helpers.cpp +++ b/yt/yt/client/table_client/helpers.cpp @@ -253,8 +253,9 @@ TUnversionedOwningRow YsonToSchemafulRow( for (const auto& [name, value] : rowParts) { int id = nameTable->GetIdOrRegisterName(name); if (id >= std::ssize(tableSchema.Columns())) { - if (validateValues && tableSchema.GetStrict()) { - THROW_ERROR_EXCEPTION(NTableClient::EErrorCode::SchemaViolation, + if (validateValues && tableSchema.IsStrict()) { + THROW_ERROR_EXCEPTION( + EErrorCode::SchemaViolation, "Unknown column %Qv in strict schema", name); } diff --git a/yt/yt/client/table_client/merge_table_schemas.cpp b/yt/yt/client/table_client/merge_table_schemas.cpp index ef7d10fcfc..8821bc5edd 100644 --- a/yt/yt/client/table_client/merge_table_schemas.cpp +++ b/yt/yt/client/table_client/merge_table_schemas.cpp @@ -80,7 +80,7 @@ TTableSchemaPtr MergeTableSchemas( << ex; } - } else if (!firstSchema->GetStrict()) { + } else if (!firstSchema->IsStrict()) { THROW_ERROR_EXCEPTION("Column %v is present in second schema and is missing in non-strict first schema", secondSchemaColumn.GetDiagnosticNameString()); } else { @@ -90,7 +90,7 @@ TTableSchemaPtr MergeTableSchemas( for (const auto& firstSchemaColumn : firstSchema->Columns()) { if (!secondSchema->FindColumn(firstSchemaColumn.Name())) { - if 
(!secondSchema->GetStrict()) { + if (!secondSchema->IsStrict()) { THROW_ERROR_EXCEPTION("Column %v is present in first schema and is missing in non-strict second schema", firstSchemaColumn.GetDiagnosticNameString()); } @@ -114,8 +114,8 @@ TTableSchemaPtr MergeTableSchemas( return { New<TTableSchema>( resultColumns, - /*strict*/ firstSchema->GetStrict() && secondSchema->GetStrict(), - firstSchema->GetUniqueKeys() && secondSchema->GetUniqueKeys(), + firstSchema->IsStrict() && secondSchema->IsStrict(), + firstSchema->IsUniqueKeys() && secondSchema->IsUniqueKeys(), ETableSchemaModification::None, firstSchema->DeletedColumns()) }; @@ -123,8 +123,8 @@ TTableSchemaPtr MergeTableSchemas( return { New<TTableSchema>( resultColumns, - /*strict*/ firstSchema->GetStrict() && secondSchema->GetStrict(), - firstSchema->GetUniqueKeys() && secondSchema->GetUniqueKeys()) + firstSchema->IsStrict() && secondSchema->IsStrict(), + firstSchema->IsUniqueKeys() && secondSchema->IsUniqueKeys()) }; } } diff --git a/yt/yt/client/table_client/schema.cpp b/yt/yt/client/table_client/schema.cpp index a7715d96db..e6d7756515 100644 --- a/yt/yt/client/table_client/schema.cpp +++ b/yt/yt/client/table_client/schema.cpp @@ -498,7 +498,7 @@ std::string TTableSchema::TNameMapping::StableNameToName(const TColumnStableName { auto* column = Schema_.FindColumnByStableName(stableName); if (!column) { - if (Schema_.GetStrict()) { + if (Schema_.IsStrict()) { THROW_ERROR_EXCEPTION("No column with stable name %Qv in strict schema", stableName); } return stableName.Underlying(); @@ -510,7 +510,7 @@ TColumnStableName TTableSchema::TNameMapping::NameToStableName(TStringBuf name) { auto* column = Schema_.FindColumn(name); if (!column) { - if (Schema_.GetStrict()) { + if (Schema_.IsStrict()) { if (auto originalColumnName = GetTimestampColumnOriginalNameOrNull(name); !originalColumnName || !Schema_.FindColumn(*originalColumnName)) { @@ -759,11 +759,6 @@ bool TTableSchema::IsSorted() const return KeyColumnCount_ > 0; } 
-bool TTableSchema::IsUniqueKeys() const -{ - return UniqueKeys_; -} - bool TTableSchema::HasRenamedColumns() const { return std::any_of(Columns().begin(), Columns().end(), [] (const TColumnSchema& column) { @@ -873,7 +868,7 @@ std::vector<TColumnStableName> MapNamesToStableNames( const auto* column = schema.FindColumn(name); if (column) { stableNames.push_back(column->StableName()); - } else if (!schema.GetStrict()) { + } else if (!schema.IsStrict()) { stableNames.push_back(TColumnStableName(name)); } else if (missingColumnReplacement) { stableNames.push_back(TColumnStableName(std::string(*missingColumnReplacement))); @@ -1496,7 +1491,7 @@ TKeyColumnTypes TTableSchema::GetKeyColumnTypes() const void FormatValue(TStringBuilderBase* builder, const TTableSchema& schema, TStringBuf /*spec*/) { - builder->AppendFormat("<strict=%v;unique_keys=%v", schema.GetStrict(), schema.GetUniqueKeys()); + builder->AppendFormat("<strict=%v;unique_keys=%v", schema.IsStrict(), schema.IsUniqueKeys()); if (schema.HasNontrivialSchemaModification()) { builder->AppendFormat(";schema_modification=%v", schema.GetSchemaModification()); } @@ -1531,9 +1526,17 @@ void FormatValue(TStringBuilderBase* builder, const TTableSchemaPtr& schema, TSt std::string SerializeToWireProto(const TTableSchemaPtr& schema) { + return schema ? 
SerializeToWireProto(*schema) : ""; +} + +std::string SerializeToWireProto(const TTableSchema& schema) +{ NTableClient::NProto::TTableSchemaExt protoSchema; ToProto(&protoSchema, schema); - return protoSchema.SerializeAsString(); + if (protoSchema.IsInitialized()) { + return protoSchema.SerializeAsString(); + } + THROW_ERROR_EXCEPTION("Table schema is not initialized"); } void DeserializeFromWireProto(TTableSchemaPtr* schema, const std::string& serializedProto) @@ -1549,8 +1552,8 @@ void ToProto(NProto::TTableSchemaExt* protoSchema, const TTableSchema& schema) { ToProto(protoSchema->mutable_columns(), schema.Columns()); ToProto(protoSchema->mutable_deleted_columns(), schema.DeletedColumns()); - protoSchema->set_strict(schema.GetStrict()); - protoSchema->set_unique_keys(schema.GetUniqueKeys()); + protoSchema->set_strict(schema.IsStrict()); + protoSchema->set_unique_keys(schema.IsUniqueKeys()); protoSchema->set_schema_modification(ToProto(schema.GetSchemaModification())); } @@ -1674,8 +1677,8 @@ bool operator==(const TTableSchema& lhs, const TTableSchema& rhs) { return lhs.Columns() == rhs.Columns() && - lhs.GetStrict() == rhs.GetStrict() && - lhs.GetUniqueKeys() == rhs.GetUniqueKeys() && + lhs.IsStrict() == rhs.IsStrict() && + lhs.IsUniqueKeys() == rhs.IsUniqueKeys() && lhs.GetSchemaModification() == rhs.GetSchemaModification() && lhs.DeletedColumns() == rhs.DeletedColumns(); } @@ -1691,7 +1694,7 @@ bool IsEqualIgnoringRequiredness(const TTableSchema& lhs, const TTableSchema& rh } resultColumns.emplace_back(column); } - return TTableSchema(resultColumns, schema.GetStrict(), schema.GetUniqueKeys()); + return TTableSchema(resultColumns, schema.IsStrict(), schema.IsUniqueKeys()); }; return dropRequiredness(lhs) == dropRequiredness(rhs); } @@ -2090,11 +2093,11 @@ void ValidateColumnSchema( void ValidateDynamicTableConstraints(const TTableSchema& schema) { - if (!schema.GetStrict()) { + if (!schema.IsStrict()) { THROW_ERROR_EXCEPTION("\"strict\" cannot be \"false\" for a 
dynamic table"); } - if (schema.IsSorted() && !schema.GetUniqueKeys()) { + if (schema.IsSorted() && !schema.IsUniqueKeys()) { THROW_ERROR_EXCEPTION("\"unique_keys\" cannot be \"false\" for a sorted dynamic table"); } @@ -2308,7 +2311,7 @@ void ValidateCumulativeDataWeightColumn(const TTableSchema& schema) // Validate schema attributes. void ValidateSchemaAttributes(const TTableSchema& schema) { - if (schema.GetUniqueKeys() && schema.GetKeyColumnCount() == 0) { + if (schema.IsUniqueKeys() && schema.GetKeyColumnCount() == 0) { THROW_ERROR_EXCEPTION("\"unique_keys\" can only be true if key columns are present"); } } @@ -2325,7 +2328,7 @@ void ValidateTableSchema( schema.IsSorted(), isTableDynamic, options); - if (!schema.GetStrict() && column.IsRenamed()) { + if (!schema.IsStrict() && column.IsRenamed()) { THROW_ERROR_EXCEPTION("Renamed column %v in non-strict schema", column.GetDiagnosticNameString()); } @@ -2396,6 +2399,24 @@ void ValidateNoDescendingSortOrder(const TTableSchema& schema) } } +void ValidateNoDescendingSortOrder( + const std::vector<ESortOrder>& sortOrders, + const TKeyColumns& keyColumns) +{ + YT_VERIFY(keyColumns.size() == sortOrders.size()); + + for (int index = 0; index < std::ssize(sortOrders); ++index) { + auto sortOrder = sortOrders[index]; + const auto& column = keyColumns[index]; + if (sortOrder == ESortOrder::Descending) { + THROW_ERROR_EXCEPTION( + NTableClient::EErrorCode::InvalidSchemaValue, + "Descending sort order is not available in this context yet") + << TErrorAttribute("column_name", column); + } + } +} + void ValidateNoRenamedColumns(const TTableSchema& schema) { for (const auto& column : schema.Columns()) { @@ -2556,7 +2577,7 @@ size_t THash<NYT::NTableClient::TDeletedColumn>::operator()(const NYT::NTableCli size_t THash<NYT::NTableClient::TTableSchema>::operator()(const NYT::NTableClient::TTableSchema& tableSchema) const { - size_t result = CombineHashes(THash<bool>()(tableSchema.GetUniqueKeys()), 
THash<bool>()(tableSchema.GetStrict())); + size_t result = CombineHashes(THash<bool>()(tableSchema.IsUniqueKeys()), THash<bool>()(tableSchema.IsStrict())); if (tableSchema.HasNontrivialSchemaModification()) { result = CombineHashes( result, diff --git a/yt/yt/client/table_client/schema.h b/yt/yt/client/table_client/schema.h index f6c9f147a1..bb31f4c9d3 100644 --- a/yt/yt/client/table_client/schema.h +++ b/yt/yt/client/table_client/schema.h @@ -242,8 +242,8 @@ public: const std::vector<TDeletedColumn>& DeletedColumns() const; //! Strict schema forbids columns not specified in the schema. - DEFINE_BYVAL_RO_PROPERTY(bool, Strict, false); - DEFINE_BYVAL_RO_PROPERTY(bool, UniqueKeys, false); + DEFINE_BYVAL_RO_BOOLEAN_PROPERTY(Strict, false); + DEFINE_BYVAL_RO_BOOLEAN_PROPERTY(UniqueKeys, false); DEFINE_BYVAL_RO_PROPERTY(ETableSchemaModification, SchemaModification, ETableSchemaModification::None); //! Constructs an empty non-strict schema. @@ -296,7 +296,6 @@ public: bool HasTimestampColumn() const; bool HasTtlColumn() const; bool IsSorted() const; - bool IsUniqueKeys() const; bool HasRenamedColumns() const; bool IsEmpty() const; bool IsCGComparatorApplicable() const; @@ -446,6 +445,7 @@ void FormatValue(TStringBuilderBase* builder, const TTableSchemaPtr& schema, TSt //! Returns serialized NTableClient.NProto.TTableSchemaExt. 
std::string SerializeToWireProto(const TTableSchemaPtr& schema); +std::string SerializeToWireProto(const TTableSchema& schema); void DeserializeFromWireProto(TTableSchemaPtr* schema, const std::string& serializedProto); @@ -554,6 +554,9 @@ void ValidateTableSchema( //////////////////////////////////////////////////////////////////////////////// void ValidateNoDescendingSortOrder(const TTableSchema& schema); +void ValidateNoDescendingSortOrder( + const std::vector<ESortOrder>& sortOrders, + const TKeyColumns& keyColumns); void ValidateNoRenamedColumns(const TTableSchema& schema); diff --git a/yt/yt/client/table_client/schema_serialization_helpers.cpp b/yt/yt/client/table_client/schema_serialization_helpers.cpp index 3ff0b2bec0..d079832c77 100644 --- a/yt/yt/client/table_client/schema_serialization_helpers.cpp +++ b/yt/yt/client/table_client/schema_serialization_helpers.cpp @@ -231,8 +231,8 @@ void Serialize(const TTableSchema& schema, NYson::IYsonConsumer* consumer) { auto position = NYTree::BuildYsonFluently(consumer) .BeginAttributes() - .Item("strict").Value(schema.GetStrict()) - .Item("unique_keys").Value(schema.GetUniqueKeys()) + .Item("strict").Value(schema.IsStrict()) + .Item("unique_keys").Value(schema.IsUniqueKeys()) .DoIf(schema.HasNontrivialSchemaModification(), [&] (NYTree::TFluentMap fluent) { fluent.Item("schema_modification").Value(schema.GetSchemaModification()); }) diff --git a/yt/yt/client/table_client/timestamped_schema_helpers.cpp b/yt/yt/client/table_client/timestamped_schema_helpers.cpp index 4b0feff80f..e4e90ab821 100644 --- a/yt/yt/client/table_client/timestamped_schema_helpers.cpp +++ b/yt/yt/client/table_client/timestamped_schema_helpers.cpp @@ -21,8 +21,8 @@ TTableSchemaPtr ToLatestTimestampSchema(const TTableSchemaPtr& schema) return New<TTableSchema>( std::move(columns), - schema->GetStrict(), - schema->GetUniqueKeys(), + schema->IsStrict(), + schema->IsUniqueKeys(), schema->GetSchemaModification(), schema->DeletedColumns()); } |