aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author cherepashka <cherepashka@yandex-team.com> 2025-03-06 01:26:29 +0300
committer cherepashka <cherepashka@yandex-team.com> 2025-03-06 01:55:54 +0300
commit 16867b871a3abf548c504976fb8ca49c882b1ffd (patch)
tree 697481902ad3f992fc84b77e827db444a7f7ff40
parent aa3749c1aaa8d692dfd08cefe82ff5eb29c5418f (diff)
download ydb-16867b871a3abf548c504976fb8ca49c882b1ffd.tar.gz
YT-21910: Master compact table schema
- Changelog entry Type: feature Component: master Introduce TCompactTableSchema, which holds the wire protobuf schema representation and is lighter than TTableSchema commit_hash:21801854b37fc25c5004ee01e5b79a3b3b6ea983
-rw-r--r-- library/cpp/yt/misc/property.h 13
-rw-r--r-- yt/yt/client/api/rpc_proxy/helpers.cpp 4
-rw-r--r-- yt/yt/client/table_client/check_schema_compatibility.cpp 12
-rw-r--r-- yt/yt/client/table_client/helpers.cpp 5
-rw-r--r-- yt/yt/client/table_client/merge_table_schemas.cpp 12
-rw-r--r-- yt/yt/client/table_client/schema.cpp 61
-rw-r--r-- yt/yt/client/table_client/schema.h 9
-rw-r--r-- yt/yt/client/table_client/schema_serialization_helpers.cpp 4
-rw-r--r-- yt/yt/client/table_client/timestamped_schema_helpers.cpp 4
9 files changed, 81 insertions, 43 deletions
diff --git a/library/cpp/yt/misc/property.h b/library/cpp/yt/misc/property.h
index 3c42693ef3..722e7cb2b8 100644
--- a/library/cpp/yt/misc/property.h
+++ b/library/cpp/yt/misc/property.h
@@ -175,6 +175,19 @@ public: \
} \
static_assert(true)
+//! Defines a trivial public read-only boolean property that is passed by value.
+//! All arguments after name are used as default value (via braced-init-list).
+#define DEFINE_BYVAL_RO_BOOLEAN_PROPERTY(name, ...) \
+protected: \
+ bool name##_ { __VA_ARGS__ }; \
+ \
+public: \
+ Y_FORCE_INLINE bool Is##name() const \
+ { \
+ return name##_; \
+ } \
+ static_assert(true)
+
//! Defines a trivial public read-write property that is passed by value.
//! All arguments after name are used as default value (via braced-init-list).
#define DEFINE_BYVAL_RW_PROPERTY_WITH_FLUENT_SETTER(declaringType, type, name, ...) \
diff --git a/yt/yt/client/api/rpc_proxy/helpers.cpp b/yt/yt/client/api/rpc_proxy/helpers.cpp
index f445a9c17f..f5886f18a7 100644
--- a/yt/yt/client/api/rpc_proxy/helpers.cpp
+++ b/yt/yt/client/api/rpc_proxy/helpers.cpp
@@ -505,8 +505,8 @@ void FromProto(NTableClient::TColumnSchema* schema, const NProto::TColumnSchema&
void ToProto(NProto::TTableSchema* protoSchema, const NTableClient::TTableSchema& schema)
{
ToProto(protoSchema->mutable_columns(), schema.Columns());
- protoSchema->set_strict(schema.GetStrict());
- protoSchema->set_unique_keys(schema.GetUniqueKeys());
+ protoSchema->set_strict(schema.IsStrict());
+ protoSchema->set_unique_keys(schema.IsUniqueKeys());
}
void FromProto(NTableClient::TTableSchema* schema, const NProto::TTableSchema& protoSchema)
diff --git a/yt/yt/client/table_client/check_schema_compatibility.cpp b/yt/yt/client/table_client/check_schema_compatibility.cpp
index d2fd29b903..1bf791f721 100644
--- a/yt/yt/client/table_client/check_schema_compatibility.cpp
+++ b/yt/yt/client/table_client/check_schema_compatibility.cpp
@@ -18,8 +18,8 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl(
TTableSchemaCompatibilityOptions options)
{
// If output schema is strict, check that input columns are subset of output columns.
- if (outputSchema.GetStrict()) {
- if (!inputSchema.GetStrict()) {
+ if (outputSchema.IsStrict()) {
+ if (!inputSchema.IsStrict()) {
return {
ESchemaCompatibility::Incompatible,
TError("Incompatible strictness: input schema is not strict while output schema is"),
@@ -116,7 +116,7 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl(
TError("Unexpected computed column %v in output schema",
outputColumn.GetDiagnosticNameString()),
};
- } else if (!inputSchema.GetStrict()) {
+ } else if (!inputSchema.IsStrict()) {
return {
ESchemaCompatibility::Incompatible,
TError("Column %v is present in output schema and is missing in non-strict input schema",
@@ -160,7 +160,7 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl(
// Check that we don't lose complex types.
// We never want to teleport complex types to schemaless part of the chunk because we want to change their type from
// EValueType::Composite to EValueType::Any.
- if (!outputSchema.GetStrict()) {
+ if (!outputSchema.IsStrict()) {
for (const auto& inputColumn : inputSchema.Columns()) {
if (!IsV3Composite(inputColumn.LogicalType())) {
continue;
@@ -189,8 +189,8 @@ std::pair<ESchemaCompatibility, TError> CheckTableSchemaCompatibilityImpl(
};
}
- if (outputSchema.GetUniqueKeys()) {
- if (!inputSchema.GetUniqueKeys()) {
+ if (outputSchema.IsUniqueKeys()) {
+ if (!inputSchema.IsUniqueKeys()) {
return {
ESchemaCompatibility::Incompatible,
TError("Input schema \"unique_keys\" attribute is false"),
diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp
index b2ef61558f..299008c50a 100644
--- a/yt/yt/client/table_client/helpers.cpp
+++ b/yt/yt/client/table_client/helpers.cpp
@@ -253,8 +253,9 @@ TUnversionedOwningRow YsonToSchemafulRow(
for (const auto& [name, value] : rowParts) {
int id = nameTable->GetIdOrRegisterName(name);
if (id >= std::ssize(tableSchema.Columns())) {
- if (validateValues && tableSchema.GetStrict()) {
- THROW_ERROR_EXCEPTION(NTableClient::EErrorCode::SchemaViolation,
+ if (validateValues && tableSchema.IsStrict()) {
+ THROW_ERROR_EXCEPTION(
+ EErrorCode::SchemaViolation,
"Unknown column %Qv in strict schema",
name);
}
diff --git a/yt/yt/client/table_client/merge_table_schemas.cpp b/yt/yt/client/table_client/merge_table_schemas.cpp
index ef7d10fcfc..8821bc5edd 100644
--- a/yt/yt/client/table_client/merge_table_schemas.cpp
+++ b/yt/yt/client/table_client/merge_table_schemas.cpp
@@ -80,7 +80,7 @@ TTableSchemaPtr MergeTableSchemas(
<< ex;
}
- } else if (!firstSchema->GetStrict()) {
+ } else if (!firstSchema->IsStrict()) {
THROW_ERROR_EXCEPTION("Column %v is present in second schema and is missing in non-strict first schema",
secondSchemaColumn.GetDiagnosticNameString());
} else {
@@ -90,7 +90,7 @@ TTableSchemaPtr MergeTableSchemas(
for (const auto& firstSchemaColumn : firstSchema->Columns()) {
if (!secondSchema->FindColumn(firstSchemaColumn.Name())) {
- if (!secondSchema->GetStrict()) {
+ if (!secondSchema->IsStrict()) {
THROW_ERROR_EXCEPTION("Column %v is present in first schema and is missing in non-strict second schema",
firstSchemaColumn.GetDiagnosticNameString());
}
@@ -114,8 +114,8 @@ TTableSchemaPtr MergeTableSchemas(
return {
New<TTableSchema>(
resultColumns,
- /*strict*/ firstSchema->GetStrict() && secondSchema->GetStrict(),
- firstSchema->GetUniqueKeys() && secondSchema->GetUniqueKeys(),
+ firstSchema->IsStrict() && secondSchema->IsStrict(),
+ firstSchema->IsUniqueKeys() && secondSchema->IsUniqueKeys(),
ETableSchemaModification::None,
firstSchema->DeletedColumns())
};
@@ -123,8 +123,8 @@ TTableSchemaPtr MergeTableSchemas(
return {
New<TTableSchema>(
resultColumns,
- /*strict*/ firstSchema->GetStrict() && secondSchema->GetStrict(),
- firstSchema->GetUniqueKeys() && secondSchema->GetUniqueKeys())
+ firstSchema->IsStrict() && secondSchema->IsStrict(),
+ firstSchema->IsUniqueKeys() && secondSchema->IsUniqueKeys())
};
}
}
diff --git a/yt/yt/client/table_client/schema.cpp b/yt/yt/client/table_client/schema.cpp
index a7715d96db..e6d7756515 100644
--- a/yt/yt/client/table_client/schema.cpp
+++ b/yt/yt/client/table_client/schema.cpp
@@ -498,7 +498,7 @@ std::string TTableSchema::TNameMapping::StableNameToName(const TColumnStableName
{
auto* column = Schema_.FindColumnByStableName(stableName);
if (!column) {
- if (Schema_.GetStrict()) {
+ if (Schema_.IsStrict()) {
THROW_ERROR_EXCEPTION("No column with stable name %Qv in strict schema", stableName);
}
return stableName.Underlying();
@@ -510,7 +510,7 @@ TColumnStableName TTableSchema::TNameMapping::NameToStableName(TStringBuf name)
{
auto* column = Schema_.FindColumn(name);
if (!column) {
- if (Schema_.GetStrict()) {
+ if (Schema_.IsStrict()) {
if (auto originalColumnName = GetTimestampColumnOriginalNameOrNull(name);
!originalColumnName || !Schema_.FindColumn(*originalColumnName))
{
@@ -759,11 +759,6 @@ bool TTableSchema::IsSorted() const
return KeyColumnCount_ > 0;
}
-bool TTableSchema::IsUniqueKeys() const
-{
- return UniqueKeys_;
-}
-
bool TTableSchema::HasRenamedColumns() const
{
return std::any_of(Columns().begin(), Columns().end(), [] (const TColumnSchema& column) {
@@ -873,7 +868,7 @@ std::vector<TColumnStableName> MapNamesToStableNames(
const auto* column = schema.FindColumn(name);
if (column) {
stableNames.push_back(column->StableName());
- } else if (!schema.GetStrict()) {
+ } else if (!schema.IsStrict()) {
stableNames.push_back(TColumnStableName(name));
} else if (missingColumnReplacement) {
stableNames.push_back(TColumnStableName(std::string(*missingColumnReplacement)));
@@ -1496,7 +1491,7 @@ TKeyColumnTypes TTableSchema::GetKeyColumnTypes() const
void FormatValue(TStringBuilderBase* builder, const TTableSchema& schema, TStringBuf /*spec*/)
{
- builder->AppendFormat("<strict=%v;unique_keys=%v", schema.GetStrict(), schema.GetUniqueKeys());
+ builder->AppendFormat("<strict=%v;unique_keys=%v", schema.IsStrict(), schema.IsUniqueKeys());
if (schema.HasNontrivialSchemaModification()) {
builder->AppendFormat(";schema_modification=%v", schema.GetSchemaModification());
}
@@ -1531,9 +1526,17 @@ void FormatValue(TStringBuilderBase* builder, const TTableSchemaPtr& schema, TSt
std::string SerializeToWireProto(const TTableSchemaPtr& schema)
{
+ return schema ? SerializeToWireProto(*schema) : "";
+}
+
+std::string SerializeToWireProto(const TTableSchema& schema)
+{
NTableClient::NProto::TTableSchemaExt protoSchema;
ToProto(&protoSchema, schema);
- return protoSchema.SerializeAsString();
+ if (protoSchema.IsInitialized()) {
+ return protoSchema.SerializeAsString();
+ }
+ THROW_ERROR_EXCEPTION("Table schema is not initialized");
}
void DeserializeFromWireProto(TTableSchemaPtr* schema, const std::string& serializedProto)
@@ -1549,8 +1552,8 @@ void ToProto(NProto::TTableSchemaExt* protoSchema, const TTableSchema& schema)
{
ToProto(protoSchema->mutable_columns(), schema.Columns());
ToProto(protoSchema->mutable_deleted_columns(), schema.DeletedColumns());
- protoSchema->set_strict(schema.GetStrict());
- protoSchema->set_unique_keys(schema.GetUniqueKeys());
+ protoSchema->set_strict(schema.IsStrict());
+ protoSchema->set_unique_keys(schema.IsUniqueKeys());
protoSchema->set_schema_modification(ToProto(schema.GetSchemaModification()));
}
@@ -1674,8 +1677,8 @@ bool operator==(const TTableSchema& lhs, const TTableSchema& rhs)
{
return
lhs.Columns() == rhs.Columns() &&
- lhs.GetStrict() == rhs.GetStrict() &&
- lhs.GetUniqueKeys() == rhs.GetUniqueKeys() &&
+ lhs.IsStrict() == rhs.IsStrict() &&
+ lhs.IsUniqueKeys() == rhs.IsUniqueKeys() &&
lhs.GetSchemaModification() == rhs.GetSchemaModification() &&
lhs.DeletedColumns() == rhs.DeletedColumns();
}
@@ -1691,7 +1694,7 @@ bool IsEqualIgnoringRequiredness(const TTableSchema& lhs, const TTableSchema& rh
}
resultColumns.emplace_back(column);
}
- return TTableSchema(resultColumns, schema.GetStrict(), schema.GetUniqueKeys());
+ return TTableSchema(resultColumns, schema.IsStrict(), schema.IsUniqueKeys());
};
return dropRequiredness(lhs) == dropRequiredness(rhs);
}
@@ -2090,11 +2093,11 @@ void ValidateColumnSchema(
void ValidateDynamicTableConstraints(const TTableSchema& schema)
{
- if (!schema.GetStrict()) {
+ if (!schema.IsStrict()) {
THROW_ERROR_EXCEPTION("\"strict\" cannot be \"false\" for a dynamic table");
}
- if (schema.IsSorted() && !schema.GetUniqueKeys()) {
+ if (schema.IsSorted() && !schema.IsUniqueKeys()) {
THROW_ERROR_EXCEPTION("\"unique_keys\" cannot be \"false\" for a sorted dynamic table");
}
@@ -2308,7 +2311,7 @@ void ValidateCumulativeDataWeightColumn(const TTableSchema& schema)
// Validate schema attributes.
void ValidateSchemaAttributes(const TTableSchema& schema)
{
- if (schema.GetUniqueKeys() && schema.GetKeyColumnCount() == 0) {
+ if (schema.IsUniqueKeys() && schema.GetKeyColumnCount() == 0) {
THROW_ERROR_EXCEPTION("\"unique_keys\" can only be true if key columns are present");
}
}
@@ -2325,7 +2328,7 @@ void ValidateTableSchema(
schema.IsSorted(),
isTableDynamic,
options);
- if (!schema.GetStrict() && column.IsRenamed()) {
+ if (!schema.IsStrict() && column.IsRenamed()) {
THROW_ERROR_EXCEPTION("Renamed column %v in non-strict schema",
column.GetDiagnosticNameString());
}
@@ -2396,6 +2399,24 @@ void ValidateNoDescendingSortOrder(const TTableSchema& schema)
}
}
+void ValidateNoDescendingSortOrder(
+ const std::vector<ESortOrder>& sortOrders,
+ const TKeyColumns& keyColumns)
+{
+ YT_VERIFY(keyColumns.size() == sortOrders.size());
+
+ for (int index = 0; index < std::ssize(sortOrders); ++index) {
+ auto sortOrder = sortOrders[index];
+ const auto& column = keyColumns[index];
+ if (sortOrder == ESortOrder::Descending) {
+ THROW_ERROR_EXCEPTION(
+ NTableClient::EErrorCode::InvalidSchemaValue,
+ "Descending sort order is not available in this context yet")
+ << TErrorAttribute("column_name", column);
+ }
+ }
+}
+
void ValidateNoRenamedColumns(const TTableSchema& schema)
{
for (const auto& column : schema.Columns()) {
@@ -2556,7 +2577,7 @@ size_t THash<NYT::NTableClient::TDeletedColumn>::operator()(const NYT::NTableCli
size_t THash<NYT::NTableClient::TTableSchema>::operator()(const NYT::NTableClient::TTableSchema& tableSchema) const
{
- size_t result = CombineHashes(THash<bool>()(tableSchema.GetUniqueKeys()), THash<bool>()(tableSchema.GetStrict()));
+ size_t result = CombineHashes(THash<bool>()(tableSchema.IsUniqueKeys()), THash<bool>()(tableSchema.IsStrict()));
if (tableSchema.HasNontrivialSchemaModification()) {
result = CombineHashes(
result,
diff --git a/yt/yt/client/table_client/schema.h b/yt/yt/client/table_client/schema.h
index f6c9f147a1..bb31f4c9d3 100644
--- a/yt/yt/client/table_client/schema.h
+++ b/yt/yt/client/table_client/schema.h
@@ -242,8 +242,8 @@ public:
const std::vector<TDeletedColumn>& DeletedColumns() const;
//! Strict schema forbids columns not specified in the schema.
- DEFINE_BYVAL_RO_PROPERTY(bool, Strict, false);
- DEFINE_BYVAL_RO_PROPERTY(bool, UniqueKeys, false);
+ DEFINE_BYVAL_RO_BOOLEAN_PROPERTY(Strict, false);
+ DEFINE_BYVAL_RO_BOOLEAN_PROPERTY(UniqueKeys, false);
DEFINE_BYVAL_RO_PROPERTY(ETableSchemaModification, SchemaModification, ETableSchemaModification::None);
//! Constructs an empty non-strict schema.
@@ -296,7 +296,6 @@ public:
bool HasTimestampColumn() const;
bool HasTtlColumn() const;
bool IsSorted() const;
- bool IsUniqueKeys() const;
bool HasRenamedColumns() const;
bool IsEmpty() const;
bool IsCGComparatorApplicable() const;
@@ -446,6 +445,7 @@ void FormatValue(TStringBuilderBase* builder, const TTableSchemaPtr& schema, TSt
//! Returns serialized NTableClient.NProto.TTableSchemaExt.
std::string SerializeToWireProto(const TTableSchemaPtr& schema);
+std::string SerializeToWireProto(const TTableSchema& schema);
void DeserializeFromWireProto(TTableSchemaPtr* schema, const std::string& serializedProto);
@@ -554,6 +554,9 @@ void ValidateTableSchema(
////////////////////////////////////////////////////////////////////////////////
void ValidateNoDescendingSortOrder(const TTableSchema& schema);
+void ValidateNoDescendingSortOrder(
+ const std::vector<ESortOrder>& sortOrders,
+ const TKeyColumns& keyColumns);
void ValidateNoRenamedColumns(const TTableSchema& schema);
diff --git a/yt/yt/client/table_client/schema_serialization_helpers.cpp b/yt/yt/client/table_client/schema_serialization_helpers.cpp
index 3ff0b2bec0..d079832c77 100644
--- a/yt/yt/client/table_client/schema_serialization_helpers.cpp
+++ b/yt/yt/client/table_client/schema_serialization_helpers.cpp
@@ -231,8 +231,8 @@ void Serialize(const TTableSchema& schema, NYson::IYsonConsumer* consumer)
{
auto position = NYTree::BuildYsonFluently(consumer)
.BeginAttributes()
- .Item("strict").Value(schema.GetStrict())
- .Item("unique_keys").Value(schema.GetUniqueKeys())
+ .Item("strict").Value(schema.IsStrict())
+ .Item("unique_keys").Value(schema.IsUniqueKeys())
.DoIf(schema.HasNontrivialSchemaModification(), [&] (NYTree::TFluentMap fluent) {
fluent.Item("schema_modification").Value(schema.GetSchemaModification());
})
diff --git a/yt/yt/client/table_client/timestamped_schema_helpers.cpp b/yt/yt/client/table_client/timestamped_schema_helpers.cpp
index 4b0feff80f..e4e90ab821 100644
--- a/yt/yt/client/table_client/timestamped_schema_helpers.cpp
+++ b/yt/yt/client/table_client/timestamped_schema_helpers.cpp
@@ -21,8 +21,8 @@ TTableSchemaPtr ToLatestTimestampSchema(const TTableSchemaPtr& schema)
return New<TTableSchema>(
std::move(columns),
- schema->GetStrict(),
- schema->GetUniqueKeys(),
+ schema->IsStrict(),
+ schema->IsUniqueKeys(),
schema->GetSchemaModification(),
schema->DeletedColumns());
}