diff options
author | whatsername <whatsername@yandex-team.com> | 2024-04-15 23:09:20 +0300 |
---|---|---|
committer | whatsername <whatsername@yandex-team.com> | 2024-04-15 23:18:00 +0300 |
commit | a124ba45ea6873bedcd2c81515ee399c8de92363 (patch) | |
tree | 1f8e232f8d003fa0339b44ce6acd50056292d31c | |
parent | 522b983401ad0e6aec915a0be5d69685e9ed188a (diff) | |
download | ydb-a124ba45ea6873bedcd2c81515ee399c8de92363.tar.gz |
YT-21169: Compare any types
ea6fc0df4ca824207ae49ffb843b090a6cf66f1d
-rw-r--r-- | yt/yt/client/table_client/composite_compare.cpp | 48 | ||||
-rw-r--r-- | yt/yt/client/table_client/composite_compare.h | 2 | ||||
-rw-r--r-- | yt/yt/client/table_client/helpers.cpp | 8 | ||||
-rw-r--r-- | yt/yt/client/table_client/row_base.cpp | 1 | ||||
-rw-r--r-- | yt/yt/client/table_client/unversioned_row.cpp | 51 | ||||
-rw-r--r-- | yt/yt/client/unittests/composite_compare_ut.cpp | 6 | ||||
-rw-r--r-- | yt/yt/client/unittests/row_ut.cpp | 57 |
7 files changed, 145 insertions, 28 deletions
diff --git a/yt/yt/client/table_client/composite_compare.cpp b/yt/yt/client/table_client/composite_compare.cpp index b2ddd498ec..0031bef401 100644 --- a/yt/yt/client/table_client/composite_compare.cpp +++ b/yt/yt/client/table_client/composite_compare.cpp @@ -1,5 +1,6 @@ #include "composite_compare.h" +#include <yt/yt/client/table_client/row_base.h> #include <yt/yt/core/yson/pull_parser.h> #include <yt/yt/core/yson/token_writer.h> @@ -16,27 +17,28 @@ using namespace NYson; //////////////////////////////////////////////////////////////////////////////// -static const auto Logger = NLogging::TLogger{"YsonCompositveCompare"}; +static const auto Logger = NLogging::TLogger{"YsonCompositeCompare"}; //////////////////////////////////////////////////////////////////////////////// namespace { -// This file implements comparison for composite values. +// This file implements comparison for composite and any values. // Composite types that supports comparison are: // 1. Optional // 2. List // 3. Tuple // 4. Variant // -// When we compare composite values we assume that they are well-formed yson representations of same type supporting comparison. +// When we compare composite or any values we assume that they are well-formed yson representations of same type supporting comparison. // And we compare them in following manner: // 1. We scan two values simultaneously and look at their yson tokens and find first mismatching token. // 2. If one of the token is EndList (this only can happen if we parsing values of list type // and one list is shorter than another) that means that value containing EndList is less that other. // 3. Otherwise if one of the values is Entity (other value have to be non null value) that means // that value containing Entity is less than other. -// 4. Otherwise it's 2 values of the same type and we can easily compare them. +// 4. Otherwise if values have different types we compare them using EValueType order (via MapItemTypeToValueType) +// 5. Otherwise it's values of the same type and we can easily compare them. DEFINE_ENUM_WITH_UNDERLYING_TYPE(ECompareClass, ui32, ((Incomparable)(0)) ((EndList)(1)) @@ -108,6 +110,30 @@ Y_FORCE_INLINE static int GetSign(int x) return static_cast<int>(0 < x) - static_cast<int>(0 > x); } +Y_FORCE_INLINE static EValueType MapItemTypeToValueType(EYsonItemType itemType) +{ + static const TEnumIndexedArray<EYsonItemType, EValueType> mapping = { + {EYsonItemType::EndOfStream, EValueType::Min}, + {EYsonItemType::BeginMap, EValueType::Min}, + {EYsonItemType::EndMap, EValueType::Min}, + {EYsonItemType::BeginAttributes, EValueType::Min}, + {EYsonItemType::EndAttributes, EValueType::Min}, + {EYsonItemType::BeginList, EValueType::Any}, + {EYsonItemType::EndList, EValueType::Min}, + {EYsonItemType::EntityValue, EValueType::Min}, + {EYsonItemType::BooleanValue, EValueType::Boolean}, + {EYsonItemType::Int64Value, EValueType::Int64}, + {EYsonItemType::Uint64Value, EValueType::Uint64}, + {EYsonItemType::DoubleValue, EValueType::Double}, + {EYsonItemType::StringValue, EValueType::String}, + }; + auto valueType = mapping[itemType]; + if (valueType == EValueType::Min) { + ThrowIncomparableYsonToken(itemType); + } + return valueType; +} + Y_FORCE_INLINE static int CompareYsonItems(const TYsonItem& lhs, const TYsonItem& rhs) { if (lhs.GetType() == rhs.GetType()) { @@ -148,9 +174,7 @@ Y_FORCE_INLINE static int CompareYsonItems(const TYsonItem& lhs, const TYsonItem } if (lhsClass == ECompareClass::BeginValue && rhsClass == ECompareClass::BeginValue) { - THROW_ERROR_EXCEPTION("Incomparable scalar types %Qlv and %Qlv in YSON representation", - lhs.GetType(), - rhs.GetType()); + return static_cast<int>(MapItemTypeToValueType(lhs.GetType())) - static_cast<int>(MapItemTypeToValueType(rhs.GetType())); } return ComparePrimitive(static_cast<ui32>(lhsClass), static_cast<ui32>(rhsClass)); } @@ -314,21 +338,21 @@ TFingerprint CompositeFarmHash(TYsonStringBuf value) //////////////////////////////////////////////////////////////////////////////// -std::optional<TYsonString> TruncateCompositeValue(TYsonStringBuf value, i64 size) +std::optional<TYsonString> TruncateYsonValue(TYsonStringBuf originalYson, i64 size) { - YT_VERIFY(value.GetType() == EYsonType::Node); + YT_VERIFY(originalYson.GetType() == EYsonType::Node); YT_VERIFY(size >= 0); if (!size) { return {}; } - TMemoryInput valueIn(value.AsStringBuf()); + TMemoryInput valueIn(originalYson.AsStringBuf()); TYsonPullParser valueParser(&valueIn, EYsonType::Node); TString truncatedYson; TStringOutput output(truncatedYson); - output.Reserve(std::min(size, std::ssize(value.AsStringBuf()))); + output.Reserve(std::min(size, std::ssize(originalYson.AsStringBuf()))); TCheckedInDebugYsonTokenWriter writer(&output); i64 unclosedListCount = 0; @@ -421,7 +445,7 @@ std::optional<TYsonString> TruncateCompositeValue(TYsonStringBuf value, i64 size YT_LOG_ALERT_IF( std::ssize(truncatedYson) > size, "Composite YSON truncation increased the value's binary size (OriginalValue: %v, TruncatedValue: %v)", - value.AsStringBuf(), + originalYson.AsStringBuf(), truncatedYson); return TYsonString(std::move(truncatedYson)); diff --git a/yt/yt/client/table_client/composite_compare.h b/yt/yt/client/table_client/composite_compare.h index 04737d8b7b..aa64c01f25 100644 --- a/yt/yt/client/table_client/composite_compare.h +++ b/yt/yt/client/table_client/composite_compare.h @@ -22,7 +22,7 @@ TFingerprint CompositeFarmHash(NYson::TYsonStringBuf compositeValue); //! //! NB: The current implementation guarantees that the size of the returned string is not larger then the provided limit. However, //! this might be hard to maintain and is not something one should rely on. It is better to think of this function as an approximate one. -std::optional<NYson::TYsonString> TruncateCompositeValue(NYson::TYsonStringBuf value, i64 size); +std::optional<NYson::TYsonString> TruncateYsonValue(NYson::TYsonStringBuf value, i64 size); //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp index 54733a18b4..38c55c933d 100644 --- a/yt/yt/client/table_client/helpers.cpp +++ b/yt/yt/client/table_client/helpers.cpp @@ -1596,11 +1596,11 @@ TUnversionedValueRangeTruncationResult TruncateUnversionedValues( truncatedValues.push_back(value); auto& truncatedValue = truncatedValues.back(); - if (clipped || value.Type == EValueType::Any) { + if (clipped) { truncatedValue = MakeUnversionedNullValue(value.Id, value.Flags); - } else if (value.Type == EValueType::Composite) { - if (auto truncatedCompositeValue = TruncateCompositeValue(TYsonStringBuf(value.AsStringBuf()), maxSizePerValue)) { - truncatedValue = rowBuffer->CaptureValue(MakeUnversionedCompositeValue(truncatedCompositeValue->AsStringBuf(), value.Id, value.Flags)); + } else if (value.Type == EValueType::Any || value.Type == EValueType::Composite) { + if (auto truncatedYsonValue = TruncateYsonValue(TYsonStringBuf(value.AsStringBuf()), maxSizePerValue)) { + truncatedValue = rowBuffer->CaptureValue(MakeUnversionedStringLikeValue(value.Type, truncatedYsonValue->AsStringBuf(), value.Id, value.Flags)); } else { truncatedValue = MakeUnversionedNullValue(value.Id, value.Flags); } diff --git a/yt/yt/client/table_client/row_base.cpp b/yt/yt/client/table_client/row_base.cpp index 2eb15d37fd..b2970d58d6 100644 --- a/yt/yt/client/table_client/row_base.cpp +++ b/yt/yt/client/table_client/row_base.cpp @@ -118,6 +118,7 @@ void ValidateKeyValueType(EValueType type) type != EValueType::Double && type != EValueType::Boolean && type != EValueType::String && + type != EValueType::Any && type != EValueType::Composite && type != EValueType::Null && type != EValueType::Min && diff --git a/yt/yt/client/table_client/unversioned_row.cpp b/yt/yt/client/table_client/unversioned_row.cpp index 6211bffdb5..18cfd89d7e 100644 --- a/yt/yt/client/table_client/unversioned_row.cpp +++ b/yt/yt/client/table_client/unversioned_row.cpp @@ -320,9 +320,23 @@ int CompareRowValues(const TUnversionedValue& lhs, const TUnversionedValue& rhs) // TODO(babenko): check flags; forbid comparing hunks and aggregates. if (lhs.Type == EValueType::Any || rhs.Type == EValueType::Any) { - if (!IsSentinel(lhs.Type) && !IsSentinel(rhs.Type)) { - // Never compare composite values with non-sentinels. - ThrowIncomparableTypes(lhs, rhs); + if (lhs.Type != rhs.Type) { + if (lhs.Type == EValueType::Composite || rhs.Type == EValueType::Composite) { + ThrowIncomparableTypes(lhs, rhs); + } + return static_cast<int>(lhs.Type) - static_cast<int>(rhs.Type); + } + try { + auto lhsData = TYsonStringBuf(lhs.AsStringBuf()); + auto rhsData = TYsonStringBuf(rhs.AsStringBuf()); + return CompareCompositeValues(lhsData, rhsData); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION( + NTableClient::EErrorCode::IncomparableComplexValues, + "Cannot compare complex values") + << TErrorAttribute("lhs_value", lhs) + << TErrorAttribute("rhs_value", rhs) + << ex; } } @@ -649,6 +663,7 @@ public: void OnBeginMap() override { ++Depth_; + MapFound_ = true; } void OnKeyedItem(TStringBuf /*key*/) override @@ -664,6 +679,7 @@ public: if (Depth_ == 0) { THROW_ERROR_EXCEPTION("Table values cannot have top-level attributes"); } + AttributesFound_ = true; } void OnEndAttributes() override @@ -672,8 +688,15 @@ public: void OnRaw(TStringBuf /*yson*/, EYsonType /*type*/) override { } + bool CanBeSorted() const + { + return !MapFound_ && !AttributesFound_; + } + private: int Depth_ = 0; + bool MapFound_ = false; + bool AttributesFound_ = false; }; void ValidateAnyValue(TStringBuf yson) @@ -682,6 +705,14 @@ void ValidateAnyValue(TStringBuf yson) ParseYsonStringBuffer(yson, EYsonType::Node, &validator); } +bool ValidateSortedAnyValue(TStringBuf yson) +{ + TYsonAnyValidator validator; + ParseYsonStringBuffer(yson, EYsonType::Node, &validator); + + return validator.CanBeSorted(); +} + void ValidateDynamicValue(const TUnversionedValue& value, bool isKey) { switch (value.Type) { @@ -1036,8 +1067,18 @@ void ValidateValueType( "Cannot write value of type %Qlv into type any column", value.Type); } - if (IsAnyOrComposite(value.Type) && validateAnyIsValidYson) { - ValidateAnyValue(value.AsStringBuf()); + if (IsAnyOrComposite(value.Type)) { + if (columnSchema.SortOrder()) { + bool canBeSorted = ValidateSortedAnyValue(value.AsStringBuf()); + if (!canBeSorted) { + THROW_ERROR_EXCEPTION( + NTableClient::EErrorCode::SchemaViolation, + "Cannot write value of type %Qlv, which contains a YSON map, into type any sorted column", + value.Type); + } + } else if (validateAnyIsValidYson) { + ValidateAnyValue(value.AsStringBuf()); + } } } else { ValidateColumnType(EValueType::Composite, value); diff --git a/yt/yt/client/unittests/composite_compare_ut.cpp b/yt/yt/client/unittests/composite_compare_ut.cpp index 5e792919fa..8f59fab630 100644 --- a/yt/yt/client/unittests/composite_compare_ut.cpp +++ b/yt/yt/client/unittests/composite_compare_ut.cpp @@ -72,15 +72,15 @@ TEST(TCompositeCompare, CompositeFingerprint) EXPECT_EQ(getFarmHash("#"), GetFarmFingerprint(MakeUnversionedNullValue())); } -TEST(TCompositeCompare, TruncateCompositeValue) +TEST(TCompositeCompare, TruncateYsonValue) { auto normalizeYson = [] (TStringBuf yson) { return yson.empty() ? TString(yson) : ConvertToYsonString(TYsonString(yson), EYsonFormat::Binary).ToString(); }; auto getTruncatedYson = [&] (TStringBuf original, i64 size) { - auto truncatedCompositeValue = TruncateCompositeValue(TYsonString(original), size); - return truncatedCompositeValue ? truncatedCompositeValue->ToString() : ""; + auto truncatedValue = TruncateYsonValue(TYsonString(original), size); + return truncatedValue ? truncatedValue->ToString() : ""; }; // When we rebuild the whole string during truncation, we should produce the correct normalized binary YSON version of the string as output. diff --git a/yt/yt/client/unittests/row_ut.cpp b/yt/yt/client/unittests/row_ut.cpp index 2eacf5e19c..91a2e67ab4 100644 --- a/yt/yt/client/unittests/row_ut.cpp +++ b/yt/yt/client/unittests/row_ut.cpp @@ -1,9 +1,9 @@ -#include <yt/yt/core/test_framework/framework.h> - +#include <yt/yt/client/table_client/public.h> +#include <yt/yt/client/table_client/row_buffer.h> #include <yt/yt/client/table_client/unversioned_row.h> #include <yt/yt/client/table_client/versioned_row.h> -#include <yt/yt/client/table_client/row_buffer.h> +#include <yt/yt/core/test_framework/framework.h> #include <yt/yt/core/misc/protobuf_helpers.h> #include <limits> @@ -96,6 +96,57 @@ TEST(TUnversionedValueTest, CompareComposite) EXPECT_TRUE(CompareRowValues(nullValue, compositeValue) < 0); } +TEST(TUnversionedValueTest, CompareAny) +{ + auto intListValue = MakeUnversionedAnyValue("[123]"); + auto stringListValue = MakeUnversionedAnyValue("[\"0\"]"); + auto emptyListValue = MakeUnversionedAnyValue("[]"); + auto listListValue = MakeUnversionedAnyValue("[[abc]]"); + auto stringValue = MakeUnversionedStringValue("foo"); + auto intValue = MakeUnversionedInt64Value(123); + auto nullValue = MakeUnversionedSentinelValue(EValueType::Null); + + // Any vs just value + EXPECT_TRUE(CompareRowValues(stringValue, intListValue) < 0); + EXPECT_TRUE(CompareRowValues(intListValue, stringValue) > 0); + + // String vs int as any & just value + EXPECT_TRUE(CompareRowValues(stringValue, intValue) > 0); + EXPECT_TRUE(CompareRowValues(intValue, stringValue) < 0); + + EXPECT_TRUE(CompareRowValues(stringListValue, intListValue) > 0); + EXPECT_TRUE(CompareRowValues(intListValue, stringListValue) < 0); + + // Null, empty list + EXPECT_TRUE(CompareRowValues(emptyListValue, intListValue) < 0); + EXPECT_TRUE(CompareRowValues(intListValue, emptyListValue) > 0); + + EXPECT_TRUE(CompareRowValues(intListValue, nullValue) > 0); + EXPECT_TRUE(CompareRowValues(nullValue, intListValue) < 0); + + EXPECT_TRUE(CompareRowValues(emptyListValue, nullValue) > 0); + EXPECT_TRUE(CompareRowValues(nullValue, emptyListValue) < 0); + + // List vs int as any & just value + EXPECT_TRUE(CompareRowValues(intValue, intListValue) < 0); + EXPECT_TRUE(CompareRowValues(intListValue, intValue) > 0); + + EXPECT_TRUE(CompareRowValues(intListValue, listListValue) < 0); + EXPECT_TRUE(CompareRowValues(listListValue, intListValue) > 0); + + // Any map & attrs + auto mapValue = MakeUnversionedAnyValue("{a=123}"); + EXPECT_THROW_WITH_ERROR_CODE(CompareRowValues(intListValue, mapValue), EErrorCode::IncomparableComplexValues); + + auto annotatedValue = MakeUnversionedAnyValue("[<a=10>123]"); + EXPECT_THROW_WITH_ERROR_CODE(CompareRowValues(intListValue, annotatedValue), EErrorCode::IncomparableComplexValues); + + // Lazy comparison: we assume that such values are filtered before write to sorted column + auto listWithMapValue = MakeUnversionedAnyValue("[122, {a=123}]"); + EXPECT_TRUE(CompareRowValues(intListValue, listWithMapValue) > 0); + EXPECT_TRUE(CompareRowValues(listWithMapValue, intListValue) < 0); +} + //////////////////////////////////////////////////////////////////////////////// TEST(TFormatTest, UnversionedValue) |