aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwhatsername <whatsername@yandex-team.com>2024-04-15 23:09:20 +0300
committerwhatsername <whatsername@yandex-team.com>2024-04-15 23:18:00 +0300
commita124ba45ea6873bedcd2c81515ee399c8de92363 (patch)
tree1f8e232f8d003fa0339b44ce6acd50056292d31c
parent522b983401ad0e6aec915a0be5d69685e9ed188a (diff)
downloadydb-a124ba45ea6873bedcd2c81515ee399c8de92363.tar.gz
YT-21169: Compare any types
ea6fc0df4ca824207ae49ffb843b090a6cf66f1d
-rw-r--r--yt/yt/client/table_client/composite_compare.cpp48
-rw-r--r--yt/yt/client/table_client/composite_compare.h2
-rw-r--r--yt/yt/client/table_client/helpers.cpp8
-rw-r--r--yt/yt/client/table_client/row_base.cpp1
-rw-r--r--yt/yt/client/table_client/unversioned_row.cpp51
-rw-r--r--yt/yt/client/unittests/composite_compare_ut.cpp6
-rw-r--r--yt/yt/client/unittests/row_ut.cpp57
7 files changed, 145 insertions, 28 deletions
diff --git a/yt/yt/client/table_client/composite_compare.cpp b/yt/yt/client/table_client/composite_compare.cpp
index b2ddd498ec..0031bef401 100644
--- a/yt/yt/client/table_client/composite_compare.cpp
+++ b/yt/yt/client/table_client/composite_compare.cpp
@@ -1,5 +1,6 @@
#include "composite_compare.h"
+#include <yt/yt/client/table_client/row_base.h>
#include <yt/yt/core/yson/pull_parser.h>
#include <yt/yt/core/yson/token_writer.h>
@@ -16,27 +17,28 @@ using namespace NYson;
////////////////////////////////////////////////////////////////////////////////
-static const auto Logger = NLogging::TLogger{"YsonCompositveCompare"};
+static const auto Logger = NLogging::TLogger{"YsonCompositeCompare"};
////////////////////////////////////////////////////////////////////////////////
namespace {
-// This file implements comparison for composite values.
+// This file implements comparison for composite and any values.
// Composite types that supports comparison are:
// 1. Optional
// 2. List
// 3. Tuple
// 4. Variant
//
-// When we compare composite values we assume that they are well-formed yson representations of same type supporting comparison.
+// When we compare composite or any values we assume that they are well-formed YSON representations of the same type supporting comparison.
// And we compare them in following manner:
// 1. We scan two values simultaneously and look at their yson tokens and find first mismatching token.
// 2. If one of the token is EndList (this only can happen if we parsing values of list type
// and one list is shorter than another) that means that value containing EndList is less that other.
// 3. Otherwise if one of the values is Entity (other value have to be non null value) that means
// that value containing Entity is less than other.
-// 4. Otherwise it's 2 values of the same type and we can easily compare them.
+// 4. Otherwise, if the values have different types, we compare them using the EValueType order (via MapItemTypeToValueType).
+// 5. Otherwise both values have the same type and we can easily compare them.
DEFINE_ENUM_WITH_UNDERLYING_TYPE(ECompareClass, ui32,
((Incomparable)(0))
((EndList)(1))
@@ -108,6 +110,30 @@ Y_FORCE_INLINE static int GetSign(int x)
return static_cast<int>(0 < x) - static_cast<int>(0 > x);
}
+Y_FORCE_INLINE static EValueType MapItemTypeToValueType(EYsonItemType itemType)
+{
+ static const TEnumIndexedArray<EYsonItemType, EValueType> mapping = {
+ {EYsonItemType::EndOfStream, EValueType::Min},
+ {EYsonItemType::BeginMap, EValueType::Min},
+ {EYsonItemType::EndMap, EValueType::Min},
+ {EYsonItemType::BeginAttributes, EValueType::Min},
+ {EYsonItemType::EndAttributes, EValueType::Min},
+ {EYsonItemType::BeginList, EValueType::Any},
+ {EYsonItemType::EndList, EValueType::Min},
+ {EYsonItemType::EntityValue, EValueType::Min},
+ {EYsonItemType::BooleanValue, EValueType::Boolean},
+ {EYsonItemType::Int64Value, EValueType::Int64},
+ {EYsonItemType::Uint64Value, EValueType::Uint64},
+ {EYsonItemType::DoubleValue, EValueType::Double},
+ {EYsonItemType::StringValue, EValueType::String},
+ };
+ auto valueType = mapping[itemType];
+ if (valueType == EValueType::Min) {
+ ThrowIncomparableYsonToken(itemType);
+ }
+ return valueType;
+}
+
Y_FORCE_INLINE static int CompareYsonItems(const TYsonItem& lhs, const TYsonItem& rhs)
{
if (lhs.GetType() == rhs.GetType()) {
@@ -148,9 +174,7 @@ Y_FORCE_INLINE static int CompareYsonItems(const TYsonItem& lhs, const TYsonItem
}
if (lhsClass == ECompareClass::BeginValue && rhsClass == ECompareClass::BeginValue) {
- THROW_ERROR_EXCEPTION("Incomparable scalar types %Qlv and %Qlv in YSON representation",
- lhs.GetType(),
- rhs.GetType());
+ return static_cast<int>(MapItemTypeToValueType(lhs.GetType())) - static_cast<int>(MapItemTypeToValueType(rhs.GetType()));
}
return ComparePrimitive(static_cast<ui32>(lhsClass), static_cast<ui32>(rhsClass));
}
@@ -314,21 +338,21 @@ TFingerprint CompositeFarmHash(TYsonStringBuf value)
////////////////////////////////////////////////////////////////////////////////
-std::optional<TYsonString> TruncateCompositeValue(TYsonStringBuf value, i64 size)
+std::optional<TYsonString> TruncateYsonValue(TYsonStringBuf originalYson, i64 size)
{
- YT_VERIFY(value.GetType() == EYsonType::Node);
+ YT_VERIFY(originalYson.GetType() == EYsonType::Node);
YT_VERIFY(size >= 0);
if (!size) {
return {};
}
- TMemoryInput valueIn(value.AsStringBuf());
+ TMemoryInput valueIn(originalYson.AsStringBuf());
TYsonPullParser valueParser(&valueIn, EYsonType::Node);
TString truncatedYson;
TStringOutput output(truncatedYson);
- output.Reserve(std::min(size, std::ssize(value.AsStringBuf())));
+ output.Reserve(std::min(size, std::ssize(originalYson.AsStringBuf())));
TCheckedInDebugYsonTokenWriter writer(&output);
i64 unclosedListCount = 0;
@@ -421,7 +445,7 @@ std::optional<TYsonString> TruncateCompositeValue(TYsonStringBuf value, i64 size
YT_LOG_ALERT_IF(
std::ssize(truncatedYson) > size,
"Composite YSON truncation increased the value's binary size (OriginalValue: %v, TruncatedValue: %v)",
- value.AsStringBuf(),
+ originalYson.AsStringBuf(),
truncatedYson);
return TYsonString(std::move(truncatedYson));
diff --git a/yt/yt/client/table_client/composite_compare.h b/yt/yt/client/table_client/composite_compare.h
index 04737d8b7b..aa64c01f25 100644
--- a/yt/yt/client/table_client/composite_compare.h
+++ b/yt/yt/client/table_client/composite_compare.h
@@ -22,7 +22,7 @@ TFingerprint CompositeFarmHash(NYson::TYsonStringBuf compositeValue);
//!
//! NB: The current implementation guarantees that the size of the returned string is not larger then the provided limit. However,
//! this might be hard to maintain and is not something one should rely on. It is better to think of this function as an approximate one.
-std::optional<NYson::TYsonString> TruncateCompositeValue(NYson::TYsonStringBuf value, i64 size);
+std::optional<NYson::TYsonString> TruncateYsonValue(NYson::TYsonStringBuf value, i64 size);
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp
index 54733a18b4..38c55c933d 100644
--- a/yt/yt/client/table_client/helpers.cpp
+++ b/yt/yt/client/table_client/helpers.cpp
@@ -1596,11 +1596,11 @@ TUnversionedValueRangeTruncationResult TruncateUnversionedValues(
truncatedValues.push_back(value);
auto& truncatedValue = truncatedValues.back();
- if (clipped || value.Type == EValueType::Any) {
+ if (clipped) {
truncatedValue = MakeUnversionedNullValue(value.Id, value.Flags);
- } else if (value.Type == EValueType::Composite) {
- if (auto truncatedCompositeValue = TruncateCompositeValue(TYsonStringBuf(value.AsStringBuf()), maxSizePerValue)) {
- truncatedValue = rowBuffer->CaptureValue(MakeUnversionedCompositeValue(truncatedCompositeValue->AsStringBuf(), value.Id, value.Flags));
+ } else if (value.Type == EValueType::Any || value.Type == EValueType::Composite) {
+ if (auto truncatedYsonValue = TruncateYsonValue(TYsonStringBuf(value.AsStringBuf()), maxSizePerValue)) {
+ truncatedValue = rowBuffer->CaptureValue(MakeUnversionedStringLikeValue(value.Type, truncatedYsonValue->AsStringBuf(), value.Id, value.Flags));
} else {
truncatedValue = MakeUnversionedNullValue(value.Id, value.Flags);
}
diff --git a/yt/yt/client/table_client/row_base.cpp b/yt/yt/client/table_client/row_base.cpp
index 2eb15d37fd..b2970d58d6 100644
--- a/yt/yt/client/table_client/row_base.cpp
+++ b/yt/yt/client/table_client/row_base.cpp
@@ -118,6 +118,7 @@ void ValidateKeyValueType(EValueType type)
type != EValueType::Double &&
type != EValueType::Boolean &&
type != EValueType::String &&
+ type != EValueType::Any &&
type != EValueType::Composite &&
type != EValueType::Null &&
type != EValueType::Min &&
diff --git a/yt/yt/client/table_client/unversioned_row.cpp b/yt/yt/client/table_client/unversioned_row.cpp
index 6211bffdb5..18cfd89d7e 100644
--- a/yt/yt/client/table_client/unversioned_row.cpp
+++ b/yt/yt/client/table_client/unversioned_row.cpp
@@ -320,9 +320,23 @@ int CompareRowValues(const TUnversionedValue& lhs, const TUnversionedValue& rhs)
// TODO(babenko): check flags; forbid comparing hunks and aggregates.
if (lhs.Type == EValueType::Any || rhs.Type == EValueType::Any) {
- if (!IsSentinel(lhs.Type) && !IsSentinel(rhs.Type)) {
- // Never compare composite values with non-sentinels.
- ThrowIncomparableTypes(lhs, rhs);
+ if (lhs.Type != rhs.Type) {
+ if (lhs.Type == EValueType::Composite || rhs.Type == EValueType::Composite) {
+ ThrowIncomparableTypes(lhs, rhs);
+ }
+ return static_cast<int>(lhs.Type) - static_cast<int>(rhs.Type);
+ }
+ try {
+ auto lhsData = TYsonStringBuf(lhs.AsStringBuf());
+ auto rhsData = TYsonStringBuf(rhs.AsStringBuf());
+ return CompareCompositeValues(lhsData, rhsData);
+ } catch (const std::exception& ex) {
+ THROW_ERROR_EXCEPTION(
+ NTableClient::EErrorCode::IncomparableComplexValues,
+ "Cannot compare complex values")
+ << TErrorAttribute("lhs_value", lhs)
+ << TErrorAttribute("rhs_value", rhs)
+ << ex;
}
}
@@ -649,6 +663,7 @@ public:
void OnBeginMap() override
{
++Depth_;
+ MapFound_ = true;
}
void OnKeyedItem(TStringBuf /*key*/) override
@@ -664,6 +679,7 @@ public:
if (Depth_ == 0) {
THROW_ERROR_EXCEPTION("Table values cannot have top-level attributes");
}
+ AttributesFound_ = true;
}
void OnEndAttributes() override
@@ -672,8 +688,15 @@ public:
void OnRaw(TStringBuf /*yson*/, EYsonType /*type*/) override
{ }
+ bool CanBeSorted() const
+ {
+ return !MapFound_ && !AttributesFound_;
+ }
+
private:
int Depth_ = 0;
+ bool MapFound_ = false;
+ bool AttributesFound_ = false;
};
void ValidateAnyValue(TStringBuf yson)
@@ -682,6 +705,14 @@ void ValidateAnyValue(TStringBuf yson)
ParseYsonStringBuffer(yson, EYsonType::Node, &validator);
}
+bool ValidateSortedAnyValue(TStringBuf yson)
+{
+ TYsonAnyValidator validator;
+ ParseYsonStringBuffer(yson, EYsonType::Node, &validator);
+
+ return validator.CanBeSorted();
+}
+
void ValidateDynamicValue(const TUnversionedValue& value, bool isKey)
{
switch (value.Type) {
@@ -1036,8 +1067,18 @@ void ValidateValueType(
"Cannot write value of type %Qlv into type any column",
value.Type);
}
- if (IsAnyOrComposite(value.Type) && validateAnyIsValidYson) {
- ValidateAnyValue(value.AsStringBuf());
+ if (IsAnyOrComposite(value.Type)) {
+ if (columnSchema.SortOrder()) {
+ bool canBeSorted = ValidateSortedAnyValue(value.AsStringBuf());
+ if (!canBeSorted) {
+ THROW_ERROR_EXCEPTION(
+ NTableClient::EErrorCode::SchemaViolation,
+ "Cannot write value of type %Qlv, which contains a YSON map, into type any sorted column",
+ value.Type);
+ }
+ } else if (validateAnyIsValidYson) {
+ ValidateAnyValue(value.AsStringBuf());
+ }
}
} else {
ValidateColumnType(EValueType::Composite, value);
diff --git a/yt/yt/client/unittests/composite_compare_ut.cpp b/yt/yt/client/unittests/composite_compare_ut.cpp
index 5e792919fa..8f59fab630 100644
--- a/yt/yt/client/unittests/composite_compare_ut.cpp
+++ b/yt/yt/client/unittests/composite_compare_ut.cpp
@@ -72,15 +72,15 @@ TEST(TCompositeCompare, CompositeFingerprint)
EXPECT_EQ(getFarmHash("#"), GetFarmFingerprint(MakeUnversionedNullValue()));
}
-TEST(TCompositeCompare, TruncateCompositeValue)
+TEST(TCompositeCompare, TruncateYsonValue)
{
auto normalizeYson = [] (TStringBuf yson) {
return yson.empty() ? TString(yson) : ConvertToYsonString(TYsonString(yson), EYsonFormat::Binary).ToString();
};
auto getTruncatedYson = [&] (TStringBuf original, i64 size) {
- auto truncatedCompositeValue = TruncateCompositeValue(TYsonString(original), size);
- return truncatedCompositeValue ? truncatedCompositeValue->ToString() : "";
+ auto truncatedValue = TruncateYsonValue(TYsonString(original), size);
+ return truncatedValue ? truncatedValue->ToString() : "";
};
// When we rebuild the whole string during truncation, we should produce the correct normalized binary YSON version of the string as output.
diff --git a/yt/yt/client/unittests/row_ut.cpp b/yt/yt/client/unittests/row_ut.cpp
index 2eacf5e19c..91a2e67ab4 100644
--- a/yt/yt/client/unittests/row_ut.cpp
+++ b/yt/yt/client/unittests/row_ut.cpp
@@ -1,9 +1,9 @@
-#include <yt/yt/core/test_framework/framework.h>
-
+#include <yt/yt/client/table_client/public.h>
+#include <yt/yt/client/table_client/row_buffer.h>
#include <yt/yt/client/table_client/unversioned_row.h>
#include <yt/yt/client/table_client/versioned_row.h>
-#include <yt/yt/client/table_client/row_buffer.h>
+#include <yt/yt/core/test_framework/framework.h>
#include <yt/yt/core/misc/protobuf_helpers.h>
#include <limits>
@@ -96,6 +96,57 @@ TEST(TUnversionedValueTest, CompareComposite)
EXPECT_TRUE(CompareRowValues(nullValue, compositeValue) < 0);
}
+TEST(TUnversionedValueTest, CompareAny)
+{
+ auto intListValue = MakeUnversionedAnyValue("[123]");
+ auto stringListValue = MakeUnversionedAnyValue("[\"0\"]");
+ auto emptyListValue = MakeUnversionedAnyValue("[]");
+ auto listListValue = MakeUnversionedAnyValue("[[abc]]");
+ auto stringValue = MakeUnversionedStringValue("foo");
+ auto intValue = MakeUnversionedInt64Value(123);
+ auto nullValue = MakeUnversionedSentinelValue(EValueType::Null);
+
+ // Any vs just value
+ EXPECT_TRUE(CompareRowValues(stringValue, intListValue) < 0);
+ EXPECT_TRUE(CompareRowValues(intListValue, stringValue) > 0);
+
+ // String vs int as any & just value
+ EXPECT_TRUE(CompareRowValues(stringValue, intValue) > 0);
+ EXPECT_TRUE(CompareRowValues(intValue, stringValue) < 0);
+
+ EXPECT_TRUE(CompareRowValues(stringListValue, intListValue) > 0);
+ EXPECT_TRUE(CompareRowValues(intListValue, stringListValue) < 0);
+
+ // Null, empty list
+ EXPECT_TRUE(CompareRowValues(emptyListValue, intListValue) < 0);
+ EXPECT_TRUE(CompareRowValues(intListValue, emptyListValue) > 0);
+
+ EXPECT_TRUE(CompareRowValues(intListValue, nullValue) > 0);
+ EXPECT_TRUE(CompareRowValues(nullValue, intListValue) < 0);
+
+ EXPECT_TRUE(CompareRowValues(emptyListValue, nullValue) > 0);
+ EXPECT_TRUE(CompareRowValues(nullValue, emptyListValue) < 0);
+
+ // List vs int as any & just value
+ EXPECT_TRUE(CompareRowValues(intValue, intListValue) < 0);
+ EXPECT_TRUE(CompareRowValues(intListValue, intValue) > 0);
+
+ EXPECT_TRUE(CompareRowValues(intListValue, listListValue) < 0);
+ EXPECT_TRUE(CompareRowValues(listListValue, intListValue) > 0);
+
+ // Any map & attrs
+ auto mapValue = MakeUnversionedAnyValue("{a=123}");
+ EXPECT_THROW_WITH_ERROR_CODE(CompareRowValues(intListValue, mapValue), EErrorCode::IncomparableComplexValues);
+
+ auto annotatedValue = MakeUnversionedAnyValue("[<a=10>123]");
+ EXPECT_THROW_WITH_ERROR_CODE(CompareRowValues(intListValue, annotatedValue), EErrorCode::IncomparableComplexValues);
+
+ // Lazy comparison: we assume that such values are filtered out before being written to a sorted column.
+ auto listWithMapValue = MakeUnversionedAnyValue("[122, {a=123}]");
+ EXPECT_TRUE(CompareRowValues(intListValue, listWithMapValue) > 0);
+ EXPECT_TRUE(CompareRowValues(listWithMapValue, intListValue) < 0);
+}
+
////////////////////////////////////////////////////////////////////////////////
TEST(TFormatTest, UnversionedValue)