diff options
author | nadya02 <nadya02@yandex-team.com> | 2023-08-30 16:02:15 +0300 |
---|---|---|
committer | nadya02 <nadya02@yandex-team.com> | 2023-08-30 16:19:11 +0300 |
commit | f54e6b6e47c6bb1e42c0870eabc8e893f5217f83 (patch) | |
tree | 1bf75909a43582c61de898c84b8d0d2d6ce850b8 | |
parent | c0a64c3c1f6d2d39aa831b5da08c48be92447e8e (diff) | |
download | ydb-f54e6b6e47c6bb1e42c0870eabc8e893f5217f83.tar.gz |
YT-19430: Move converters from client to library and cosmetic
move converters
-rw-r--r-- | yt/yt/client/converters/boolean_converter.cpp | 101 | ||||
-rw-r--r-- | yt/yt/client/converters/boolean_converter.h | 15 | ||||
-rw-r--r-- | yt/yt/client/converters/converter.cpp | 89 | ||||
-rw-r--r-- | yt/yt/client/converters/converter.h | 53 | ||||
-rw-r--r-- | yt/yt/client/converters/floating_point_converter.cpp | 136 | ||||
-rw-r--r-- | yt/yt/client/converters/floating_point_converter.h | 15 | ||||
-rw-r--r-- | yt/yt/client/converters/helper.cpp | 60 | ||||
-rw-r--r-- | yt/yt/client/converters/helper.h | 49 | ||||
-rw-r--r-- | yt/yt/client/converters/integer_converter.cpp | 177 | ||||
-rw-r--r-- | yt/yt/client/converters/integer_converter.h | 17 | ||||
-rw-r--r-- | yt/yt/client/converters/null_converter.cpp | 53 | ||||
-rw-r--r-- | yt/yt/client/converters/null_converter.h | 13 | ||||
-rw-r--r-- | yt/yt/client/converters/string_converter.cpp | 380 | ||||
-rw-r--r-- | yt/yt/client/converters/string_converter.h | 25 | ||||
-rw-r--r-- | yt/yt/client/ya.make | 8 |
15 files changed, 0 insertions, 1191 deletions
diff --git a/yt/yt/client/converters/boolean_converter.cpp b/yt/yt/client/converters/boolean_converter.cpp deleted file mode 100644 index e083ff54c6d..00000000000 --- a/yt/yt/client/converters/boolean_converter.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include "boolean_converter.h" -#include "helper.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -void FillColumnarBooleanValues( - TBatchColumn* column, - i64 startIndex, - i64 valueCount, - TRef bitmap) -{ - column->StartIndex = startIndex; - column->ValueCount = valueCount; - - auto& values = column->Values.emplace(); - values.BitWidth = 1; - values.Data = bitmap; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TBooleanColumnConverter - : public IColumnConverter -{ -public: - TBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) - : ColumnIndex_(columnIndex) - , ColumnSchema_(columnSchema) - { } - - TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override - { - Reset(); - AddValues(rowsValues); - - auto column = std::make_shared<TBatchColumn>(); - auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>(); - auto valuesRef = Values_.Flush<TConverterTag>(); - - FillColumnarBooleanValues(column.get(), 0, rowsValues.size(), valuesRef); - FillColumnarNullBitmap(column.get(), 0, rowsValues.size(), nullBitmapRef); - - column->Type = ColumnSchema_.LogicalType(); - column->Id = ColumnIndex_; - - TOwningColumn owner = { - std::move(column), - std::move(nullBitmapRef), - std::move(valuesRef), - /*stringBuffer*/ std::nullopt - }; - - return {{owner}, owner.Column.get()}; - } - - -private: - const int ColumnIndex_; - NTableClient::TColumnSchema ColumnSchema_; - - TBitmapOutput Values_; - TBitmapOutput NullBitmap_; - - void Reset() - { - Values_ = TBitmapOutput(); - NullBitmap_ = TBitmapOutput(); - } - - void AddValues(const std::vector<TUnversionedRowValues>& rowsValues) - { - for (auto rowValues : rowsValues) { - auto value = rowValues[ColumnIndex_]; - bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null; - bool data = isNull ? false : value->Data.Boolean; - NullBitmap_.Append(isNull); - Values_.Append(data); - } - } -}; - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) -{ - return std::make_unique<TBooleanColumnConverter>(columnIndex, columnSchema); -} - -//////////////////////////////////////////////////////////////////////////////// - - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/boolean_converter.h b/yt/yt/client/converters/boolean_converter.h deleted file mode 100644 index 7eee8b53494..00000000000 --- a/yt/yt/client/converters/boolean_converter.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include "converter.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/converter.cpp b/yt/yt/client/converters/converter.cpp deleted file mode 100644 index a9d27d86317..00000000000 --- a/yt/yt/client/converters/converter.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include "converter.h" - -#include "boolean_converter.h" -#include "floating_point_converter.h" -#include "integer_converter.h" -#include "null_converter.h" -#include "string_converter.h" - -#include <yt/yt/client/table_client/row_base.h> -#include <yt/yt/client/table_client/schema.h> - -namespace NYT::NConverters { - -using namespace NTableClient; - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateColumnConvert( - const NTableClient::TColumnSchema& columnSchema, - int columnIndex) { - switch (columnSchema.GetWireType()) { - case EValueType::Int64: - return CreateInt64ColumnConverter(columnIndex, columnSchema); - - case EValueType::Uint64: - return CreateUint64ColumnConverter(columnIndex, columnSchema); - - case EValueType::Double: - switch (columnSchema.CastToV1Type()) { - case NTableClient::ESimpleLogicalValueType::Float: - return CreateFloatingPoint32ColumnConverter(columnIndex, columnSchema); - default: - return CreateFloatingPoint64ColumnConverter(columnIndex, columnSchema); - } - - case EValueType::String: - return CreateStringConverter(columnIndex, columnSchema); - - case EValueType::Boolean: - return CreateBooleanColumnConverter(columnIndex, columnSchema); - - case EValueType::Any: - return CreateAnyConverter(columnIndex, columnSchema); - - case EValueType::Composite: - return CreateCompositeConverter(columnIndex, columnSchema); - - case EValueType::Null: - return CreateNullConverter(columnIndex); - - case EValueType::Min: - case EValueType::TheBottom: - case EValueType::Max: - break; - } - ThrowUnexpectedValueType(columnSchema.GetWireType()); -} - -//////////////////////////////////////////////////////////////////////////////// - - -TConvertedColumnRange ConvertRowsToColumns( - TRange<TUnversionedRow> rows, - const std::vector<TColumnSchema> &columnSchema) -{ - TConvertedColumnRange convertedColumnsRange; - std::vector<TUnversionedRowValues> rowsValues; - rowsValues.reserve(rows.size()); - - for (const auto& row : rows) { - NConverters::TUnversionedRowValues rowValues; - rowValues.resize(columnSchema.size(), nullptr); - for (const auto* item = row.Begin(); item != row.End(); ++item) { - rowValues[item->Id] = item; - } - rowsValues.push_back(std::move(rowValues)); - } - - for (int columnId = 0; columnId < std::ssize(columnSchema); columnId++) { - auto converter = CreateColumnConvert(columnSchema[columnId], columnId); - auto columns = converter->Convert(rowsValues); - convertedColumnsRange.push_back(columns); - } - return convertedColumnsRange; -} - -//////////////////////////////////////////////////////////////////////////////// - -} diff --git a/yt/yt/client/converters/converter.h b/yt/yt/client/converters/converter.h deleted file mode 100644 index 30ad2188099..00000000000 --- a/yt/yt/client/converters/converter.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include <yt/yt/client/table_client/row_batch.h> - -#include <yt/yt/core/misc/bitmap.h> - -#include <library/cpp/yt/memory/ref.h> - -namespace NYT::NConverters { - -using TBatchColumn = NTableClient::IUnversionedColumnarRowBatch::TColumn; -using TBatchColumnPtr = std::shared_ptr<TBatchColumn>; -using TUnversionedRowValues = std::vector<const NTableClient::TUnversionedValue*>; - -//////////////////////////////////////////////////////////////////////////////// - -struct TOwningColumn -{ - TBatchColumnPtr Column; - std::optional<TSharedRef> NullBitmap; - std::optional<TSharedRef> ValueBuffer; - std::optional<TSharedRef> StringBuffer; -}; - -struct TConvertedColumn -{ - std::vector<TOwningColumn> Columns; - TBatchColumn* RootColumn; -}; - -using TConvertedColumnRange = std::vector<TConvertedColumn>; - -//////////////////////////////////////////////////////////////////////////////// - -struct IColumnConverter - : public TNonCopyable -{ - virtual ~IColumnConverter() = default; - virtual TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) = 0; -}; - -using IColumnConverterPtr = std::unique_ptr<IColumnConverter>; - -//////////////////////////////////////////////////////////////////////////////// - - -TConvertedColumnRange ConvertRowsToColumns( - TRange<NTableClient::TUnversionedRow> rows, - const std::vector<NTableClient::TColumnSchema> &columnSchema); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/floating_point_converter.cpp b/yt/yt/client/converters/floating_point_converter.cpp deleted file mode 100644 index 748a64cbd1e..00000000000 --- a/yt/yt/client/converters/floating_point_converter.cpp +++ /dev/null @@ -1,136 +0,0 @@ -#include "floating_point_converter.h" -#include "helper.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> - -namespace NYT::NConverters { - -using namespace NProto; -using namespace NTableClient; - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -template <typename T> -void FillColumnarFloatingPointValues( - NTableClient::IUnversionedColumnarRowBatch::TColumn* column, - i64 startIndex, - i64 valueCount, - TRef data) -{ - column->StartIndex = startIndex; - column->ValueCount = valueCount; - - auto& values = column->Values.emplace(); - values.BitWidth = sizeof(T) * 8; - values.Data = data; -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -template <typename T> -TSharedRef SerializeFloatingPointVector(const std::vector<T>& values) -{ - auto data = TSharedMutableRef::Allocate<TConverterTag>(values.size() * sizeof(T) + sizeof(ui64), {.InitializeStorage = false}); - *reinterpret_cast<ui64*>(data.Begin()) = static_cast<ui64>(values.size()); - std::memcpy( - data.Begin() + sizeof(ui64), - values.data(), - values.size() * sizeof(T)); - return data; -} - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -template <class TValue, NTableClient::EValueType ValueType> -class TFloatingPointColumnConverter - : public IColumnConverter -{ -public: - TFloatingPointColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) - : ColumnIndex_(columnIndex) - , ColumnSchema_(columnSchema) - { - static_assert(std::is_floating_point_v<TValue>); - } - - TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) { - Reset(); - AddValues(rowsValues); - auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>(); - auto valuesRef = TSharedRef::MakeCopy<TConverterTag>(TRef(Values_.data(), sizeof(TValue) * Values_.size())); - - auto column = std::make_shared<TBatchColumn>(); - - FillColumnarFloatingPointValues<TValue>( - column.get(), - 0, - rowsValues.size(), - valuesRef); - - FillColumnarNullBitmap( - column.get(), - 0, - rowsValues.size(), - nullBitmapRef); - - column->Type = ColumnSchema_.LogicalType(); - column->Id = ColumnIndex_; - - TOwningColumn owner = { - std::move(column), - std::move(nullBitmapRef), - std::move(valuesRef), - /*stringBuffer*/ std::nullopt - }; - - TConvertedColumn res = {{owner}, owner.Column.get()}; - return res; - - } - -private: - const int ColumnIndex_; - TColumnSchema ColumnSchema_; - std::vector<TValue> Values_; - TBitmapOutput NullBitmap_; - - void Reset() - { - Values_.clear(); - NullBitmap_ = TBitmapOutput(); - } - - void AddValues(const std::vector<TUnversionedRowValues>& rowsValues) - { - for (auto rowValues : rowsValues) { - auto value = rowValues[ColumnIndex_]; - bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null; - TValue data = isNull ? 0 : value->Data.Double; - NullBitmap_.Append(isNull); - Values_.push_back(data); - } - } -}; - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateFloatingPoint32ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) { - return std::make_unique<TFloatingPointColumnConverter<float, NTableClient::EValueType::Double>>(columnIndex, columnSchema); -} - -IColumnConverterPtr CreateFloatingPoint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) { - return std::make_unique<TFloatingPointColumnConverter<double, NTableClient::EValueType::Double>>(columnIndex, columnSchema); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/floating_point_converter.h b/yt/yt/client/converters/floating_point_converter.h deleted file mode 100644 index b5f049e29ec..00000000000 --- a/yt/yt/client/converters/floating_point_converter.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "converter.h" - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateFloatingPoint32ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema); - -IColumnConverterPtr CreateFloatingPoint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/helper.cpp b/yt/yt/client/converters/helper.cpp deleted file mode 100644 index f531325421e..00000000000 --- a/yt/yt/client/converters/helper.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "helper.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> -#include <yt/yt/client/table_client/columnar.h> -#include <yt/yt/client/table_client/logical_type.h> - -#include <yt/yt/core/misc/bitmap.h> -#include <yt/yt/core/misc/common.h> - -namespace NYT::NConverters { - -using namespace NProto; -using namespace NTableClient; - -//////////////////////////////////////////////////////////////////////////////// - -void FillColumnarNullBitmap( - NTableClient::IUnversionedColumnarRowBatch::TColumn* column, - i64 startIndex, - i64 valueCount, - TRef bitmap) -{ - column->StartIndex = startIndex; - column->ValueCount = valueCount; - - auto& nullBitmap = column->NullBitmap.emplace(); - nullBitmap.Data = bitmap; -} - - -void FillColumnarDictionary( - NTableClient::IUnversionedColumnarRowBatch::TColumn* primaryColumn, - NTableClient::IUnversionedColumnarRowBatch::TColumn* dictionaryColumn, - NTableClient::IUnversionedColumnarRowBatch::TDictionaryId dictionaryId, - NTableClient::TLogicalTypePtr type, - i64 startIndex, - i64 valueCount, - TRef ids) -{ - primaryColumn->StartIndex = startIndex; - primaryColumn->ValueCount = valueCount; - - dictionaryColumn->Type = type && type->GetMetatype() == ELogicalMetatype::Optional - ? type->AsOptionalTypeRef().GetElement() - : type; - - auto& primaryValues = primaryColumn->Values.emplace(); - primaryValues.BitWidth = 32; - primaryValues.Data = ids; - - auto& dictionary = primaryColumn->Dictionary.emplace(); - dictionary.DictionaryId = dictionaryId; - dictionary.ZeroMeansNull = true; - dictionary.ValueColumn = dictionaryColumn; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/helper.h b/yt/yt/client/converters/helper.h deleted file mode 100644 index fc2e22bce77..00000000000 --- a/yt/yt/client/converters/helper.h +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/row_batch.h> - -#include <yt/yt/core/misc/common.h> - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -void FillColumnarNullBitmap( - NTableClient::IUnversionedColumnarRowBatch::TColumn* column, - i64 startIndex, - i64 valueCount, - TRef bitmap); - -void FillColumnarDictionary( - NTableClient::IUnversionedColumnarRowBatch::TColumn* primaryColumn, - NTableClient::IUnversionedColumnarRowBatch::TColumn* dictionaryColumn, - NTableClient::IUnversionedColumnarRowBatch::TDictionaryId dictionaryId, - NTableClient::TLogicalTypePtr type, - i64 startIndex, - i64 valueCount, - TRef ids); - -//////////////////////////////////////////////////////////////////////////////// - -DEFINE_ENUM(EUnversionedStringSegmentType, - ((DictionaryDense) (0)) - ((DirectDense) (1)) -); - -/* -TODO: Dictionary for vector: - -DEFINE_ENUM(EUnversionedIntegerSegmentType, - ((DictionaryDense) (0)) - ((DirectDense) (1)) -); -*/ - -struct TConverterTag -{}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters - diff --git a/yt/yt/client/converters/integer_converter.cpp b/yt/yt/client/converters/integer_converter.cpp deleted file mode 100644 index a7956c56b4e..00000000000 --- a/yt/yt/client/converters/integer_converter.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "integer_converter.h" -#include "helper.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> - -#include <library/cpp/yt/coding/zig_zag.h> - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -ui64 EncodeValue(i64 value) -{ - return ZigZagEncode64(value); -} - -ui64 EncodeValue(ui64 value) -{ - return value; -} - -template <class TValue> -typename std::enable_if<std::is_signed<TValue>::value, TValue>::type -GetValue(const NTableClient::TUnversionedValue& value) -{ - return value.Data.Int64; -} - -template <class TValue> -typename std::enable_if<std::is_unsigned<TValue>::value, TValue>::type -GetValue(const NTableClient::TUnversionedValue& value) -{ - return value.Data.Uint64; -} - -//////////////////////////////////////////////////////////////////////////////// - -void FillColumnarIntegerValues( - NTableClient::IUnversionedColumnarRowBatch::TColumn* column, - i64 startIndex, - i64 valueCount, - NTableClient::EValueType valueType, - ui64 baseValue, - TRef data) -{ - column->StartIndex = startIndex; - column->ValueCount = valueCount; - - auto& values = column->Values.emplace(); - values.BaseValue = baseValue; - values.BitWidth = 64; - values.ZigZagEncoded = (valueType == NTableClient::EValueType::Int64); - values.Data = data; -} - -//////////////////////////////////////////////////////////////////////////////// - -// TValue - i64 or ui64. -template <class TValue> -class TIntegerColumnConverter - : public IColumnConverter -{ -public: - TIntegerColumnConverter(int columnIndex, - NTableClient::EValueType ValueType, - NTableClient::TColumnSchema columnSchema) - - : ColumnIndex_(columnIndex), - ValueType_(ValueType), - ColumnSchema_(columnSchema) - {} - - TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override - { - Reset(); - AddValues(rowsValues); - for (i64 index = 0; index < std::ssize(Values_); ++index) { - if (!NullBitmap_[index]) { - Values_[index] -= MinValue_; - } - } - - auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>(); - auto valuesRef = TSharedRef::MakeCopy<TConverterTag>(TRef(Values_.data(), sizeof(ui64) * Values_.size())); - auto column = std::make_shared<TBatchColumn>(); - - FillColumnarIntegerValues( - column.get(), - 0, - RowCount_, - ValueType_, - MinValue_, - valuesRef); - - FillColumnarNullBitmap( - column.get(), - 0, - RowCount_, - nullBitmapRef); - - column->Type = ColumnSchema_.LogicalType(); - column->Id = ColumnIndex_; - - TOwningColumn owner = { - std::move(column), - std::move(nullBitmapRef), - std::move(valuesRef), - /*stringBuffer*/ std::nullopt - }; - - TConvertedColumn res = {{owner}, owner.Column.get()}; - return res; - } - - -private: - const int ColumnIndex_; - NTableClient::EValueType ValueType_; - i64 RowCount_ = 0; - NTableClient::TColumnSchema ColumnSchema_; - - TBitmapOutput NullBitmap_; - std::vector<ui64> Values_; - - ui64 MaxValue_; - ui64 MinValue_; - - // TODO: Dictionary column - // THashMap<ui64, int> DistinctValues_; - - void Reset() - { - Values_.clear(); - RowCount_ = 0; - MaxValue_ = 0; - MinValue_ = std::numeric_limits<ui64>::max(); - NullBitmap_ = TBitmapOutput(); - } - - void AddValues(const std::vector<TUnversionedRowValues>& rowsValues) - { - for (auto rowValues : rowsValues) { - auto value = rowValues[ColumnIndex_]; - bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null; - ui64 data = 0; - if (!isNull) { - YT_VERIFY(value != nullptr); - data = EncodeValue(GetValue<TValue>(*value)); - } - Values_.push_back(data); - NullBitmap_.Append(isNull); - ++RowCount_; - } - } -}; - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateInt64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) -{ - return std::make_unique<TIntegerColumnConverter<i64>>(columnIndex, NTableClient::EValueType::Int64, columnSchema); -} - - -IColumnConverterPtr CreateUint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema) -{ - return std::make_unique<TIntegerColumnConverter<ui64>>(columnIndex, NTableClient::EValueType::Uint64, columnSchema); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/integer_converter.h b/yt/yt/client/converters/integer_converter.h deleted file mode 100644 index 18c59954fc4..00000000000 --- a/yt/yt/client/converters/integer_converter.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include "converter.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateInt64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema); - -std::unique_ptr<IColumnConverter> CreateUint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/null_converter.cpp b/yt/yt/client/converters/null_converter.cpp deleted file mode 100644 index 76c1095cdad..00000000000 --- a/yt/yt/client/converters/null_converter.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "null_converter.h" - -#include <yt/yt/client/table_client/logical_type.h> - -namespace NYT::NConverters { - -using namespace NTableClient; - -//////////////////////////////////////////////////////////////////////////////// - -class TNullColumnWriterConverter - : public IColumnConverter -{ -public: - explicit TNullColumnWriterConverter(int columnIndex) - : ColumnIndex_(columnIndex) - {} - - TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override - { - auto rowCount = rowsValues.size(); - - auto column = std::make_shared<TBatchColumn>(); - - column->Id = ColumnIndex_; - column->Type = SimpleLogicalType(ESimpleLogicalValueType::Null); - column->ValueCount = rowCount; - - TOwningColumn owner = { - std::move(column), - /*NullBitmap*/ std::nullopt, - /*ValueBuffer*/ std::nullopt, - /*stringBuffer*/ std::nullopt - }; - - return {{owner}, owner.Column.get()}; - } - -private: - const int ColumnIndex_; - -}; - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateNullConverter(int columnIndex) -{ - return std::make_unique<TNullColumnWriterConverter>(columnIndex); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/null_converter.h b/yt/yt/client/converters/null_converter.h deleted file mode 100644 index d3eb3e14914..00000000000 --- a/yt/yt/client/converters/null_converter.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "converter.h" - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateNullConverter(int columnIndex); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/string_converter.cpp b/yt/yt/client/converters/string_converter.cpp deleted file mode 100644 index 903f5191bb4..00000000000 --- a/yt/yt/client/converters/string_converter.cpp +++ /dev/null @@ -1,380 +0,0 @@ -#include "string_converter.h" -#include "helper.h" - -#include <yt/yt/client/table_client/schema.h> -#include <yt/yt/client/table_client/versioned_row.h> - -#include <yt/yt/core/misc/bit_packed_unsigned_vector.h> - -#include <library/cpp/yt/string/string_builder.h> - -namespace NYT::NConverters { - -using namespace NTableClient; - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -void FillColumnarStringValues( - NTableClient::IUnversionedColumnarRowBatch::TColumn* column, - i64 startIndex, - i64 valueCount, - ui32 avgLength, - TRef offsets, - TRef stringData) -{ - column->StartIndex = startIndex; - column->ValueCount = valueCount; - - auto& values = column->Values.emplace(); - values.BitWidth = 32; - values.ZigZagEncoded = true; - values.Data = offsets; - - auto& strings = column->Strings.emplace(); - strings.AvgLength = avgLength; - strings.Data = stringData; -} - -//////////////////////////////////////////////////////////////////////////////// - - -template <EValueType ValueType> -class TStringConverter - : public IColumnConverter -{ -public: - TStringConverter( - int columnIndex, - const TColumnSchema& columnSchema) - : ColumnIndex_(columnIndex) - , ColumnSchema_(columnSchema) - {} - - TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override - { - Reset(); - AddValues(rowsValues); - return GetColumns(); - } - -private: - const int ColumnIndex_; - TColumnSchema ColumnSchema_; - ui32 RowCount_ = 0; - ui32 AllSize_ = 0; - - std::vector<TStringBuf> Values_; - - i64 DictionaryByteSize_; - THashMap<TStringBuf, ui32> Dictionary_; - TStringBuilder DirectBuffer_; - - void Reset() - { - AllSize_ = 0; - RowCount_ = 0; - DictionaryByteSize_ = 0; - - DirectBuffer_.Reset(); - Values_.clear(); - Dictionary_.clear(); - } - - TSharedRef GetDirectDenseNullBitmap() const - { - TBitmapOutput nullBitmap(Values_.size()); - - for (auto value : Values_) { - nullBitmap.Append(IsValueNull(value)); - } - - return nullBitmap.Flush<TConverterTag>(); - } - - std::vector<ui32> GetDirectDenseOffsets() const - { - std::vector<ui32> offsets; - offsets.reserve(Values_.size()); - - ui32 offset = 0; - for (auto value : Values_) { - offset += value.length(); - offsets.push_back(offset); - } - - return offsets; - } - - TConvertedColumn GetDirectColumn(TSharedRef nullBitmap) - { - auto offsets = GetDirectDenseOffsets(); - - // Save offsets as diff from expected. - ui32 expectedLength; - ui32 maxDiff; - PrepareDiffFromExpected(&offsets, &expectedLength, &maxDiff); - - auto directData = DirectBuffer_.GetBuffer(); - - auto offsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(offsets.data(), sizeof(ui32) * offsets.size())); - auto directDataPtr = TSharedRef::MakeCopy<TConverterTag>(TRef(directData.data(), directData.size())); - auto column = std::make_shared<TBatchColumn>(); - - FillColumnarStringValues( - column.get(), - 0, - RowCount_, - expectedLength, - TRef(offsetsRef), - TRef(directDataPtr)); - - FillColumnarNullBitmap( - column.get(), - 0, - RowCount_, - TRef(nullBitmap)); - - column->Type = ColumnSchema_.LogicalType(); - column->Id = ColumnIndex_; - - TOwningColumn owner = { - std::move(column), - std::move(nullBitmap), - std::move(offsetsRef), - std::move(directDataPtr) - }; - - TConvertedColumn res = {{owner}, owner.Column.get()}; - return res; - } - - TConvertedColumn GetDictionaryColumn() - { - auto dictionaryData = TSharedMutableRef::Allocate<TConverterTag>(DictionaryByteSize_, {.InitializeStorage = false}); - - std::vector<ui32> dictionaryOffsets; - dictionaryOffsets.reserve(Dictionary_.size()); - - std::vector<ui32> ids; - ids.reserve(Values_.size()); - - ui32 dictionarySize = 0; - ui32 dictionaryOffset = 0; - for (auto value : Values_) { - if (IsValueNull(value)) { - ids.push_back(0); - continue; - } - - ui32 id = GetOrCrash(Dictionary_, value); - ids.push_back(id); - - if (id > dictionarySize) { - std::memcpy( - dictionaryData.Begin() + dictionaryOffset, - value.data(), - value.length()); - dictionaryOffset += value.length(); - dictionaryOffsets.push_back(dictionaryOffset); - ++dictionarySize; - } - } - - YT_VERIFY(dictionaryOffset == DictionaryByteSize_); - - // 1. Value ids. - auto idsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(ids.data(), sizeof(ui32) * ids.size())); - - // 2. Dictionary offsets. - ui32 expectedLength; - ui32 maxDiff; - PrepareDiffFromExpected(&dictionaryOffsets, &expectedLength, &maxDiff); - auto dictionaryOffsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(dictionaryOffsets.data(), sizeof(ui32) * dictionaryOffsets.size())); - - auto primaryColumn = std::make_shared<TBatchColumn>(); - auto dictionaryColumn = std::make_shared<TBatchColumn>(); - - FillColumnarStringValues( - dictionaryColumn.get(), - 0, - dictionaryOffsets.size(), - expectedLength, - TRef(dictionaryOffsetsRef), - dictionaryData); - - FillColumnarDictionary( - primaryColumn.get(), - dictionaryColumn.get(), - NTableClient::IUnversionedColumnarRowBatch::GenerateDictionaryId(), - primaryColumn->Type, - 0, - RowCount_, - idsRef); - - dictionaryColumn->Type = ColumnSchema_.LogicalType(); - primaryColumn->Type = ColumnSchema_.LogicalType(); - primaryColumn->Id = ColumnIndex_; - - TOwningColumn dictOwner = { - std::move(dictionaryColumn), - /*NullBitmap*/ std::nullopt, - std::move(dictionaryOffsetsRef), - std::move(dictionaryData) - }; - - TOwningColumn primeOwner = { - std::move(primaryColumn), - /*NullBitmap*/ std::nullopt, - std::move(idsRef), - /*stringBuffer*/ std::nullopt - }; - - return {{primeOwner, dictOwner}, primeOwner.Column.get()}; - } - - TConvertedColumn GetColumns() - { - auto sizes = GetMethodsCosts(); - - auto minElement = std::min_element(sizes.begin(), sizes.end()); - auto type = EUnversionedStringSegmentType(std::distance(sizes.begin(), minElement)); - - switch (type) { - - case EUnversionedStringSegmentType::DirectDense: - return GetDirectColumn(GetDirectDenseNullBitmap()); - - case EUnversionedStringSegmentType::DictionaryDense: - return GetDictionaryColumn(); - - default: - YT_ABORT(); - } - } - - TEnumIndexedVector<EUnversionedStringSegmentType, i32> GetMethodsCosts() const - { - TEnumIndexedVector<EUnversionedStringSegmentType, i32> sizes; - for (auto type : TEnumTraits<EUnversionedStringSegmentType>::GetDomainValues()) { - sizes[type] = GetSpecificMethodCosts(type); - } - return sizes; - } - - i32 GetSpecificMethodCosts(EUnversionedStringSegmentType type) const - { - switch (type) { - - case EUnversionedStringSegmentType::DictionaryDense: - return GetDictionaryByteSize(); - - case EUnversionedStringSegmentType::DirectDense: - return GetDirectByteSize(); - - default: - YT_ABORT(); - } - } - - void AddValues(const std::vector<TUnversionedRowValues>& rowsValues) - { - for (auto rowValues : rowsValues) { - auto unversionedValue = rowValues[ColumnIndex_]; - YT_VERIFY(unversionedValue != nullptr); - auto value = CaptureValue(*unversionedValue); - Values_.push_back(value); - ++RowCount_; - } - } - - static bool IsValueNull(TStringBuf lhs) - { - return !lhs.data(); - } - - i64 GetDirectByteSize() const - { - return AllSize_; - } - - i64 GetDictionaryByteSize() const - { - return DictionaryByteSize_ + Values_.size() * sizeof(ui32); - } - - - TStringBuf CaptureValue(const TUnversionedValue& unversionedValue) - { - if (unversionedValue.Type == EValueType::Null) { - return {}; - } - - auto valueCapacity = IsAnyOrComposite(ValueType) && !IsAnyOrComposite(unversionedValue.Type) - ? GetYsonSize(unversionedValue) - : static_cast<i64>(unversionedValue.Length); - - char* buffer = DirectBuffer_.Preallocate(valueCapacity); - if (!buffer) { - // This means, that we reserved nothing, because all strings are either null or empty. - // To distinguish between null and empty, we set preallocated pointer to special value. - static char* const EmptyStringBase = reinterpret_cast<char*>(1); - buffer = EmptyStringBase; - } - - auto start = buffer; - - if (IsAnyOrComposite(ValueType) && !IsAnyOrComposite(unversionedValue.Type)) { - // Any non-any and non-null value convert to YSON. - buffer += WriteYson(buffer, unversionedValue); - } else { - std::memcpy( - buffer, - unversionedValue.Data.String, - unversionedValue.Length); - buffer += unversionedValue.Length; - } - - auto value = TStringBuf(start, buffer); - - YT_VERIFY(value.size() <= valueCapacity); - - DirectBuffer_.Advance(value.size()); - - if (Dictionary_.emplace(value, Dictionary_.size() + 1).second) { - DictionaryByteSize_ += value.size(); - } - AllSize_ += value.size(); - return value; - } -}; - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateStringConverter( - int columnIndex, - const NTableClient::TColumnSchema& columnSchema) -{ - return std::make_unique<TStringConverter<EValueType::String>>(columnIndex, columnSchema); -} - -IColumnConverterPtr CreateAnyConverter( - int columnIndex, - const NTableClient::TColumnSchema& columnSchema) -{ - return std::make_unique<TStringConverter<EValueType::Any>>(columnIndex, columnSchema); -} - -IColumnConverterPtr CreateCompositeConverter( - int columnIndex, - const NTableClient::TColumnSchema& columnSchema) -{ - return std::make_unique<TStringConverter<EValueType::Composite>>(columnIndex, columnSchema); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/converters/string_converter.h b/yt/yt/client/converters/string_converter.h deleted file mode 100644 index 7cda42526e6..00000000000 --- a/yt/yt/client/converters/string_converter.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include "converter.h" - -#include <yt/yt/client/table_client/schema.h> - -namespace NYT::NConverters { - -//////////////////////////////////////////////////////////////////////////////// - -IColumnConverterPtr CreateStringConverter( - int columnIndex, - const NTableClient::TColumnSchema& columnSchema); - -IColumnConverterPtr CreateAnyConverter( - int columnIndex, - const NTableClient::TColumnSchema& columnSchema); - -IColumnConverterPtr CreateCompositeConverter( - int columnIndex, - const NTableClient::TColumnSchema& columnSchema); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NConverters diff --git a/yt/yt/client/ya.make b/yt/yt/client/ya.make index 7025b59424f..b6fdcc3f787 100644 --- a/yt/yt/client/ya.make +++ b/yt/yt/client/ya.make @@ -68,14 +68,6 @@ SRCS( chunk_client/read_limit.cpp chunk_client/ready_event_reader_base.cpp - converters/boolean_converter.cpp - converters/converter.cpp - converters/floating_point_converter.cpp - converters/helper.cpp - converters/integer_converter.cpp - converters/null_converter.cpp - converters/string_converter.cpp - journal_client/public.cpp journal_client/config.cpp |